Split plugin #381

Open
wants to merge 8 commits into master
372 changes: 372 additions & 0 deletions acdcli/plugins/split.py
@@ -0,0 +1,372 @@
"""
Plugin to upload and download bug files in chunks

openssl aes-256-cbc -d -in 00000000 > 00000000.xz
7za x 00000000.xz

"""

import io
import os
import sys
import json
import lzma
import hashlib
import logging

from Crypto import Random
from Crypto.Cipher import AES

from . import *

logger = logging.getLogger(__name__)

# compress settings; PRESET_EXTREME is a flag and must be OR'd with a
# compression level to take effect
lzma_filters = [{"id": lzma.FILTER_LZMA2, "preset": 9 | lzma.PRESET_EXTREME}]


def derive_key_and_iv(password, salt, key_length, iv_length):
"""
Helper function to get the key and IV from a password and optional salt
OpenSSL compatible
"""
d = d_i = b''
while len(d) < key_length + iv_length:
d_i = hashlib.md5(d_i + str.encode(password) + salt).digest()
d += d_i
return d[:key_length], d[key_length:key_length+iv_length]
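
# A minimal sketch of how this lines up with OpenSSL's EVP_BytesToKey
# (the password and all-zero salt below are made-up example values; pass
# "-md md5" on the command line, since newer OpenSSL defaults to SHA-256):
#
#   key, iv = derive_key_and_iv("secret", b"\x00" * 8, 32, 16)
#   # openssl enc -aes-256-cbc -P -md md5 -pass pass:secret -S 0000000000000000
#   # should print the same key and iv in hex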


def prepare_chunk(chunk, compress=False, encrypt_password=None):
"""
Compresses and encrypt a chunk of data
"""
if compress:
# save original chunk size for comparing
# to the compressed size
chunk_length_original = len(chunk)

# compress
chunk = lzma.compress(chunk, filters=lzma_filters)

logger.debug("Compressed %d bytes to %s, saved %02.2f%%",
chunk_length_original,
len(chunk),
100.0 * ((chunk_length_original - len(chunk)) / chunk_length_original))

if encrypt_password:
# create a new Random salt for each chunk
salt = Random.new().read(8)
# get key and IV based on password and the salt
key, iv = derive_key_and_iv(encrypt_password, salt, 32, 16)
# header for an OpenSSL encrypted file
# the term "Salted__" followed by 8 bytes salt
cipher_header = b'Salted__' + salt

# create the cipher, AES-256
cipher = AES.new(key, AES.MODE_CBC, iv)
# PKCS#7-style padding: pad to a full multiple of the AES block size
# (a whole extra block if the chunk is already aligned) for OpenSSL
# compatibility and so the original chunk size can be recovered after
# decryption
padding_length = AES.block_size - len(chunk) % AES.block_size
chunk += bytes([padding_length]) * padding_length
# finally prepend the header and encrypt
chunk = cipher_header + cipher.encrypt(chunk)

# return the chunk wrapped in an in-memory stream
return io.BytesIO(chunk)
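
# prepare_chunk returns an in-memory stream so the result can be handed
# straight to upload_stream()/overwrite_stream() below; a small sketch
# with made-up data:
#
#   stream = prepare_chunk(b"hello world", compress=True, encrypt_password="pw")
#   assert stream.read(8) == b"Salted__"  # OpenSSL header when encrypting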


def unprepare_chunk(chunk, decrypt_password=None):
"""
Helper function that decrypted and decompresses
a chunk of data
"""

if b'Salted__' == chunk[:8]:
# the chunk starts with "Salted__"
# so it's encrypted
if not decrypt_password:
# no dice without password
logger.critical("Found encrypted chunk but no password specified")
# we can't recover from that
sys.exit(-1)

# read the salt, the 8 bytes following the "Salted__"
salt = chunk[8:16]
# again generate key and IV
key, iv = derive_key_and_iv(decrypt_password, salt, 32, 16)
# and the AES-256 cipher
cipher = AES.new(key, AES.MODE_CBC, iv)
# decrypt
chunk = cipher.decrypt(bytes(chunk[16:]))
# get the length of the padding bytes
padding_length = chunk[-1]
# cut them off
chunk = chunk[:-padding_length]

try:
# decompress
chunk = lzma.decompress(chunk)
except lzma.LZMAError:
# the chunk was not LZMA-compressed (uploaded without --lzma-compress),
# keep it as-is
pass

return chunk
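
# unprepare_chunk is the inverse of prepare_chunk; a round trip over
# made-up data returns the original bytes:
#
#   data = b"some chunk data"
#   blob = prepare_chunk(data, compress=True, encrypt_password="pw").getvalue()
#   assert unprepare_chunk(blob, decrypt_password="pw") == data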


class SplitPlugin(Plugin):
MIN_VERSION = '0.3.1'

@classmethod
def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs):
""" Attaches this plugin to the top-level argparse subparser group
:param subparsers the action subparser group
:param log a list to put initialization log messages in
"""
arg_parser = subparsers.add_parser('split-upload', add_help=False)
arg_parser.add_argument('--lzma-compress', '-lc', action='store_true')
arg_parser.add_argument('--password', '-p', type=str)
arg_parser.add_argument('localpath')
arg_parser.add_argument('remotepath')
arg_parser.add_argument('splitsize', type=int)
arg_parser.set_defaults(func=cls.split_upload)

arg_parser = subparsers.add_parser('split-download', add_help=False)
arg_parser.add_argument('--password', '-p', type=str)
arg_parser.add_argument('remotepath')
arg_parser.add_argument('localpath')
arg_parser.add_argument('splitsize', type=int)
arg_parser.set_defaults(func=cls.split_download)

log.append(str(cls) + ' attached.')
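
# Hypothetical invocations once the plugin is attached (the paths, the
# password and the 256 MiB split size below are made-up example values):
#
#   acdcli split-upload --lzma-compress -p secret ./backup.img /backups/backup.img 268435456
#   acdcli split-download -p secret /backups/backup.img ./backup.img 268435456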

@classmethod
def split_upload(cls, args: argparse.Namespace) -> int:
"""
Upload method
"""

# extract the important values for easier access
local_path = os.path.abspath(args.localpath)
remote_path = args.remotepath
chunk_size = args.splitsize
client = args.acd_client
cache = args.cache

# does the local file exist at all?
if not os.path.exists(local_path):
# Nope, we are done
logger.critical("File %s does not exist", local_path)
return 1

# does the remote path already exist?
remote_node = cache.resolve(remote_path)
if not remote_node:
# But we need at least the parent node
# to create a directory in it
parent_name = os.path.dirname(remote_path)
parent_node = cache.resolve(parent_name)
if not parent_node:
# No parent, no go
logger.critical("Parent %s for %s does not exist", parent_name, remote_path)
return 1

# create a directory we can put the chunks in
remote_node = client.create_folder(os.path.basename(remote_path), parent_node.id)
cache.insert_node(remote_node)
remote_node = cache.resolve(remote_path)

# sanity check if the remote is a directory
if not remote_node.is_folder:
logger.critical("Remote path %s is not a directory", remote_path)
return 1

# default for chunks data
chunks_data = {"chunks": []}
chunks_node = cache.resolve(remote_path + "/chunks")
if chunks_node:
# load from ACD
chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id,
0,
chunks_node.size),
decrypt_password=args.password).decode("utf8"))
else:
# chunks file doesn't exist yet, create it with the default values
result = client.upload_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"),
args.lzma_compress,
args.password),
"chunks",
remote_node.id)
cache.insert_node(result)
chunks_node = cache.resolve(remote_path + "/chunks")

# okay, time to get started
with open(local_path, "rb") as file_stream:
chunk_id = 0
while True:
# read the local file chunk by chunk
chunk = file_stream.read(chunk_size)
if not chunk:
# nothing left to read, we are done
break

# calculate the hash
md5_digest = hashlib.md5(chunk).hexdigest()

upload_chunk = True
# the name for our current chunk
chunk_name = "%x" % chunk_id

# does it already exist on ACD
remote_child_node = cache.get_child(remote_node.id, chunk_name)
if remote_child_node:
# YES

# if we have an MD5 hash in the chunks file and the user has requested
# compression or encryption, use the cached MD5 for the comparison
# (the remote MD5 is that of the transformed chunk, not the original)
if len(chunks_data["chunks"]) > chunk_id and (args.lzma_compress or args.password):
upload_chunk = md5_digest != chunks_data["chunks"][chunk_id]
else:
# otherwise compare against the MD5 of ACD
upload_chunk = md5_digest != remote_child_node.md5

if upload_chunk:
logger.info('%s: MD5 mismatch (%s / %s)', chunk_name, remote_child_node.md5, md5_digest)

if upload_chunk:
# So, we need to upload something

# update the chunks data
if len(chunks_data["chunks"]) > chunk_id:
chunks_data["chunks"][chunk_id] = md5_digest
else:
chunks_data["chunks"].append(md5_digest)

# prepare the chunk for upload (maybe compress and encrypt)
chunk = prepare_chunk(chunk, compress=args.lzma_compress, encrypt_password=args.password)

if remote_child_node:
# if it exists, overwrite
logger.info("%s: Overwriting %d bytes to node %s", chunk_name, len(chunk.getvalue()), remote_child_node.id)
remote_child_node = client.overwrite_stream(chunk, remote_child_node.id)
else:
# if not, create a new file
logger.info("%s: Uploading %d bytes", chunk_name, len(chunk.getvalue()))
remote_child_node = client.upload_stream(chunk, chunk_name, remote_node.id)
cache.insert_node(remote_child_node)

# always update the chunks file on ACD
result = client.overwrite_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"),
compress=args.lzma_compress,
encrypt_password=args.password),
chunks_node.id)
cache.insert_node(result)
else:
logger.info("%s: Keeping %d bytes", chunk_name, len(chunk))

# and the next chunk
chunk_id += 1

return 0
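
# The "chunks" helper file maintained above is a JSON document holding,
# per chunk index, the MD5 of the raw (pre-compression/encryption) chunk
# data, e.g. (hashes are made up):
#
#   {"chunks": ["9e107d9d372bb6826bd81d3542a419d6",
#               "e4d909c290d0fb1ca068ffaddf22cbd0"]}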

@classmethod
def split_download(cls, args: argparse.Namespace) -> int:
"""
Download method
"""

# extract the important values for easier access
remote_path = args.remotepath
local_path = os.path.abspath(args.localpath)
chunk_size = args.splitsize
client = args.acd_client
cache = args.cache

# get the remote node
remote_node = cache.resolve(remote_path)
if not remote_node:
# does not exist?
logger.critical("Remote %s does not exist", remote_path)
return 1

# the remote node must be a folder
if not remote_node.is_folder:
logger.critical("Remote path %s is not a directory", remote_path)
return 1

# default for chunks data
chunks_data = {"chunks": []}
chunks_node = cache.resolve(remote_path + "/chunks")
if chunks_node:
# load from ACD
chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id,
0,
chunks_node.size),
decrypt_password=args.password).decode("utf8"))

# if the file already exists, open it read-write so existing chunks can be read back and compared
open_mode = "rb+"
if not os.path.exists(local_path):
# if there is no local file we just need to write
open_mode = "wb+"

# okay, time to get started
with open(local_path, open_mode) as file_stream:
chunk_id = 0
while True:
# the name for our current chunk
chunk_name = "%x" % chunk_id
chunk_node = cache.get_child(remote_node.id, chunk_name)
if not chunk_node:
# no chunk file on ACD, we are done
logger.debug("Chunk %s not found, stopping", chunk_name)
break

overwrite_chunk = False
# save the current position so we can jump back when we
# need to overwrite this
current_position = file_stream.tell()
# read the chunk
current_chunk = file_stream.read(chunk_size)
# get the hash
current_md5 = hashlib.md5(current_chunk).hexdigest()

# Do we have an MD5 from the chunks file?
if len(chunks_data["chunks"]) > chunk_id:
# Yes, compare the local hash against the one from
# the chunks file
overwrite_chunk = current_md5 != chunks_data["chunks"][chunk_id]
else:
# no cached MD5 for this chunk

# overwrite if the file sizes don't match
if len(current_chunk) != chunk_node.size:
logger.debug("%s: Size mismatch %d / %d", chunk_name, len(current_chunk), chunk_node.size)
overwrite_chunk = True
else:
# or if the hash from ACD is different
if current_md5 != chunk_node.md5:
logger.debug("%s: MD5 mis-match %s / %s", chunk_name, current_md5, chunk_node.md5)
overwrite_chunk = True

if not overwrite_chunk:
logger.info("%s: Keeping chunk with %d bytes", chunk_name, chunk_node.size)
else:
# we need to overwrite our local file chunk
logger.info("Download chunk %s with %d bytes", chunk_name, chunk_node.size)
# download the chunk from ACD
chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size)

# jump back to the start of the chunk
file_stream.seek(current_position)
# overwrite it
file_stream.write(unprepare_chunk(chunk, decrypt_password=args.password))

# and the next chunk
chunk_id += 1

return 0
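
# After a download completes, the reassembled file can be verified against
# the chunks file; a sketch, assuming the same splitsize as the upload:
#
#   with open(local_path, "rb") as f:
#       for expected in chunks_data["chunks"]:
#           assert hashlib.md5(f.read(chunk_size)).hexdigest() == expected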