diff --git a/build_wasm.sh b/build_wasm.sh
index d80bb74..c54b454 100755
--- a/build_wasm.sh
+++ b/build_wasm.sh
@@ -88,6 +88,6 @@ emcc unpack.c -o $WASM_LIB/unpack.js \
     -s INITIAL_MEMORY=128MB \
     -s ENVIRONMENT=web \
     -s EXPORTED_RUNTIME_METHODS='["ccall", "cwrap", "getValue", "UTF8ToString", "wasmMemory"]' \
-    -s EXPORTED_FUNCTIONS="['_extract_archive', '_free_extracted_archive', '_malloc', '_free']"
+    -s EXPORTED_FUNCTIONS="['_extract', '_free_extracted_archive', '_malloc', '_free']"
 
 echo "Build completed successfully!"
\ No newline at end of file
diff --git a/src/index.ts b/src/index.ts
index 1136ba4..4b96291 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -18,21 +18,21 @@ export const initUntarJS = async (): Promise => {
     decompressionOnly: boolean = false
   ): Promise => {
     /**Since WebAssembly, memory is accessed using pointers
-      and the first parameter of extract_archive method from unpack.c, which is Uint8Array of file data, should be a pointer
+      and the first parameter of extract method from unpack.c, which is Uint8Array of file data, should be a pointer
       so we have to allocate memory for file data
      **/
     let inputPtr: number | null = wasmModule._malloc(data.length);
     wasmModule.HEAPU8.set(data, inputPtr);
 
-    let resultPtr: number | null = wasmModule._extract_archive(
+    let resultPtr: number | null = wasmModule._extract(
       inputPtr,
       data.length,
       decompressionOnly
     );
 
     const files: FilesData = {};
     /**
-     * Since extract_archive returns a pointer that refers to an instance of the ExtractedArchive in unpack.c
+     * Since extract returns a pointer that refers to an instance of the ExtractedArchive in unpack.c
         typedef struct {
           FileData* files;
           size_t fileCount;
@@ -99,7 +99,6 @@ export const initUntarJS = async (): Promise => {
           dataPtr,
           dataSize
         );
-
         const fileDataCopy = fileData.slice(0);
         files[filename] = fileDataCopy;
       }
@@ -121,7 +120,7 @@ export const initUntarJS = async (): Promise => {
 
   const checkIsArchive = (url: string): boolean => {
     let isArchive: boolean = false;
-    let archiveExtArr = ['.conda', 'tar.bz2', 'tar.gz'];
+    let archiveExtArr = ['.conda', 'tar.bz2', 'tar.gz', '.zip'];
     archiveExtArr.forEach(type => {
       if (url.toLowerCase().endsWith(type)) {
         isArchive = true;
diff --git a/src/unpack.d.ts b/src/unpack.d.ts
index 5bb7442..ba74e1c 100644
--- a/src/unpack.d.ts
+++ b/src/unpack.d.ts
@@ -5,7 +5,7 @@ export interface IWasmModule {
   HEAPU8: Uint8Array;
   _malloc(size: number): number;
   _free(ptr: number): void;
-  _extract_archive(
+  _extract(
     inputPtr: number,
     inputSize: number,
     decompressionOnly: boolean
diff --git a/unpack.c b/unpack.c
index 302e414..8756e91 100644
--- a/unpack.c
+++ b/unpack.c
@@ -19,14 +19,34 @@ typedef struct {
     char error_message[256];
 } ExtractedArchive;
 
+/*
+ * Marks `result` as failed (status 0), copies `error_message` into it and
+ * releases `archive`. Either pointer may be NULL. Returns `result`.
+ */
+ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) {
+    if (result) {
+        result->status = 0;
+        /* Copy before freeing: the message may point into `archive`. */
+        snprintf(result->error_message, sizeof(result->error_message), "%s",
+                 error_message ? error_message : "Unknown error");
+    } else {
+        fprintf(stderr, "error_handler: result is NULL\n");
+    }
+
+    if (archive) {
+        archive_read_free(archive);
+    }
+    return result;
+}
 
 EMSCRIPTEN_KEEPALIVE
-ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) {
+ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
     struct archive* archive;
     struct archive_entry* entry;
     size_t files_struct_length = 100;
     FileData* files = NULL;
     size_t files_count = 0;
+    const char *error_message;
 
     ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
     if (!result) {
@@ -41,32 +61,24 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
     archive = archive_read_new();
     archive_read_support_filter_all(archive);
     archive_read_support_format_all(archive);
-    if (decompressionOnly) {
-        archive_read_support_format_raw(archive);
-    }
 
     if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) {
-        result->status = 0;
-        snprintf(result->error_message, sizeof(result->error_message), "%s", archive_error_string(archive));
-        archive_read_free(archive);
-        return result;
+        return error_handler(result,archive_error_string(archive), archive);
     }
     files = malloc(sizeof(FileData) * files_struct_length);
 
     while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
-        const char* filename = decompressionOnly ? "decompression.json": archive_entry_pathname(entry);
-        size_t entrySize = decompressionOnly ? inputSize: archive_entry_size(entry);
+        const char* filename = archive_entry_pathname(entry);
+        size_t entrySize = archive_entry_size(entry);
         if (files_count + 1 > files_struct_length) {
             files_struct_length *= 2; // double the length
             FileData* oldfiles = files;
             files= realloc(files, sizeof(FileData) * files_struct_length);
             if (!files) {
-                archive_read_free(archive);
-                result->status = 0;
                 result->fileCount = files_count;
                 result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed.
-                snprintf(result->error_message, sizeof(result->error_message), "Memory allocation error for file data.");
-                return result;
+                error_message = "Memory allocation error for file data.";
+                return error_handler(result, error_message, archive);
             }
         }
         files[files_count].filename = strdup(filename);
@@ -75,13 +87,11 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
 
         if (!files[files_count].data) {
             free(files[files_count].filename);
-            files[files_count].filename = NULL;
-            archive_read_free(archive);
-            result->status = 0;
+            files[files_count].filename = NULL;
             result->fileCount = files_count;
             result->files = files; // otherwise memory is lost, alternatively also everything can be freed.
-            snprintf(result->error_message, sizeof(result->error_message), "Memory allocation error for file contents.");
-            return result;
+            error_message = "Memory allocation error for file contents.";
+            return error_handler(result, error_message, archive);
         }
 
         size_t bytesRead = 0;
@@ -94,10 +104,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
                 }
                 free(files);
                 result->files = NULL;
-                result->status = 0;
-                snprintf(result->error_message, sizeof(result->error_message), "%s", archive_error_string(archive));
-                archive_read_free(archive);
-                return result;
+                return error_handler(result, archive_error_string(archive), archive);
             }
             bytesRead += ret;
         }
@@ -111,6 +118,175 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
     return result;
 }
 
+/*
+ * Writes `size` bytes from `data` to a fresh temporary file.
+ * Returns the heap-allocated file name (caller must unlink() and free() it),
+ * or NULL on failure, in which case no file is left behind.
+ */
+char* write_to_temp_file(uint8_t* data, size_t size) {
+    char* temp_file_name = strdup("/tmp/decompressionXXXXXX");
+    if (!temp_file_name) {
+        perror("Failed to allocate temporary file name");
+        return NULL;
+    }
+
+    int fd = mkstemp(temp_file_name);
+    if (fd == -1) {
+        perror("Failed to create temporary file for decompression file");
+        free(temp_file_name);
+        return NULL;
+    }
+
+    FILE* temp_file = fdopen(fd, "wb");
+    if (!temp_file) {
+        perror("Failed to open temporary file");
+        close(fd);
+        unlink(temp_file_name);
+        free(temp_file_name);
+        return NULL;
+    }
+
+    /* fclose() flushes buffered data, so its result is part of the write check. */
+    size_t written = fwrite(data, 1, size, temp_file);
+    int close_status = fclose(temp_file);
+    if (written != size || close_status != 0) {
+        perror("Failed to write to temporary file");
+        unlink(temp_file_name);
+        free(temp_file_name);
+        return NULL;
+    }
+
+    return temp_file_name;
+}
+
+/*
+ * Decompresses a single compressed stream (libarchive "raw" format) into an
+ * ExtractedArchive holding at most one file.
+ * On failure the result has status 0 and error_message set; NULL is returned
+ * only when the result itself cannot be allocated.
+ */
+EMSCRIPTEN_KEEPALIVE
+ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) {
+    struct archive* archive = NULL;
+    struct archive_entry* entry;
+    const size_t buffsize = 64 * 1024;
+    const char *error_message = NULL;
+    char* temp_file_name = NULL;
+    char* buff = NULL;
+    FileData* files = NULL;
+
+    ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
+    if (!result) {
+        return NULL;
+    }
+
+    result->files = NULL;
+    result->fileCount = 0;
+    result->status = 1;
+    result->error_message[0] = '\0';
+
+    /* Heap buffers: a 64 KiB stack array can exceed a small WebAssembly stack. */
+    files = calloc(1, sizeof(FileData));
+    buff = malloc(buffsize);
+    if (!files || !buff) {
+        error_message = "Memory allocation error";
+        goto fail;
+    }
+
+    temp_file_name = write_to_temp_file(inputData, inputSize);
+    if (!temp_file_name) {
+        error_message = "Failed to create temporary file";
+        goto fail;
+    }
+
+    archive = archive_read_new();
+    if (!archive) {
+        error_message = "Memory allocation error";
+        goto fail;
+    }
+    archive_read_support_filter_all(archive);
+    archive_read_support_format_raw(archive);
+
+    /* The third argument is the read block size, not the file size. */
+    if (archive_read_open_filename(archive, temp_file_name, buffsize) != ARCHIVE_OK) {
+        error_message = archive_error_string(archive);
+        goto fail;
+    }
+
+    /* `files` holds exactly one slot, so read at most one entry. */
+    if (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
+        const char* filename = archive_entry_pathname(entry);
+        size_t total_size = 0;
+
+        if (!filename) filename = "decompression";
+        files[0].filename = strdup(filename);
+        if (!files[0].filename) {
+            error_message = "Memory allocation error";
+            goto fail;
+        }
+
+        for (;;) {
+            ssize_t ret = archive_read_data(archive, buff, buffsize);
+            if (ret < 0) {
+                error_message = archive_error_string(archive);
+                goto fail;
+            }
+            if (ret == 0) {
+                break;
+            }
+
+            void* new_data = realloc(files[0].data, total_size + ret);
+            if (!new_data) {
+                error_message = "Memory allocation error";
+                goto fail;
+            }
+
+            files[0].data = new_data;
+            memcpy(files[0].data + total_size, buff, ret);
+            total_size += ret;
+        }
+        files[0].data_size = total_size;
+        result->fileCount = 1;
+    }
+
+    archive_read_free(archive);
+    unlink(temp_file_name);
+    free(temp_file_name);
+    free(buff);
+
+    result->files = files;
+    result->status = 1;
+    return result;
+
+fail:
+    /* error_handler copies the message before it frees `archive`. */
+    error_handler(result, error_message, archive);
+    if (files) {
+        free(files[0].filename);
+        free(files[0].data);
+        free(files);
+    }
+    if (temp_file_name) {
+        unlink(temp_file_name);
+        free(temp_file_name);
+    }
+    free(buff);
+    return result;
+}
+
+/*
+ * Entry point called from JavaScript: unpacks an archive, or only
+ * decompresses the input when `decompressionOnly` is true.
+ */
+EMSCRIPTEN_KEEPALIVE
+ExtractedArchive* extract(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) {
+    if (!decompressionOnly) {
+        return extract_archive(inputData, inputSize);
+    } else {
+        return decompression(inputData, inputSize);
+    }
+}
+
 EMSCRIPTEN_KEEPALIVE
 void free_extracted_archive(ExtractedArchive* archive) {
     if (!archive) {