Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix decompression #40

Merged
merged 3 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build_wasm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,6 @@ emcc unpack.c -o $WASM_LIB/unpack.js \
-s INITIAL_MEMORY=128MB \
-s ENVIRONMENT=web \
-s EXPORTED_RUNTIME_METHODS='["ccall", "cwrap", "getValue", "UTF8ToString", "wasmMemory"]' \
-s EXPORTED_FUNCTIONS="['_extract_archive', '_free_extracted_archive', '_malloc', '_free']"
-s EXPORTED_FUNCTIONS="['_extract', '_free_extracted_archive', '_malloc', '_free']"

echo "Build completed successfully!"
9 changes: 4 additions & 5 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
decompressionOnly: boolean = false
): Promise<FilesData> => {
/**Since WebAssembly, memory is accessed using pointers
and the first parameter of extract_archive method from unpack.c, which is Uint8Array of file data, should be a pointer
and the first parameter of extract method from unpack.c, which is Uint8Array of file data, should be a pointer
so we have to allocate memory for file data
**/
let inputPtr: number | null = wasmModule._malloc(data.length);
wasmModule.HEAPU8.set(data, inputPtr);


let resultPtr: number | null = wasmModule._extract_archive(
let resultPtr: number | null = wasmModule._extract(
inputPtr,
data.length,
decompressionOnly
);
const files: FilesData = {};
/**
* Since extract_archive returns a pointer that refers to an instance of the ExtractedArchive in unpack.c
* Since extract returns a pointer that refers to an instance of the ExtractedArchive in unpack.c
typedef struct {
FileData* files;
size_t fileCount;
Expand Down Expand Up @@ -99,7 +99,6 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {
dataPtr,
dataSize
);

const fileDataCopy = fileData.slice(0);
files[filename] = fileDataCopy;
}
Expand All @@ -121,7 +120,7 @@ export const initUntarJS = async (): Promise<IUnpackJSAPI> => {

const checkIsArchive = (url: string): boolean => {
let isArchive: boolean = false;
let archiveExtArr = ['.conda', 'tar.bz2', 'tar.gz'];
let archiveExtArr = ['.conda', 'tar.bz2', 'tar.gz', '.zip'];
archiveExtArr.forEach(type => {
if (url.toLowerCase().endsWith(type)) {
isArchive = true;
Expand Down
2 changes: 1 addition & 1 deletion src/unpack.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export interface IWasmModule {
HEAPU8: Uint8Array;
_malloc(size: number): number;
_free(ptr: number): void;
_extract_archive(
_extract(
inputPtr: number,
inputSize: number,
decompressionOnly: boolean
Expand Down
183 changes: 160 additions & 23 deletions unpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,28 @@ typedef struct {
char error_message[256];
} ExtractedArchive;

/**
 * Record a failure on `result` and release the libarchive reader.
 *
 * Sets result->status to 0, copies `error_message` (truncated to the size of
 * result->error_message) into the result, and frees `archive` when one was
 * supplied.
 *
 * @param result         Result object to mark as failed; may be NULL.
 * @param error_message  Human-readable description of the failure. May point
 *                       into `archive` (e.g. archive_error_string), so it is
 *                       copied before the reader is freed. NULL is tolerated.
 * @param archive        libarchive reader to free; may be NULL when the
 *                       failure happened before a reader was created.
 * @return `result`, or NULL when `result` itself is NULL.
 */
ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) {
    if (!result) {
        /* Nothing to report the error on; still release the reader so it does not leak. */
        fprintf(stderr, "Result is null\n");
        if (archive) {
            archive_read_free(archive);
        }
        return NULL;
    }

    result->status = 0;

    /* Copy the message first: it may be owned by `archive`, which is freed below. */
    snprintf(result->error_message, sizeof(result->error_message), "%s",
             error_message ? error_message : "Unknown error");

    /* A missing reader is not an error in itself; the caller still gets the failed result back. */
    if (archive) {
        archive_read_free(archive);
    }
    return result;
}

EMSCRIPTEN_KEEPALIVE
ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) {
ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) {
struct archive* archive;
struct archive_entry* entry;
size_t files_struct_length = 100;
FileData* files = NULL;
size_t files_count = 0;
const char *error_message;

ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
if (!result) {
Expand All @@ -41,32 +55,24 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
archive = archive_read_new();
archive_read_support_filter_all(archive);
archive_read_support_format_all(archive);
if (decompressionOnly) {
archive_read_support_format_raw(archive);
}

if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) {
result->status = 0;
snprintf(result->error_message, sizeof(result->error_message), "%s", archive_error_string(archive));
archive_read_free(archive);
return result;
return error_handler(result,archive_error_string(archive), archive);
}
files = malloc(sizeof(FileData) * files_struct_length);

while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
const char* filename = decompressionOnly ? "decompression.json": archive_entry_pathname(entry);
size_t entrySize = decompressionOnly ? inputSize: archive_entry_size(entry);
const char* filename = archive_entry_pathname(entry);
size_t entrySize = archive_entry_size(entry);
if (files_count + 1 > files_struct_length) {
files_struct_length *= 2; // double the length
FileData* oldfiles = files;
files= realloc(files, sizeof(FileData) * files_struct_length);
if (!files) {
archive_read_free(archive);
result->status = 0;
result->fileCount = files_count;
result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed.
snprintf(result->error_message, sizeof(result->error_message), "Memory allocation error for file data.");
return result;
error_message = "Memory allocation error for file data.";
return error_handler(result, error_message, archive);
}
}
files[files_count].filename = strdup(filename);
Expand All @@ -75,13 +81,11 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec

if (!files[files_count].data) {
free(files[files_count].filename);
files[files_count].filename = NULL;
archive_read_free(archive);
result->status = 0;
files[files_count].filename = NULL;
result->fileCount = files_count;
result->files = files; // otherwise memory is lost, alternatively also everything can be freed.
snprintf(result->error_message, sizeof(result->error_message), "Memory allocation error for file contents.");
return result;
error_message = "Memory allocation error for file contents.";
return error_handler(result, error_message, archive);
}

size_t bytesRead = 0;
Expand All @@ -94,10 +98,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
}
free(files);
result->files = NULL;
result->status = 0;
snprintf(result->error_message, sizeof(result->error_message), "%s", archive_error_string(archive));
archive_read_free(archive);
return result;
return error_handler(result, archive_error_string(archive), archive);
}
bytesRead += ret;
}
Expand All @@ -111,6 +112,142 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize, bool dec
return result;
}

/**
 * Write a memory buffer to a freshly created temporary file.
 *
 * The file is created with mkstemp() from the template
 * "/tmp/decompressionXXXXXX", filled with `size` bytes from `data`, and closed.
 *
 * @param data  Bytes to write.
 * @param size  Number of bytes to write from `data`.
 * @return Heap-allocated path of the temporary file (caller must unlink() the
 *         file and free() the string), or NULL on any failure. On failure no
 *         temporary file is left behind.
 */
char* write_to_temp_file(uint8_t* data, size_t size) {
    char* temp_file_name = strdup("/tmp/decompressionXXXXXX");
    if (!temp_file_name) {
        /* mkstemp() would dereference a NULL template. */
        perror("Failed to allocate temporary file name");
        return NULL;
    }

    int fd = mkstemp(temp_file_name);
    if (fd == -1) {
        perror("Failed to create temporary file for decompression file");
        free(temp_file_name);
        return NULL;
    }

    FILE* temp_file = fdopen(fd, "wb");
    if (!temp_file) {
        perror("Failed to open temporary file");
        close(fd);
        unlink(temp_file_name);
        free(temp_file_name);
        return NULL;
    }

    if (fwrite(data, 1, size, temp_file) != size) {
        perror("Failed to write to temporary file");
        fclose(temp_file);
        unlink(temp_file_name);
        free(temp_file_name);
        return NULL;
    }

    /* fclose() flushes buffered data; a failure here means the file is incomplete. */
    if (fclose(temp_file) != 0) {
        perror("Failed to write to temporary file");
        unlink(temp_file_name);
        free(temp_file_name);
        return NULL;
    }

    return temp_file_name;
}

/**
 * Decompress a compressed byte stream that is not wrapped in an archive format.
 *
 * The input is spooled to a temporary file, opened through libarchive with all
 * filters and the "raw" format enabled, and the decompressed bytes of every
 * entry libarchive reports are accumulated into a heap buffer.
 *
 * @param inputData  Pointer to the compressed bytes.
 * @param inputSize  Number of bytes available at `inputData`.
 * @return A heap-allocated ExtractedArchive. On success status is 1 and
 *         files/fileCount describe the decompressed entries. On failure
 *         status is 0, error_message is set, files is NULL and fileCount is 0.
 *         Returns NULL only when the initial allocations fail.
 */
EMSCRIPTEN_KEEPALIVE
ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) {
    struct archive* archive = NULL; /* stays NULL until a reader exists, so cleanup can test it */
    struct archive_entry* entry;
    const size_t buffsize = 64 * 1024;
    size_t files_count = 0;       /* entries that were read completely */
    size_t files_initialized = 0; /* entries whose fields are safe to free */
    size_t files_capacity = 1;
    const char* error_message = NULL;
    char* temp_file_name = NULL;
    char* buff = NULL;

    FileData* files = malloc(sizeof(FileData) * files_capacity);

    if (!files) {
        printf("Failed to allocate memory for files array\n");
        return NULL;
    }

    ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive));
    if (!result) {
        free(files);
        return NULL;
    }

    result->files = NULL;
    result->fileCount = 0;
    result->status = 1;
    result->error_message[0] = '\0';

    /* Heap-allocated on purpose: a 64 KiB array on the stack could exhaust a
       small WebAssembly stack (NOTE(review): depends on the build's STACK_SIZE). */
    buff = malloc(buffsize);
    if (!buff) {
        error_message = "Memory allocation error";
        goto fail;
    }

    temp_file_name = write_to_temp_file(inputData, inputSize);
    if (!temp_file_name) {
        error_message = "Failed to create temporary file";
        goto fail;
    }

    archive = archive_read_new();
    if (!archive) {
        error_message = "Failed to create archive reader";
        goto fail;
    }
    archive_read_support_filter_all(archive);
    archive_read_support_format_raw(archive);

    /* The third argument is the read block size, not the file size. */
    if (archive_read_open_filename(archive, temp_file_name, buffsize) != ARCHIVE_OK) {
        error_message = archive_error_string(archive);
        goto fail;
    }

    while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) {
        /* Grow the array before writing so a second entry cannot overflow it. */
        if (files_count == files_capacity) {
            FileData* grown = realloc(files, sizeof(FileData) * files_capacity * 2);
            if (!grown) {
                error_message = "Memory allocation error";
                goto fail;
            }
            files = grown;
            files_capacity *= 2;
        }

        const char* filename = archive_entry_pathname(entry);
        if (!filename) filename = "decompression";

        files[files_count].filename = strdup(filename);
        files[files_count].data = NULL;
        files[files_count].data_size = 0;
        files_initialized = files_count + 1;

        if (!files[files_count].filename) {
            error_message = "Memory allocation error";
            goto fail;
        }

        /* Size is tracked per entry so each buffer starts at offset 0. */
        size_t entry_size = 0;

        for (;;) {
            ssize_t ret = archive_read_data(archive, buff, buffsize);
            if (ret < 0) {
                error_message = archive_error_string(archive);
                goto fail;
            }
            if (ret == 0) {
                break;
            }

            void* new_data = realloc(files[files_count].data, entry_size + (size_t)ret);
            if (!new_data) {
                /* The old buffer is still owned by the entry and is freed in the cleanup path. */
                error_message = "Memory allocation error";
                goto fail;
            }

            files[files_count].data = new_data;
            memcpy((uint8_t*)new_data + entry_size, buff, (size_t)ret);
            entry_size += (size_t)ret;
        }
        files[files_count].data_size = entry_size;
        files_count++;
    }

    archive_read_free(archive);
    unlink(temp_file_name);
    free(temp_file_name);
    free(buff);

    result->files = files;
    result->fileCount = files_count;
    result->status = 1;
    return result;

fail:
    /* Single cleanup path: release everything acquired so far, then report. */
    for (size_t i = 0; i < files_initialized; i++) {
        free(files[i].filename);
        free(files[i].data);
    }
    free(files);
    free(buff);
    if (temp_file_name) {
        unlink(temp_file_name);
        free(temp_file_name);
    }
    if (!error_message) {
        error_message = "Unknown error";
    }
    if (archive) {
        /* error_handler copies the message before freeing the reader that may own it. */
        return error_handler(result, error_message, archive);
    }
    /* No reader was created yet, so record the failure directly on the result. */
    result->status = 0;
    snprintf(result->error_message, sizeof(result->error_message), "%s", error_message);
    return result;
}

/**
 * Exported entry point that routes the input to the right reader.
 *
 * @param inputData          Pointer to the input bytes.
 * @param inputSize          Number of bytes available at `inputData`.
 * @param decompressionOnly  When true the input is passed to decompression();
 *                           otherwise it is passed to extract_archive().
 * @return Whatever the selected function returns.
 */
EMSCRIPTEN_KEEPALIVE
ExtractedArchive* extract(uint8_t* inputData, size_t inputSize, bool decompressionOnly ) {
    if (decompressionOnly) {
        return decompression(inputData, inputSize);
    }
    return extract_archive(inputData, inputSize);
}

EMSCRIPTEN_KEEPALIVE
void free_extracted_archive(ExtractedArchive* archive) {
if (!archive) {
Expand Down
Loading