Skip to content

Commit

Permalink
updated convert
Browse files Browse the repository at this point in the history
  • Loading branch information
beatfactor committed Aug 10, 2024
1 parent e1d9720 commit bfa2091
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 39 deletions.
11 changes: 7 additions & 4 deletions oceanstream/cli/main.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import traceback
from asyncio import CancelledError

import typer
Expand Down Expand Up @@ -128,8 +129,6 @@ def convert(
from oceanstream.process import convert_raw_file
print(f"[blue]Converting raw file {source} to Zarr...[/blue]")
convert_raw_file(filePath, configData)
print(
f"[blue]✅ Converted raw file {source} to Zarr and wrote output to: {configData['output_folder']} [/blue]")
elif filePath.is_dir():
from oceanstream.process import convert_raw_files
convert_raw_files(configData, workers_count=workers_count)
Expand All @@ -138,8 +137,12 @@ def convert(
except KeyboardInterrupt:
logging.info("KeyboardInterrupt received, terminating processes...")
except Exception as e:
logging.exception("Error processing folder %s", configData['raw_path'])
print(Traceback())
if filePath.is_file():
logging.error("Error processing file %s", filePath)
else:
logging.exception("Error processing folder %s", configData['raw_path'])

logging.error(traceback.format_exc())


@app.command()
Expand Down
2 changes: 1 addition & 1 deletion oceanstream/process/azure/blob_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def list_zarr_files(path, azfs=None):
zarr_files = []
for blob in azfs.ls(path, detail=True):
if blob['type'] == 'directory' and not blob['name'].endswith('.zarr'):
subdir_files = list_zarr_files(azfs, blob['name'])
subdir_files = list_zarr_files(blob['name'], azfs)
zarr_files.extend(subdir_files)
elif blob['name'].endswith('.zarr'):
zarr_files.append({
Expand Down
63 changes: 32 additions & 31 deletions oceanstream/process/file_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,45 +221,46 @@ async def process_raw_file_with_progress(config_data, plot_echogram, waveform_mo
def convert_raw_file(file_path, config_data, base_path=None, progress_queue=None):
logging.debug("Starting processing of file: %s", file_path)

try:
file_path_obj = Path(file_path)
file_config_data = {**config_data, 'raw_path': file_path_obj}

if base_path:
relative_path = file_path_obj.relative_to(base_path)
relative_path = relative_path.parent
else:
relative_path = None
file_path_obj = Path(file_path)
file_config_data = {**config_data, 'raw_path': file_path_obj}

echodata, encode_mode = read_file(file_config_data, use_swap=True, skip_integrity_check=True)
file_name = file_path_obj.stem + ".zarr"
if base_path:
relative_path = file_path_obj.relative_to(base_path)
relative_path = relative_path.parent
else:
relative_path = None

if 'cloud_storage' in config_data:
if relative_path:
file_location = Path(relative_path) / file_name
else:
file_location = file_name
store = _get_chunk_store(config_data['cloud_storage'], file_location)
echodata.to_zarr(save_path=store, overwrite=True, parallel=False)
output_zarr_path = store
echodata, encode_mode = read_file(file_config_data, use_swap=True, skip_integrity_check=True)
file_name = file_path_obj.stem + ".zarr"

if 'cloud_storage' in config_data:
if relative_path:
file_location = Path(relative_path) / file_name
else:
if relative_path:
output_path = Path(config_data["output_folder"]) / relative_path
output_path.mkdir(parents=True, exist_ok=True)
else:
output_path = Path(config_data["output_folder"])
file_location = file_name
store = _get_chunk_store(config_data['cloud_storage'], file_location)
echodata.to_zarr(save_path=store, overwrite=True, parallel=False)
output_zarr_path = store
output_dest = config_data['cloud_storage']['container_name'] + "/" + file_location
else:
if relative_path:
output_path = Path(config_data["output_folder"]) / relative_path
output_path.mkdir(parents=True, exist_ok=True)
else:
output_path = Path(config_data["output_folder"])

output_zarr_path = output_path / file_name
echodata.to_zarr(save_path=output_zarr_path, overwrite=True, parallel=False)
output_zarr_path = output_path / file_name
output_dest = output_zarr_path
echodata.to_zarr(save_path=output_zarr_path, overwrite=True, parallel=False)

if progress_queue:
progress_queue.put(file_path)
if progress_queue:
progress_queue.put(file_path)

return output_zarr_path
print(
f"[blue]✅ Converted raw file {file_path} to Zarr and wrote output to: {output_dest} [/blue]")

except Exception as e:
logging.error("Error processing file %s", file_path)
print(Traceback())
return output_zarr_path


def write_zarr_file(zarr_path, zarr_file_name, ds_processed, config_data=None, output_path=None):
Expand Down
7 changes: 4 additions & 3 deletions oceanstream/process/folder_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,11 +350,12 @@ def merge_json_files(output_folder):

def from_filename(file_name):
"""Extract creation time from the file name if it follows a specific pattern."""
pattern = r'(\d{4}[A-Z])?-D(\d{8})-T(\d{6})\.raw'
# pattern = r'(\d{4}[A-Z])?-D(\d{8})-T(\d{6})\.raw'
pattern = r'.*-D(\d{8})-T(\d{6})(-\d+)?\.raw'
match = re.search(pattern, file_name)
if match:
date_str = match.group(2)
time_str = match.group(3)
date_str = match.group(1)
time_str = match.group(2)
creation_time = datetime.strptime(date_str + time_str, '%Y%m%d%H%M%S')
return creation_time

Expand Down

0 comments on commit bfa2091

Please sign in to comment.