diff --git a/scripts/fluxnet/01_raw_to_parquet.ipynb b/scripts/fluxnet/01_raw_to_parquet.ipynb
index fea0da2..06dba0d 100644
--- a/scripts/fluxnet/01_raw_to_parquet.ipynb
+++ b/scripts/fluxnet/01_raw_to_parquet.ipynb
@@ -69,11 +69,25 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
+   },
    "outputs": [],
    "source": [
     "zips = (workdir / \"fluxnet\").glob(\"*zip\")\n",
     "\n",
+    "\n",
+    "def make_fname(stem):\n",
+    "    # Lowercase the stem, split it on underscores, and keep field 1 plus\n",
+    "    # field 3 (stems containing AUX) or fields 3-4 (every other stem).\n",
+    "    p = stem.lower().split(\"_\")\n",
+    "    end = 4 if \"AUX\" in stem else 5\n",
+    "    return \"_\".join([p[1], *p[3:end]])\n",
+    "\n",
+    "\n",
     "for zipfile in tqdm(zips):\n",
     "    print(zipfile)\n",
     "\n",
@@ -82,9 +96,8 @@
     "\n",
     "    for csv in csvs:\n",
     "        fname = pathlib.PosixPath(csv)\n",
-    "        blob = (\n",
-    "            blob\n",
-    "        ) = f\"gcs://carbonplan-data/raw/fluxnet/{fname.stem.lower()}.parquet\"\n",
+    "        name = make_fname(fname.stem)\n",
+    "        blob = f\"gcs://carbonplan-data/raw/fluxnet/{name}.parquet\"\n",
     "\n",
     "        df = pd.read_csv(zipfs.open(csv, mode=\"rb\"))\n",
     "        ddf = dd.from_pandas(df, chunksize=1000).repartition(\n",