Skip to content

Commit

Permalink
rerun fluxnet raw
Browse files Browse the repository at this point in the history
  • Loading branch information
Joseph Hamman committed Aug 18, 2020
1 parent 021156a commit b887552
Showing 1 changed file with 18 additions and 4 deletions.
22 changes: 18 additions & 4 deletions scripts/fluxnet/01_raw_to_parquet.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,26 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [],
"source": [
"zips = (workdir / \"fluxnet\").glob(\"*zip\")\n",
"\n",
"\n",
"def make_fname(stem):\n",
"    \"\"\"Derive a short name from an underscore-delimited file stem.\n",
"\n",
"    The stem is lower-cased and split on underscores. The result joins\n",
"    field 1 with field 3 when the stem contains the substring AUX (in any\n",
"    letter case), and with fields 3 and 4 otherwise. Fields missing past\n",
"    index 1 are skipped.\n",
"\n",
"    Raises IndexError if the stem contains no underscore.\n",
"    \"\"\"\n",
"    lowered = stem.lower()\n",
"    parts = lowered.split(\"_\")\n",
"    # Test the lower-cased stem so the check agrees with the lower-cased\n",
"    # fields that make up the returned name.\n",
"    stop = 4 if \"aux\" in lowered else 5\n",
"    return \"_\".join([parts[1], *parts[3:stop]])\n",
"\n",
"\n",
"for zipfile in tqdm(zips):\n",
" print(zipfile)\n",
"\n",
Expand All @@ -82,9 +97,8 @@
"\n",
" for csv in csvs:\n",
" fname = pathlib.PosixPath(csv)\n",
" blob = (\n",
" blob\n",
" ) = f\"gcs://carbonplan-data/raw/fluxnet/{fname.stem.lower()}.parquet\"\n",
" # Object name comes from make_fname(), not the full lower-cased stem.\n",
" name = make_fname(fname.stem)\n",
" blob = f\"gcs://carbonplan-data/raw/fluxnet/{name}.parquet\"\n",
"\n",
" df = pd.read_csv(zipfs.open(csv, mode=\"rb\"))\n",
" ddf = dd.from_pandas(df, chunksize=1000).repartition(\n",
Expand Down

0 comments on commit b887552

Please sign in to comment.