From cf3cc80c33b1d9c2058b0fd053ca2eac4925f7dc Mon Sep 17 00:00:00 2001 From: ttuff Date: Mon, 25 Mar 2024 18:49:32 +0000 Subject: [PATCH] reshape update --- reshape.ipynb | 540 ++++++++++++++++++++++++++++---------------------- 1 file changed, 302 insertions(+), 238 deletions(-) diff --git a/reshape.ipynb b/reshape.ipynb index 20d4954..911d6d8 100644 --- a/reshape.ipynb +++ b/reshape.ipynb @@ -2,10 +2,19 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "c501955a-4e56-40df-93e4-346c6e5ad935", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", @@ -23,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "id": "ebe8b0c5-0171-4140-a9d2-e038fa3239d4", "metadata": {}, "outputs": [], @@ -64,7 +73,7 @@ "\n", "# Example usage:\n", "# Replace 'your_envi_file_path' with the actual path to your ENVI file\n", - "raster_path = \"NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200807_155314_reflectance/NEON_D13_NIWO_DP1_20200807_155314_reflectanceNEON_D13_NIWO_DP1_20200807_155314_reflectance__envi\" # Update this to your actual raster file path\n", + "raster_path = \"NIWOT_calibration_flight_08_2020/NEON_D13_NIWO_DP1_20200801_161441_reflectance/NEON_D13_NIWO_DP1_20200801_161441_reflectance\" # Update this to your actual raster file path\n", "processor = ENVIProcessor(raster_path)\n", "chunk = processor.get_chunk_from_extent(corrections=['some_correction'], resample=False)\n", "\n" @@ -82,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "id": "1039151f-97ae-48b2-b757-efa640080906", "metadata": {}, "outputs": [], @@ -127,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "e7f55111-3cb3-4b5d-a3bd-d7cf588ce1ef", "metadata": {}, "outputs": [ @@ -178,121 +187,121 @@ " \n", " \n", " 0\n", - " 0.797209\n", - " 0.314943\n", - " 0.515625\n", - " 0.289574\n", - " 0.567998\n", - " 0.711330\n", - " 0.985263\n", - " 0.488050\n", - " 0.669980\n", - " 0.047356\n", + " 0.687524\n", + " 0.304722\n", + " 0.419995\n", + " 0.259451\n", + " 0.507092\n", + " 0.323172\n", + " 0.403418\n", + " 0.410082\n", + " 0.041743\n", + " 0.759389\n", " ...\n", - " 0.251188\n", - " 0.565922\n", - " 0.188862\n", - " 0.551562\n", - " 0.465831\n", - " 0.023405\n", - " 0.336994\n", - " 0.450468\n", + " 0.382299\n", + " 0.014212\n", + " 0.008055\n", + " 0.713550\n", + " 0.675177\n", + " 0.513762\n", + " 0.248871\n", + " 0.072115\n", " 0\n", " 0\n", " \n", " \n", " 1\n", - " 0.936276\n", - " 0.837195\n", - " 0.695625\n", - " 0.631541\n", - " 0.701025\n", - " 0.960226\n", - " 0.141791\n", - " 0.381211\n", - " 0.114170\n", - " 0.788448\n", + " 0.718638\n", + " 0.490737\n", + " 0.360811\n", + " 0.531658\n", + " 0.511925\n", + " 0.502386\n", + " 0.924304\n", + " 0.908558\n", + " 0.062361\n", + " 0.543072\n", " ...\n", - " 0.230309\n", - " 0.182223\n", - " 0.290250\n", - " 0.975250\n", - " 0.227430\n", - " 0.175060\n", - " 0.567754\n", - " 0.865518\n", + " 0.448249\n", + " 0.347809\n", + " 0.912210\n", + " 0.629690\n", + " 0.046711\n", + " 0.932032\n", + " 0.579875\n", + " 0.617307\n", " 0\n", " 1\n", " \n", " \n", " 2\n", - " 0.283778\n", - " 0.107825\n", - " 0.300096\n", - " 0.302607\n", - " 0.572963\n", - " 0.244061\n", - " 0.893596\n", - " 0.930337\n", - " 0.607978\n", - " 0.133053\n", + " 0.911718\n", + " 0.410745\n", + " 0.934476\n", + " 0.256145\n", + " 0.123791\n", + " 0.206306\n", + " 0.282671\n", + " 0.817732\n", + " 0.919495\n", + " 0.264951\n", " ...\n", - " 0.597720\n", - " 0.048488\n", - " 0.564245\n", - " 0.728381\n", - " 0.535179\n", - " 0.961610\n", - " 0.464103\n", - " 0.565047\n", + " 0.284105\n", + " 0.207181\n", + " 0.075888\n", + " 0.470861\n", + " 0.049957\n", + " 0.371652\n", + " 0.781437\n", + " 0.655946\n", " 0\n", " 2\n", " \n", " \n", " 3\n", - " 0.457389\n", - " 0.719769\n", - " 0.118473\n", - " 0.407864\n", - " 0.979559\n", - " 0.097455\n", - " 0.728281\n", - " 0.564473\n", - " 0.634733\n", - " 0.200184\n", + " 0.405207\n", + " 0.791517\n", + " 0.936675\n", + " 0.433172\n", + " 0.866013\n", + " 0.461116\n", + " 0.322686\n", + " 0.013696\n", + " 0.010301\n", + " 0.600920\n", " ...\n", - " 0.239004\n", - " 0.365207\n", - " 0.195417\n", - " 0.237427\n", - " 0.699486\n", - " 0.205661\n", - " 0.734998\n", - " 0.589876\n", + " 0.764502\n", + " 0.966043\n", + " 0.137867\n", + " 0.466293\n", + " 0.657372\n", + " 0.682398\n", + " 0.611508\n", + " 0.221807\n", " 0\n", " 3\n", " \n", " \n", " 4\n", - " 0.366526\n", - " 0.529941\n", - " 0.417289\n", - " 0.847633\n", - " 0.193126\n", - " 0.982161\n", - " 0.822412\n", - " 0.445544\n", - " 0.044492\n", - " 0.017709\n", + " 0.063385\n", + " 0.777884\n", + " 0.941228\n", + " 0.666460\n", + " 0.575846\n", + " 0.827420\n", + " 0.859770\n", + " 0.105069\n", + " 0.319631\n", + " 0.696536\n", " ...\n", - " 0.742343\n", - " 0.772829\n", - " 0.340681\n", - " 0.146870\n", - " 0.946994\n", - " 0.066265\n", - " 0.499453\n", - " 0.388927\n", + " 0.038016\n", + " 0.638541\n", + " 0.814261\n", + " 0.258037\n", + " 0.573052\n", + " 0.936515\n", + " 0.425533\n", + " 0.659791\n", " 0\n", " 4\n", " \n", @@ -322,121 +331,121 @@ " \n", " \n", " 11483273\n", - " 0.152260\n", - " 0.766855\n", - " 0.169944\n", - " 0.878360\n", - " 0.383142\n", - " 0.539870\n", - " 0.985659\n", - " 0.650074\n", - " 0.760505\n", - " 0.952661\n", + " 0.762910\n", + " 0.475233\n", + " 0.448766\n", + " 0.360571\n", + " 0.277480\n", + " 0.073007\n", + " 0.423881\n", + " 0.083852\n", + " 0.307329\n", + " 0.339650\n", " ...\n", - " 0.317700\n", - " 0.078385\n", - " 0.648448\n", - " 0.056393\n", - " 0.945091\n", - " 0.665414\n", - " 0.529980\n", - " 0.593835\n", + " 0.202143\n", + " 0.914050\n", + " 0.561372\n", + " 0.528521\n", + " 0.656066\n", + " 0.789577\n", + " 0.642838\n", + " 0.791538\n", " 11137\n", " 1026\n", " \n", " \n", " 11483274\n", - " 0.345999\n", - " 0.479799\n", - " 0.496870\n", - " 0.479557\n", - " 0.999687\n", - " 0.157588\n", - " 0.562706\n", - " 0.481475\n", - " 0.395999\n", - " 0.912812\n", + " 0.517250\n", + " 0.058740\n", + " 0.460863\n", + " 0.808285\n", + " 0.752132\n", + " 0.039975\n", + " 0.165822\n", + " 0.996934\n", + " 0.440954\n", + " 0.302505\n", " ...\n", - " 0.514387\n", - " 0.211770\n", - " 0.179397\n", - " 0.664200\n", - " 0.767492\n", - " 0.368559\n", - " 0.885074\n", - " 0.608405\n", + " 0.252671\n", + " 0.592800\n", + " 0.900815\n", + " 0.566804\n", + " 0.788629\n", + " 0.046903\n", + " 0.734878\n", + " 0.644062\n", " 11137\n", " 1027\n", " \n", " \n", " 11483275\n", - " 0.748482\n", - " 0.771680\n", - " 0.299341\n", - " 0.296593\n", - " 0.102600\n", - " 0.957731\n", - " 0.038437\n", - " 0.549724\n", - " 0.076379\n", - " 0.128406\n", + " 0.119028\n", + " 0.988765\n", + " 0.295493\n", + " 0.011601\n", + " 0.173720\n", + " 0.642613\n", + " 0.409197\n", + " 0.529874\n", + " 0.447891\n", + " 0.788607\n", " ...\n", - " 0.251922\n", - " 0.673968\n", - " 0.346867\n", - " 0.888135\n", - " 0.471597\n", - " 0.753125\n", - " 0.630612\n", - " 0.409271\n", + " 0.692102\n", + " 0.625139\n", + " 0.715138\n", + " 0.082538\n", + " 0.059340\n", + " 0.858740\n", + " 0.892223\n", + " 0.610468\n", " 11137\n", " 1028\n", " \n", " \n", " 11483276\n", - " 0.526175\n", - " 0.401877\n", - " 0.607318\n", - " 0.477941\n", - " 0.549137\n", - " 0.047495\n", - " 0.769956\n", - " 0.422864\n", - " 0.126572\n", - " 0.073549\n", + " 0.504178\n", + " 0.161125\n", + " 0.412866\n", + " 0.960469\n", + " 0.941566\n", + " 0.294474\n", + " 0.123558\n", + " 0.640859\n", + " 0.229713\n", + " 0.782693\n", " ...\n", - " 0.406933\n", - " 0.387700\n", - " 0.204090\n", - " 0.547156\n", - " 0.558324\n", - " 0.079974\n", - " 0.169205\n", - " 0.287978\n", + " 0.919020\n", + " 0.741212\n", + " 0.643234\n", + " 0.022654\n", + " 0.111536\n", + " 0.840001\n", + " 0.001191\n", + " 0.003219\n", " 11137\n", " 1029\n", " \n", " \n", " 11483277\n", - " 0.632683\n", - " 0.266278\n", - " 0.919380\n", - " 0.899299\n", - " 0.806271\n", - " 0.843766\n", - " 0.107567\n", - " 0.909166\n", - " 0.068002\n", - " 0.097232\n", + " 0.155903\n", + " 0.951039\n", + " 0.251885\n", + " 0.919702\n", + " 0.133986\n", + " 0.866848\n", + " 0.172710\n", + " 0.682367\n", + " 0.706197\n", + " 0.993416\n", " ...\n", - " 0.000711\n", - " 0.474354\n", - " 0.458854\n", - " 0.794206\n", - " 0.908586\n", - " 0.380833\n", - " 0.938612\n", - " 0.554012\n", + " 0.823101\n", + " 0.724665\n", + " 0.401648\n", + " 0.135023\n", + " 0.350144\n", + " 0.957360\n", + " 0.173104\n", + " 0.677269\n", " 11137\n", " 1030\n", " \n", @@ -447,43 +456,43 @@ ], "text/plain": [ " Band_1 Band_2 Band_3 Band_4 Band_5 Band_6 \\\n", - "0 0.797209 0.314943 0.515625 0.289574 0.567998 0.711330 \n", - "1 0.936276 0.837195 0.695625 0.631541 0.701025 0.960226 \n", - "2 0.283778 0.107825 0.300096 0.302607 0.572963 0.244061 \n", - "3 0.457389 0.719769 0.118473 0.407864 0.979559 0.097455 \n", - "4 0.366526 0.529941 0.417289 0.847633 0.193126 0.982161 \n", + "0 0.687524 0.304722 0.419995 0.259451 0.507092 0.323172 \n", + "1 0.718638 0.490737 0.360811 0.531658 0.511925 0.502386 \n", + "2 0.911718 0.410745 0.934476 0.256145 0.123791 0.206306 \n", + "3 0.405207 0.791517 0.936675 0.433172 0.866013 0.461116 \n", + "4 0.063385 0.777884 0.941228 0.666460 0.575846 0.827420 \n", "... ... ... ... ... ... ... \n", - "11483273 0.152260 0.766855 0.169944 0.878360 0.383142 0.539870 \n", - "11483274 0.345999 0.479799 0.496870 0.479557 0.999687 0.157588 \n", - "11483275 0.748482 0.771680 0.299341 0.296593 0.102600 0.957731 \n", - "11483276 0.526175 0.401877 0.607318 0.477941 0.549137 0.047495 \n", - "11483277 0.632683 0.266278 0.919380 0.899299 0.806271 0.843766 \n", + "11483273 0.762910 0.475233 0.448766 0.360571 0.277480 0.073007 \n", + "11483274 0.517250 0.058740 0.460863 0.808285 0.752132 0.039975 \n", + "11483275 0.119028 0.988765 0.295493 0.011601 0.173720 0.642613 \n", + "11483276 0.504178 0.161125 0.412866 0.960469 0.941566 0.294474 \n", + "11483277 0.155903 0.951039 0.251885 0.919702 0.133986 0.866848 \n", "\n", " Band_7 Band_8 Band_9 Band_10 ... Band_419 Band_420 \\\n", - "0 0.985263 0.488050 0.669980 0.047356 ... 0.251188 0.565922 \n", - "1 0.141791 0.381211 0.114170 0.788448 ... 0.230309 0.182223 \n", - "2 0.893596 0.930337 0.607978 0.133053 ... 0.597720 0.048488 \n", - "3 0.728281 0.564473 0.634733 0.200184 ... 0.239004 0.365207 \n", - "4 0.822412 0.445544 0.044492 0.017709 ... 0.742343 0.772829 \n", + "0 0.403418 0.410082 0.041743 0.759389 ... 0.382299 0.014212 \n", + "1 0.924304 0.908558 0.062361 0.543072 ... 0.448249 0.347809 \n", + "2 0.282671 0.817732 0.919495 0.264951 ... 0.284105 0.207181 \n", + "3 0.322686 0.013696 0.010301 0.600920 ... 0.764502 0.966043 \n", + "4 0.859770 0.105069 0.319631 0.696536 ... 0.038016 0.638541 \n", "... ... ... ... ... ... ... ... \n", - "11483273 0.985659 0.650074 0.760505 0.952661 ... 0.317700 0.078385 \n", - "11483274 0.562706 0.481475 0.395999 0.912812 ... 0.514387 0.211770 \n", - "11483275 0.038437 0.549724 0.076379 0.128406 ... 0.251922 0.673968 \n", - "11483276 0.769956 0.422864 0.126572 0.073549 ... 0.406933 0.387700 \n", - "11483277 0.107567 0.909166 0.068002 0.097232 ... 0.000711 0.474354 \n", + "11483273 0.423881 0.083852 0.307329 0.339650 ... 0.202143 0.914050 \n", + "11483274 0.165822 0.996934 0.440954 0.302505 ... 0.252671 0.592800 \n", + "11483275 0.409197 0.529874 0.447891 0.788607 ... 0.692102 0.625139 \n", + "11483276 0.123558 0.640859 0.229713 0.782693 ... 0.919020 0.741212 \n", + "11483277 0.172710 0.682367 0.706197 0.993416 ... 0.823101 0.724665 \n", "\n", " Band_421 Band_422 Band_423 Band_424 Band_425 Band_426 \\\n", - "0 0.188862 0.551562 0.465831 0.023405 0.336994 0.450468 \n", - "1 0.290250 0.975250 0.227430 0.175060 0.567754 0.865518 \n", - "2 0.564245 0.728381 0.535179 0.961610 0.464103 0.565047 \n", - "3 0.195417 0.237427 0.699486 0.205661 0.734998 0.589876 \n", - "4 0.340681 0.146870 0.946994 0.066265 0.499453 0.388927 \n", + "0 0.008055 0.713550 0.675177 0.513762 0.248871 0.072115 \n", + "1 0.912210 0.629690 0.046711 0.932032 0.579875 0.617307 \n", + "2 0.075888 0.470861 0.049957 0.371652 0.781437 0.655946 \n", + "3 0.137867 0.466293 0.657372 0.682398 0.611508 0.221807 \n", + "4 0.814261 0.258037 0.573052 0.936515 0.425533 0.659791 \n", "... ... ... ... ... ... ... \n", - "11483273 0.648448 0.056393 0.945091 0.665414 0.529980 0.593835 \n", - "11483274 0.179397 0.664200 0.767492 0.368559 0.885074 0.608405 \n", - "11483275 0.346867 0.888135 0.471597 0.753125 0.630612 0.409271 \n", - "11483276 0.204090 0.547156 0.558324 0.079974 0.169205 0.287978 \n", - "11483277 0.458854 0.794206 0.908586 0.380833 0.938612 0.554012 \n", + "11483273 0.561372 0.528521 0.656066 0.789577 0.642838 0.791538 \n", + "11483274 0.900815 0.566804 0.788629 0.046903 0.734878 0.644062 \n", + "11483275 0.715138 0.082538 0.059340 0.858740 0.892223 0.610468 \n", + "11483276 0.643234 0.022654 0.111536 0.840001 0.001191 0.003219 \n", + "11483277 0.401648 0.135023 0.350144 0.957360 0.173104 0.677269 \n", "\n", " Pixel_Row Pixel_Col \n", "0 0 0 \n", @@ -501,7 +510,7 @@ "[11483278 rows x 428 columns]" ] }, - "execution_count": 3, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -527,36 +536,31 @@ "source": [ "import numpy as np\n", "\n", - "def flatten_and_melt_array_to_structured_array(array):\n", + "def iterate_flatten_melt_array(array):\n", " \"\"\"\n", - " Flattens a 3D numpy array to a structured numpy array in the \"melted\" format.\n", + " Generator to iterate over a 3D numpy array and yield \"melted\" data.\n", " \n", " Parameters:\n", " - array: A 3D numpy array of shape (bands, rows, cols).\n", " \n", - " Returns:\n", - " - A structured numpy array with 'Pixel_Row', 'Pixel_Col', 'Band_ID', and 'Wavelength' fields.\n", + " Yields:\n", + " - Tuple of (Pixel_Row, Pixel_Col, Band_ID, Wavelength) for each pixel-band combination.\n", " \"\"\"\n", " bands, rows, cols = array.shape\n", - " total_pixels = rows * cols\n", - " dtype = [('Pixel_Row', int), ('Pixel_Col', int), ('Band_ID', 'U10'), ('Wavelength', array.dtype)]\n", - " \n", - " # Create an empty structured array\n", - " structured_array = np.zeros(total_pixels * bands, dtype=dtype)\n", " \n", - " # Populate the structured array\n", - " counter = 0\n", - " for row in range(rows):\n", - " for col in range(cols):\n", - " for band in range(bands):\n", - " structured_array[counter] = (row, col, f'Band_{band+1}', array[band, row, col])\n", - " counter += 1\n", - " \n", - " return structured_array\n", + " for band in range(bands):\n", + " for row in range(rows):\n", + " for col in range(cols):\n", + " yield (row, col, f'Band_{band+1}', array[band, row, col])\n", "\n", "# Example usage\n", - "chunk = np.random.rand(426, 11138, 1031) # Example array, replace with your actual data\n", - "melted_array = flatten_and_melt_array_to_structured_array(chunk)\n" + "chunk = np.random.rand(426, 11138, 1031) # Replace with your actual data\n", + "\n", + "# To demonstrate or test the generator, you can iterate through a small portion of it\n", + "for i, data_point in enumerate(iterate_flatten_melt_array(chunk)):\n", + " print(data_point)\n", + " if i > 100: # Adjust this condition to control how many items you want to print\n", + " break\n" ] }, { @@ -566,8 +570,68 @@ "metadata": {}, "outputs": [], "source": [ - "melted_array" + "import csv\n", + "\n", + "# Open a CSV file for writing\n", + "with open('melted_data.csv', 'w', newline='') as file:\n", + " writer = csv.writer(file)\n", + " writer.writerow(['Pixel_Row', 'Pixel_Col', 'Band_ID', 'Wavelength']) # Write header\n", + "\n", + " # Write each data point\n", + " for data_point in iterate_flatten_melt_array(chunk):\n", + " writer.writerow(data_point)\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "def batch_flatten_melt_array(array, batch_size=1000000):\n", + " \"\"\"\n", + " Generator to iterate over a 3D numpy array and yield batches of \"melted\" data.\n", + " \n", + " Parameters:\n", + " - array: A 3D numpy array of shape (bands, rows, cols).\n", + " - batch_size: The number of rows in each batch.\n", + " \n", + " Yields:\n", + " - A DataFrame containing a batch of melted data.\n", + " \"\"\"\n", + " bands, rows, cols = array.shape\n", + " total_pixels = rows * cols\n", + " num_batches = (total_pixels + batch_size - 1) // batch_size # Ceiling division to get the number of batches\n", + " \n", + " for batch in range(num_batches):\n", + " batch_data = []\n", + " start_index = batch * batch_size\n", + " end_index = min(start_index + batch_size, total_pixels)\n", + " \n", + " for index in range(start_index, end_index):\n", + " row = index // cols\n", + " col = index % cols\n", + " for band in range(bands):\n", + " batch_data.append((row, col, f'Band_{band+1}', array[band, row, col]))\n", + " \n", + " batch_df = pd.DataFrame(batch_data, columns=['Pixel_Row', 'Pixel_Col', 'Band_ID', 'Wavelength'])\n", + " yield batch_df\n", + "\n", + "# Example usage\n", + "chunk = np.random.rand(426, 11138, 1031) # Replace with your actual data\n", + "\n", + "# Iterate through each batch and process\n", + "for i, batch_df in enumerate(batch_flatten_melt_array(chunk)):\n", + " print(f\"Processing batch {i+1}\")\n", + " # Process the batch_df here\n", + " # For example, you could save each batch to a separate CSV file\n", + " batch_df.to_csv(f'melted_data_batch_{i+1}.csv', index=False)\n", + " if i == 0: # For demonstration, break after processing the first batch\n", + " break\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "551e6f3c-68ea-446a-806d-d789ab16d2fe", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {