Skip to content

Commit

Permalink
Merge pull request #429 from nasaharvest/Corrective_labels_Uganda_Nor…
Browse files Browse the repository at this point in the history
…th_2019

Add_Data_Corrective_labels_Uganda_North_2019
  • Loading branch information
yashgadhiya10 authored Jan 29, 2025
2 parents 84ab610 + 280328c commit d6698bf
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 6 deletions.
6 changes: 3 additions & 3 deletions data/datasets.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: a6eb46d364ecfbde721d79e5f545dcde.dir
size: 756013135
nfiles: 64
- md5: ffdbfcb7eb9735e7b485676b7d9af37a.dir
size: 770113901
nfiles: 65
path: datasets
hash: md5
6 changes: 3 additions & 3 deletions data/raw.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: fa2e5d606695b1e4acb3e611d0a9d5d4.dir
size: 447402768
nfiles: 412
- md5: 288bc2945bb4453d99720766b16b5815.dir
size: 447498504
nfiles: 413
path: raw
hash: md5
8 changes: 8 additions & 0 deletions data/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -537,3 +537,11 @@ eo_data_complete 1000
✔ training amount: 387, positive class: 1.3%
✔ validation amount: 294, positive class: 1.0%
✔ testing amount: 319, positive class: 0.3%



UgandaNorthCorLabel2019 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 2631
eo_data_duplicate 1
✔ training amount: 2631, positive class: 44.6%
13 changes: 13 additions & 0 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,18 @@ def load_labels(self) -> pd.DataFrame:
return df


class UgandaNorthCorLabel2019(LabeledDataset):
    """Labels read from ``Uganda_North_2019_GEE_labels.csv`` under ``raw_dir``.

    NOTE(review): the class name and the commit title suggest these are
    "corrective" labels for northern Uganda in 2019 -- confirm with the data
    owner; the code itself only shows the file layout described below.
    """

    def load_labels(self) -> pd.DataFrame:
        """Build the label table for this dataset.

        Steps, in order:
          1. Read the raw CSV and rename its ``lat`` / ``long`` columns to the
             project-wide ``LAT`` / ``LON`` column names.
          2. Keep only the first row for each (LAT, LON) pair and renumber
             the index from zero.
          3. Binarize the label: rows whose ``class_probability`` equals
             exactly 1 become 1, every other row becomes 0.
          4. Stamp every row with the same date window
             (2019-01-01 through 2020-12-31) and the ``"training"`` subset.

        Returns:
            The processed labels as a ``pd.DataFrame``.
        """
        csv_path = (
            raw_dir
            / "Uganda_North_2019_GEE_labels"
            / "Uganda_North_2019_GEE_labels.csv"
        )
        labels = pd.read_csv(csv_path).rename(columns={"lat": LAT, "long": LON})

        # One label per coordinate: later rows at the same point are dropped.
        labels = labels.drop_duplicates(subset=[LAT, LON]).reset_index(drop=True)

        # Only an exact probability of 1 counts as the positive class.
        labels[CLASS_PROB] = labels["class_probability"].eq(1).astype(int)

        # START is assigned before END so the column order is unchanged.
        labels[START] = date(2019, 1, 1)
        labels[END] = date(2020, 12, 31)

        # Every row in this dataset goes to the training split.
        labels[SUBSET] = "training"
        return labels


class KenyaCropArea2019(LabeledDataset):
def load_labels(self) -> pd.DataFrame:
raw_folder = raw_dir / "Kenya_Crop_Area_2019"
Expand Down Expand Up @@ -1637,6 +1649,7 @@ def load_labels(self) -> pd.DataFrame:
Uganda_NorthCEO2017(),
Uganda_NorthCEO2020(),
Uganda_NorthCEO2018(),
UgandaNorthCorLabel2019(),
]

if __name__ == "__main__":
Expand Down

0 comments on commit d6698bf

Please sign in to comment.