Commit
Merge branch 'master' into vik/merge_CA-NL-LB_and_CA-NL-NF
VIKTORVAV99 authored Jan 8, 2025
2 parents afcce23 + 9162cb5 commit bd91806
Showing 16 changed files with 20,984 additions and 12,414 deletions.
1 change: 1 addition & 0 deletions config/zones/KR.yaml
@@ -80,6 +80,7 @@ contributors:
- alixunderplatz
- IV1T3
- gadakast
- consideRatio
country: KR
delays:
production: 5
1 change: 1 addition & 0 deletions config/zones/MX.yaml
@@ -55,6 +55,7 @@ capacity:
value: 7320.0
contributors:
- scriptator
- consideRatio
country: MX
emissionFactors:
direct:
9 changes: 9 additions & 0 deletions parsers/CENACE.py
@@ -142,9 +142,18 @@ def fetch_csv_for_date(dt, session: Session | None = None):

# cleanup and parse the data
df.columns = df.columns.str.strip()

# transform 01-24 entries where 24 means 00 the next day
df["Hora"] = df["Hora"].apply(lambda x: "00" if int(x) == 24 else f"{int(x):02d}")
df["Dia"] = pd.to_datetime(df["Dia"], format="%d/%m/%Y")
df.loc[df["Hora"] == "00", "Dia"] = df["Dia"] + pd.Timedelta(days=1)

# The hour column has been seen at least once (3rd Nov 2024) to include hours
# 1-25 rather than the expected 1-24; for now we drop such entries when they
# show up.
df = df.drop(df[df["Hora"] == "25"].index)

# create datetime objects
df["Dia"] = df["Dia"].dt.strftime("%d/%m/%Y")
df["instante"] = pd.to_datetime(df["Dia"] + " " + df["Hora"], format="%d/%m/%Y %H")
df["instante"] = df["instante"].dt.tz_localize(TIMEZONE)
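
A minimal, runnable sketch of the hour handling added above, with hypothetical sample rows; TIMEZONE stands in for the constant CENACE.py already defines:

import pandas as pd

TIMEZONE = "America/Mexico_City"  # assumption; CENACE.py defines its own TIMEZONE

# hypothetical rows: hour 24 means 00:00 of the next day; hour 25 is bad data
df = pd.DataFrame({
    "Dia": ["03/11/2024", "03/11/2024", "03/11/2024"],
    "Hora": ["23", "24", "25"],
})

# map hour 24 to 00 and roll those rows' dates forward one day
df["Hora"] = df["Hora"].apply(lambda x: "00" if int(x) == 24 else f"{int(x):02d}")
df["Dia"] = pd.to_datetime(df["Dia"], format="%d/%m/%Y")
df.loc[df["Hora"] == "00", "Dia"] = df["Dia"] + pd.Timedelta(days=1)

# drop the occasional spurious hour 25
df = df.drop(df[df["Hora"] == "25"].index)

# rebuild tz-aware timestamps
df["Dia"] = df["Dia"].dt.strftime("%d/%m/%Y")
df["instante"] = pd.to_datetime(df["Dia"] + " " + df["Hora"], format="%d/%m/%Y %H")
df["instante"] = df["instante"].dt.tz_localize(TIMEZONE)
print(df["instante"].tolist())  # 23:00 on 3 Nov and 00:00 on 4 Nov; hour 25 dropped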
74 changes: 38 additions & 36 deletions parsers/KPX.py
@@ -27,23 +27,34 @@
"https://new.kpx.or.kr/powerSource.es?mid=a10606030000&device=chart"
)

#### Classification of New & Renewable Energy Sources ####
#
# Source: https://cms.khnp.co.kr/eng/content/563/main.do?mnCd=EN040101
# New energy: Hydrogen, Fuel Cell, Coal liquefied or gasified energy, and vacuum residue gasified energy, etc.
# Renewable: Solar, Wind power, Water power, ocean energy, Geothermal, Bio energy, etc.
#
PRODUCTION_MAPPING = {
"coal": "coal",
"localCoal": "coal",
"gas": "gas",
"oil": "oil",
"nuclearPower": "nuclear",
"waterPower": "hydro",
"windPower": "wind",
"sunlight": "solar",
"newRenewable": "unknown",
}

STORAGE_MAPPING = {"raisingWater": "hydro"}

#### Classification of New & Renewable Energy Sources ####
# Source: https://cms.khnp.co.kr/eng/content/563/main.do?mnCd=EN040101
# New energy: Hydrogen, Fuel Cell, Coal liquefied or gasified energy, and vacuum residue gasified energy, etc.
# Renewable: Solar, Wind power, Water power, ocean energy, Geothermal, Bio energy, etc.
STORAGE_MAPPING = {
"raisingWater": "hydro",
}
IGNORE_LIST = [
"ppa",
"btm",
"newRenewablePlusWindPower",
"once",
"regDate",
"seq",
]
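
A rough sketch of how these tables get applied to one record of the chart payload, with plain dicts standing in for the project's ProductionMix and StorageMix; the sample record and the storage sign convention are assumptions:

# hypothetical record from the KPX chart payload
item = {
    "coal": "20000", "localCoal": "3000", "gas": "15000",
    "raisingWater": "-300", "newRenewable": "5000",
    "ppa": "1000", "seq": "1", "regDate": "202501081300",
}

production: dict[str, float] = {}
storage: dict[str, float] = {}
for key, value in item.items():
    if key in IGNORE_LIST:
        continue  # metadata and already-aggregated fields
    elif key in PRODUCTION_MAPPING:
        mode = PRODUCTION_MAPPING[key]
        # coal and localCoal both fold into "coal"
        production[mode] = production.get(mode, 0.0) + float(value)
    elif key in STORAGE_MAPPING:
        mode = STORAGE_MAPPING[key]
        # sign convention (assumption): positive storage means charging
        storage[mode] = storage.get(mode, 0.0) - float(value)

print(production)  # {'coal': 23000.0, 'gas': 15000.0, 'unknown': 5000.0}
print(storage)     # {'hydro': 300.0}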


@use_proxy(country_code="KR")
@@ -53,6 +64,7 @@ def fetch_consumption(
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> list[dict]:
session = session or Session()
if target_datetime:
raise ParserException(
"KPX.py",
@@ -61,28 +73,27 @@
)

logger.debug(f"Fetching consumption data from {REAL_TIME_URL}")
session = session or Session()
response = session.get(REAL_TIME_URL, verify=False)
assert response.status_code == 200
assert response.ok

soup = BeautifulSoup(response.text, "html.parser")
consumption_title = soup.find("th", string=re.compile(r"\s*현재부하\s*"))
consumption_val = float(
consumption_title.find_next_sibling().text.split()[0].replace(",", "")
)

consumption_date_list = soup.find("p", {"class": "info_top"}).text.split(" ")[:2]
consumption_date_list[0] = consumption_date_list[0].replace(".", "-").split("(")[0]
consumption_date = datetime.strptime(
" ".join(consumption_date_list), "%Y-%m-%d %H:%M"
).replace(tzinfo=TIMEZONE)
# value_text looks like: 64,918 MW
value_text = soup.find("td", {"id": "load"}).text
value = float(value_text.split()[0].replace(",", ""))

# dt_text looks like: 2025.01.05(일) 23:10 새로고침
dt_text = soup.find("p", {"class": "info_top"}).text
dt_parts = dt_text.split(" ")[:2]
dt_string = dt_parts[0].split("(")[0] + " " + dt_parts[1]
dt = datetime.strptime(dt_string, "%Y.%m.%d %H:%M").replace(tzinfo=TIMEZONE)

consumption_list = TotalConsumptionList(logger)
consumption_list.append(
zoneKey=zone_key,
datetime=consumption_date,
datetime=dt,
source=KR_SOURCE,
consumption=consumption_val,
consumption=value,
)

return consumption_list.to_list()
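
Standalone, the new parsing reduces to a few lines. The HTML fragment below is a hypothetical reconstruction from the example values in the comments (새로고침 means "refresh"; (일) is the weekday, Sunday):

from datetime import datetime
from zoneinfo import ZoneInfo

from bs4 import BeautifulSoup

TIMEZONE = ZoneInfo("Asia/Seoul")  # assumption; KPX.py defines its own TIMEZONE

html = (
    '<p class="info_top">2025.01.05(일) 23:10 새로고침</p>'
    '<td id="load">64,918 MW</td>'
)
soup = BeautifulSoup(html, "html.parser")

value_text = soup.find("td", {"id": "load"}).text
value = float(value_text.split()[0].replace(",", ""))  # 64918.0

dt_parts = soup.find("p", {"class": "info_top"}).text.split(" ")[:2]
dt_string = dt_parts[0].split("(")[0] + " " + dt_parts[1]
dt = datetime.strptime(dt_string, "%Y.%m.%d %H:%M").replace(tzinfo=TIMEZONE)
print(dt.isoformat())  # 2025-01-05T23:10:00+09:00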
@@ -96,6 +107,7 @@ def fetch_price(
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> list[dict]:
session = session or Session()
now = datetime.now(tz=TIMEZONE)
target_datetime = (
now if target_datetime is None else target_datetime.astimezone(TIMEZONE)
@@ -111,9 +123,8 @@
)

logger.debug(f"Fetching price data from {PRICE_URL}")
session = session or Session()
response = session.get(PRICE_URL, verify=False)
assert response.status_code == 200
assert response.ok

price_list = PriceList(logger)
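
The target_datetime normalization at the top of fetch_price is a small reusable pattern; a sketch, with TIMEZONE again standing in for the module constant:

from datetime import datetime
from zoneinfo import ZoneInfo

TIMEZONE = ZoneInfo("Asia/Seoul")  # assumption; KPX.py defines its own TIMEZONE

def normalize_target(target_datetime: datetime | None) -> datetime:
    # default to "now" in the source's timezone; otherwise convert the
    # caller's aware datetime into it so day boundaries line up with KST
    now = datetime.now(tz=TIMEZONE)
    return now if target_datetime is None else target_datetime.astimezone(TIMEZONE)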

@@ -154,15 +165,8 @@ def parse_chart_prod_data(
production_list = ProductionBreakdownList(logger)

# Extract object with data
data_source = re.search(r"var ictArr = (\[\{.+\}\]);", raw_data).group(1)
# Un-quoted keys ({key:"value"}) are valid JavaScript but not valid JSON (which requires {"key":"value"}).
# Will break if keys other than these are introduced. Alternatively, use a JSON5 library (JSON5 allows un-quoted keys)
data_source = re.sub(
r"(localCoal|newRenewable|oil|once|gas|nuclearPower|coal|regDate|raisingWater|waterPower|seq)",
r'"\1"',
data_source,
)
json_obj = json.loads(data_source)
json_string = re.search(r"var ictArr = (\[\{.+\}\]);", raw_data).group(1)
json_obj = json.loads(json_string)

for item in json_obj:
if item["regDate"] == "0":
@@ -175,7 +179,7 @@
production_mix = ProductionMix()
storage_mix = StorageMix()
for item_key, item_value in item.items():
if item_key == "regDate":
if item_key in IGNORE_LIST:
continue
elif item_key in PRODUCTION_MAPPING:
production_mix.add_value(
@@ -235,8 +239,7 @@ def get_historical_prod_data(

logger.debug(f"Fetching production data from {HISTORICAL_PRODUCTION_URL}")
res = session.post(HISTORICAL_PRODUCTION_URL, payload)

assert res.status_code == 200
assert res.ok

return parse_chart_prod_data(res.text, zone_key, logger)
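
The recurring status-check change in this diff (assert response.status_code == 200 becoming assert response.ok) widens what passes: per requests' documented semantics, Response.ok is True for any status code below 400. A sketch:

import requests

res = requests.get("https://httpbin.org/status/204")  # placeholder URL
# Response.ok is status_code < 400, so 204/304-style replies no longer
# trip the assert the way the strict == 200 comparison did
assert res.ok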

@@ -249,6 +252,7 @@ def fetch_production(
target_datetime: datetime | None = None,
logger: Logger = getLogger(__name__),
) -> list[dict]:
session = session or Session()
first_available_date = datetime(2021, 12, 22, 0, 0, 0, tzinfo=TIMEZONE)
if target_datetime is not None and target_datetime < first_available_date:
raise ParserException(
@@ -257,12 +261,10 @@
zone_key,
)

session = session or Session()
if target_datetime is None:
production_list = get_real_time_prod_data(
zone_key=zone_key, session=session, logger=logger
)

else:
production_list = get_historical_prod_data(
zone_key=zone_key,
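
For context on parse_chart_prod_data above: the diff drops the key-quoting re.sub, which suggests the chart payload now embeds valid JSON, so a plain json.loads suffices. A minimal sketch with a hypothetical page body:

import json
import re

# the real page embeds a line like: var ictArr = [{...}];
raw_data = 'var ictArr = [{"coal":"20000","gas":"15000","regDate":"202501081300"}];'

json_string = re.search(r"var ictArr = (\[\{.+\}\]);", raw_data).group(1)
json_obj = json.loads(json_string)

for item in json_obj:
    if item["regDate"] == "0":  # sentinel for an empty slot (assumption)
        continue
    print(item["regDate"], item.get("coal"))  # 202501081300 20000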