From f12830c2c57ad4bb09804d8e724f300d9255c0ea Mon Sep 17 00:00:00 2001
From: Zosia Borowska <zofia.anna.borowska@gmail.com>
Date: Wed, 29 Nov 2023 18:16:22 +0000
Subject: [PATCH 1/6] Reformatting the raw data tables for readability

---
 src/pystatis/table.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/src/pystatis/table.py b/src/pystatis/table.py
index 071fc93..ad0ea3b 100644
--- a/src/pystatis/table.py
+++ b/src/pystatis/table.py
@@ -41,9 +41,40 @@ def get_data(self, area: str = "all", **kwargs):
         self.raw_data = raw_data
         data_str = StringIO(raw_data)
         self.data = pd.read_csv(data_str, sep=";")
+        self.nice_data = format_table(self.data)
 
         metadata = load_data(
             endpoint="metadata", method="table", params=params, as_json=True
         )
         assert isinstance(metadata, dict)  # nosec assert_used
         self.metadata = metadata
+
+def format_table(data: pd.DataFrame, 
+                ) -> pd.DataFrame:
+    """Format the raw data into a more readable table
+    
+    Args:
+        data (pd.DataFrame): A pandas dataframe created with get_data()
+    
+    Returns:
+        pd.DataFrame: Formatted dataframe that omits all CODE columns and gives 
+        infromative columns names.
+    """
+    time_name, = data["Zeit_Label"].unique() # Time label (usually Jahr) 
+    time_values = data["Zeit"]
+
+    merkmal_labels = data.filter(like="Merkmal_Label").columns
+    indep_names = [data[name].unique()[0] for name in merkmal_labels] # list of column names from Merkmal_Label
+
+    auspraegung_labels = data.filter(like="Auspraegung_Label").columns
+    indep_values = [data[name] for name in auspraegung_labels] # list of data from Ausgepragung_Label
+
+    dep_values = data.loc[:,auspraegung_labels[-1]:].iloc[:,1:] # get all columns after last Auspraegung column
+    dep_names = [" ".join(name.split('_')[1:]) 
+                    for name in dep_values.columns] # splits strings in column names for readability
+
+    nice_dict = {time_name:time_values, 
+                    **dict(zip(indep_names, indep_values)), 
+                    **dict(zip(dep_names, dep_values.values.T))}
+    nice_data = pd.DataFrame(nice_dict)
+    return nice_data
\ No newline at end of file

From 3363b5e8202bbac9f2ad5aa276a495f4e0ee2ee6 Mon Sep 17 00:00:00 2001
From: Zosia Borowska <zofia.anna.borowska@gmail.com>
Date: Wed, 29 Nov 2023 18:25:19 +0000
Subject: [PATCH 2/6] Fix docstring typo and remove inline comment

---
 src/pystatis/table.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pystatis/table.py b/src/pystatis/table.py
index ad0ea3b..7bbb1dc 100644
--- a/src/pystatis/table.py
+++ b/src/pystatis/table.py
@@ -58,9 +58,9 @@ def format_table(data: pd.DataFrame,
     
     Returns:
         pd.DataFrame: Formatted dataframe that omits all CODE columns and gives 
-        infromative columns names.
+        informative columns names.
     """
-    time_name, = data["Zeit_Label"].unique() # Time label (usually Jahr) 
+    time_name, = data["Zeit_Label"].unique()
     time_values = data["Zeit"]
 
     merkmal_labels = data.filter(like="Merkmal_Label").columns

From 59650f6ef13a24898df62a9f8e53d097b77ede50 Mon Sep 17 00:00:00 2001
From: Zosia Borowska <zofia.anna.borowska@gmail.com>
Date: Mon, 15 Jan 2024 15:33:02 +0000
Subject: [PATCH 3/6] Applied suggested changes and ran code formatting

---
 src/pystatis/table.py | 57 +++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 27 deletions(-)

diff --git a/src/pystatis/table.py b/src/pystatis/table.py
index 7bbb1dc..ed374e3 100644
--- a/src/pystatis/table.py
+++ b/src/pystatis/table.py
@@ -22,13 +22,14 @@ def __init__(self, name: str):
         self.data = pd.DataFrame()
         self.metadata: dict = {}
 
-    def get_data(self, area: str = "all", **kwargs):
+    def get_data(self, area: str = "all", prettify: bool = True, **kwargs):
         """Downloads raw data and metadata from GENESIS-Online.
 
         Additional keyword arguments are passed on to the GENESIS-Online GET request for tablefile.
 
         Args:
             area (str, optional): Area to search for the object in GENESIS-Online. Defaults to "all".
+            prettify (bool, optional): Reformats the table into a readable format. Defaults to True.
         """
         params = {"name": self.name, "area": area, "format": "ffcsv"}
 
@@ -41,7 +42,8 @@ def get_data(self, area: str = "all", **kwargs):
         self.raw_data = raw_data
         data_str = StringIO(raw_data)
         self.data = pd.read_csv(data_str, sep=";")
-        self.nice_data = format_table(self.data)
+        if prettify:
+            self.data = self.prettify_table(self.data)
 
         metadata = load_data(
             endpoint="metadata", method="table", params=params, as_json=True
@@ -49,32 +51,33 @@ def get_data(self, area: str = "all", **kwargs):
         assert isinstance(metadata, dict)  # nosec assert_used
         self.metadata = metadata
 
-def format_table(data: pd.DataFrame, 
-                ) -> pd.DataFrame:
-    """Format the raw data into a more readable table
-    
-    Args:
-        data (pd.DataFrame): A pandas dataframe created with get_data()
-    
-    Returns:
-        pd.DataFrame: Formatted dataframe that omits all CODE columns and gives 
-        informative columns names.
-    """
-    time_name, = data["Zeit_Label"].unique()
-    time_values = data["Zeit"]
+    @staticmethod
+    def prettify_table(data: pd.DataFrame) -> pd.DataFrame:
+        """Reformat the data into a more readable table
+
+        Args:
+            data (pd.DataFrame): A pandas dataframe created from raw_data
+
+        Returns:
+            pd.DataFrame: Formatted dataframe that omits all unnecessary Code columns
+            and includes informative column names
+        """
+        # Extracts time column with name from first element of Zeit_Label column
+        time = pd.DataFrame({data["Zeit_Label"].iloc[0]: data["Zeit"]})
 
-    merkmal_labels = data.filter(like="Merkmal_Label").columns
-    indep_names = [data[name].unique()[0] for name in merkmal_labels] # list of column names from Merkmal_Label
+        # Extracts new column names from first values of the Merkmal_Label columns
+        # and assigns these to the relevant attribute columns (Auspraegung_Label)
+        attributes = data.filter(like="Auspraegung_Label")
+        attributes.columns = data.filter(like="Merkmal_Label").iloc[0].tolist()
 
-    auspraegung_labels = data.filter(like="Auspraegung_Label").columns
-    indep_values = [data[name] for name in auspraegung_labels] # list of data from Ausgepragung_Label
+        # Selects all columns containing the values
+        values = data.filter(like="__")
 
-    dep_values = data.loc[:,auspraegung_labels[-1]:].iloc[:,1:] # get all columns after last Auspraegung column
-    dep_names = [" ".join(name.split('_')[1:]) 
-                    for name in dep_values.columns] # splits strings in column names for readability
+        # Given a name like BEV036__Bevoelkerung_in_Hauptwohnsitzhaushalten__1000
+        # extracts the readable label and omit both the code and the unit
+        values.columns = [
+            " ".join(name.split("_")[1:-1]) for name in values.columns
+        ]
 
-    nice_dict = {time_name:time_values, 
-                    **dict(zip(indep_names, indep_values)), 
-                    **dict(zip(dep_names, dep_values.values.T))}
-    nice_data = pd.DataFrame(nice_dict)
-    return nice_data
\ No newline at end of file
+        pretty_data = pd.concat([time, attributes, values], axis=1)
+        return pretty_data

From 92f7b16ed79dd0e67655ed38ca2f184cf1b0c8bf Mon Sep 17 00:00:00 2001
From: Michael Aydinbas <michael.aydinbas@new-work.se>
Date: Mon, 29 Jan 2024 15:41:59 +0100
Subject: [PATCH 4/6] add tests for Table

---
 src/pystatis/table.py |  7 ++++---
 tests/test_config.py  |  1 -
 tests/test_db.py      |  2 --
 tests/test_table.py   | 46 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_table.py

diff --git a/src/pystatis/table.py b/src/pystatis/table.py
index ed374e3..6b644d3 100644
--- a/src/pystatis/table.py
+++ b/src/pystatis/table.py
@@ -39,9 +39,11 @@ def get_data(self, area: str = "all", prettify: bool = True, **kwargs):
             endpoint="data", method="tablefile", params=params, as_json=False
         )
         assert isinstance(raw_data, str)  # nosec assert_used
+
         self.raw_data = raw_data
         data_str = StringIO(raw_data)
         self.data = pd.read_csv(data_str, sep=";")
+
         if prettify:
             self.data = self.prettify_table(self.data)
 
@@ -49,6 +51,7 @@ def get_data(self, area: str = "all", prettify: bool = True, **kwargs):
             endpoint="metadata", method="table", params=params, as_json=True
         )
         assert isinstance(metadata, dict)  # nosec assert_used
+
         self.metadata = metadata
 
     @staticmethod
@@ -75,9 +78,7 @@ def prettify_table(data: pd.DataFrame) -> pd.DataFrame:
 
         # Given a name like BEV036__Bevoelkerung_in_Hauptwohnsitzhaushalten__1000
         # extracts the readable label and omit both the code and the unit
-        values.columns = [
-            " ".join(name.split("_")[1:-1]) for name in values.columns
-        ]
+        values.columns = [name.split("__")[1] for name in values.columns]
 
         pretty_data = pd.concat([time, attributes, values], axis=1)
         return pretty_data
diff --git a/tests/test_config.py b/tests/test_config.py
index 138deb7..6c05b49 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,4 +1,3 @@
-import copy
 import os
 from configparser import ConfigParser
 from pathlib import Path
diff --git a/tests/test_db.py b/tests/test_db.py
index 6840704..1a0f96b 100644
--- a/tests/test_db.py
+++ b/tests/test_db.py
@@ -1,10 +1,8 @@
-import logging
 from configparser import ConfigParser
 
 import pytest
 
 from pystatis import config, db
-from pystatis.exception import PystatisConfigError
 
 
 @pytest.fixture()
diff --git a/tests/test_table.py b/tests/test_table.py
new file mode 100644
index 0000000..2b5b467
--- /dev/null
+++ b/tests/test_table.py
@@ -0,0 +1,46 @@
+import pandas as pd
+import pytest
+
+import pystatis
+
+EASY_TABLE = """Statistik_Code;Statistik_Label;Zeit_Code;Zeit_Label;Zeit;1_Merkmal_Code;1_Merkmal_Label;1_Auspraegung_Code;1_Auspraegung_Label;     FLC006__Gebietsflaeche__qkm
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;08;Baden-Württemberg;35747,85
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;09;Bayern;70541,58
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;11;Berlin;891,12
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;12;Brandenburg;29654,38
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;04;Bremen;419,61
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;02;Hamburg;755,09
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;06;Hessen;21115,62
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;13;Mecklenburg-Vorpommern;23294,90
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;03;Niedersachsen;47709,90
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;05;Nordrhein-Westfalen;34112,72
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;07;Rheinland-Pfalz;19857,97
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;10;Saarland;2571,52
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;14;Sachsen;18449,86
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;15;Sachsen-Anhalt;20467,20
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;01;Schleswig-Holstein;15804,30
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;16;Thüringen;16202,37
+11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;;Insgesamt;357595,99"""
+
+
+def test_get_data(mocker):
+    mocker.patch("pystatis.http_helper.load_data", return_value=EASY_TABLE)
+    table = pystatis.Table(name="11111-0001")
+    table.get_data(prettify=False)
+    assert table.data.shape == (17, 10)
+    assert isinstance(table.data, pd.DataFrame)
+    assert not table.data.empty
+    assert isinstance(table.raw_data, str)
+    assert table.raw_data != ""
+
+
+def test_prettify(mocker):
+    mocker.patch("pystatis.http_helper.load_data", return_value=EASY_TABLE)
+    table = pystatis.Table(name="11111-0001")
+    table.get_data(prettify=True)
+    assert table.data.shape == (17, 3)
+    assert table.data.columns.to_list() == [
+        "Stichtag",
+        "Bundesländer",
+        "Gebietsflaeche",
+    ]

From 6a798db60ebc0b0ea9e8906294348048021ea88c Mon Sep 17 00:00:00 2001
From: Michael Aydinbas <michael.aydinbas@new-work.se>
Date: Mon, 29 Jan 2024 15:50:32 +0100
Subject: [PATCH 5/6] fix mocker patches in test_table

---
 src/pystatis/table.py | 2 --
 tests/test_table.py   | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/pystatis/table.py b/src/pystatis/table.py
index 6b644d3..2136d22 100644
--- a/src/pystatis/table.py
+++ b/src/pystatis/table.py
@@ -38,7 +38,6 @@ def get_data(self, area: str = "all", prettify: bool = True, **kwargs):
         raw_data = load_data(
             endpoint="data", method="tablefile", params=params, as_json=False
         )
-        assert isinstance(raw_data, str)  # nosec assert_used
 
         self.raw_data = raw_data
         data_str = StringIO(raw_data)
@@ -50,7 +49,6 @@ def get_data(self, area: str = "all", prettify: bool = True, **kwargs):
         metadata = load_data(
             endpoint="metadata", method="table", params=params, as_json=True
         )
-        assert isinstance(metadata, dict)  # nosec assert_used
 
         self.metadata = metadata
 
diff --git a/tests/test_table.py b/tests/test_table.py
index 2b5b467..966c586 100644
--- a/tests/test_table.py
+++ b/tests/test_table.py
@@ -24,7 +24,7 @@
 
 
 def test_get_data(mocker):
-    mocker.patch("pystatis.http_helper.load_data", return_value=EASY_TABLE)
+    mocker.patch("pystatis.table.load_data", return_value=EASY_TABLE)
     table = pystatis.Table(name="11111-0001")
     table.get_data(prettify=False)
     assert table.data.shape == (17, 10)
@@ -35,7 +35,7 @@ def test_get_data(mocker):
 
 
 def test_prettify(mocker):
-    mocker.patch("pystatis.http_helper.load_data", return_value=EASY_TABLE)
+    mocker.patch("pystatis.table.load_data", return_value=EASY_TABLE)
     table = pystatis.Table(name="11111-0001")
     table.get_data(prettify=True)
     assert table.data.shape == (17, 3)

From 8217717fd2a796736c2e790d5aa5f4b8001d78f3 Mon Sep 17 00:00:00 2001
From: Michael Aydinbas <michael.aydinbas@new-work.se>
Date: Mon, 29 Jan 2024 16:24:28 +0100
Subject: [PATCH 6/6] implement monkeypatch to overwrite load_data for table
 tests

---
 src/pystatis/table.py |  2 ++
 tests/test_table.py   | 19 +++++++++++++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/src/pystatis/table.py b/src/pystatis/table.py
index 2136d22..6b644d3 100644
--- a/src/pystatis/table.py
+++ b/src/pystatis/table.py
@@ -38,6 +38,7 @@ def get_data(self, area: str = "all", prettify: bool = True, **kwargs):
         raw_data = load_data(
             endpoint="data", method="tablefile", params=params, as_json=False
         )
+        assert isinstance(raw_data, str)  # nosec assert_used
 
         self.raw_data = raw_data
         data_str = StringIO(raw_data)
@@ -49,6 +50,7 @@ def get_data(self, area: str = "all", prettify: bool = True, **kwargs):
         metadata = load_data(
             endpoint="metadata", method="table", params=params, as_json=True
         )
+        assert isinstance(metadata, dict)  # nosec assert_used
 
         self.metadata = metadata
 
diff --git a/tests/test_table.py b/tests/test_table.py
index 966c586..7c3f15f 100644
--- a/tests/test_table.py
+++ b/tests/test_table.py
@@ -23,8 +23,19 @@
 11111;Feststellung des Gebietsstands;STAG;Stichtag;31.12.2022;DLAND;Bundesländer;;Insgesamt;357595,99"""
 
 
-def test_get_data(mocker):
-    mocker.patch("pystatis.table.load_data", return_value=EASY_TABLE)
+def mocked_load_data(endpoint, method, params, as_json):
+    if endpoint == "data" and method == "tablefile":
+        return EASY_TABLE
+    elif endpoint == "metadata" and method == "table":
+        return {"metadata": "table"}
+    else:
+        raise NotImplementedError
+
+
+def test_get_data(monkeypatch):
+    # patch pystatis.table.load_data with parameter endpoint="data"
+    # and method="tablefile" to return EASY_TABLE
+    monkeypatch.setattr(pystatis.table, "load_data", mocked_load_data)
     table = pystatis.Table(name="11111-0001")
     table.get_data(prettify=False)
     assert table.data.shape == (17, 10)
@@ -34,8 +45,8 @@ def test_get_data(mocker):
     assert table.raw_data != ""
 
 
-def test_prettify(mocker):
-    mocker.patch("pystatis.table.load_data", return_value=EASY_TABLE)
+def test_prettify(monkeypatch):
+    monkeypatch.setattr(pystatis.table, "load_data", mocked_load_data)
     table = pystatis.Table(name="11111-0001")
     table.get_data(prettify=True)
     assert table.data.shape == (17, 3)