Initial commit

rl-institut · Jan 29, 2025 · 16f2cb5 · 16f2cb5
commit 16f2cb5
Show file tree

Hide file tree

Showing 99 changed files with 95,922 additions and 0 deletions.
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/egon-jao-matching.iml b/.idea/egon-jao-matching.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb
diff --git a/.ipynb_checkpoints/Untitled1-checkpoint.ipynb b/.ipynb_checkpoints/Untitled1-checkpoint.ipynb
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "04aa58f8-4d0a-47a3-8ac1-d3057cc204d0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Starting histogram and scatter plot analysis...\n",
+      "Loaded lightsource/transformers_original.csv successfully.\n",
+      "Loaded lightsource/transformers_updated.csv successfully.\n",
+      "Loaded lightsource/lines_original.csv successfully.\n",
+      "Loaded lightsource/lines_updated.csv successfully.\n",
+      "\n",
+      "Processing b for transformers...\n",
+      "Scatter plot saved: lightsource/analysis_results/b/transformers_corresponding_b_plot.png\n",
+      "Histogram saved: lightsource/analysis_results/b/transformers_b_histogram.png\n",
+      "\n",
+      "Processing x for transformers...\n",
+      "Scatter plot saved: lightsource/analysis_results/x/transformers_corresponding_x_plot.png\n",
+      "Histogram saved: lightsource/analysis_results/x/transformers_x_histogram.png\n",
+      "\n",
+      "Processing r for transformers...\n",
+      "Scatter plot saved: lightsource/analysis_results/r/transformers_corresponding_r_plot.png\n",
+      "Histogram saved: lightsource/analysis_results/r/transformers_r_histogram.png\n",
+      "\n",
+      "Processing b for lines...\n",
+      "Scatter plot saved: lightsource/analysis_results/b/lines_corresponding_b_plot.png\n",
+      "Histogram saved: lightsource/analysis_results/b/lines_b_histogram.png\n",
+      "\n",
+      "Processing x for lines...\n",
+      "Scatter plot saved: lightsource/analysis_results/x/lines_corresponding_x_plot.png\n",
+      "Histogram saved: lightsource/analysis_results/x/lines_x_histogram.png\n",
+      "\n",
+      "Processing r for lines...\n",
+      "Scatter plot saved: lightsource/analysis_results/r/lines_corresponding_r_plot.png\n",
+      "Histogram saved: lightsource/analysis_results/r/lines_r_histogram.png\n",
+      "\n",
+      "Analysis completed.\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import numpy as np\n",
+    "\n",
+    "# Configuration\n",
+    "# Directory that holds the input CSV files; analysis output is written beneath it as well.\n",
+    "DATA_DIR = \"lightsource\"\n",
+    "\n",
+    "# Dataset key -> CSV file name inside DATA_DIR. Keys follow the pattern\n",
+    "# '<component>_original' / '<component>_updated', which main() relies on for lookups.\n",
+    "FILES = {\n",
+    "    'transformers_original': 'transformers_original.csv',\n",
+    "    'transformers_updated': 'transformers_updated.csv',\n",
+    "    'lines_original': 'lines_original.csv',\n",
+    "    'lines_updated': 'lines_updated.csv'\n",
+    "}\n",
+    "\n",
+    "# Component -> columns compared between the original and updated dataset.\n",
+    "COLUMNS_TO_ANALYZE = {\n",
+    "    'transformers': ['b', 'x', 'r'],\n",
+    "    'lines': ['b', 'x', 'r']\n",
+    "}\n",
+    "\n",
+    "# Component -> column label (or list of labels) used as the merge key when pairing\n",
+    "# rows of the original and updated frames in plot_corresponding_values().\n",
+    "IDENTIFIER_COLUMNS = {\n",
+    "    'transformers': 'Transformer',\n",
+    "    'lines': ['bus0', 'bus1', 'type']\n",
+    "}\n",
+    "\n",
+    "# Create output directory\n",
+    "# NOTE: this runs as soon as the cell is executed, before any data has been loaded.\n",
+    "OUTPUT_DIR_BASE = os.path.join(DATA_DIR, \"analysis_results\")\n",
+    "os.makedirs(OUTPUT_DIR_BASE, exist_ok=True)\n",
+    "\n",
+    "\n",
+    "def load_csv(file_path):\n",
+    "    \"\"\"Read ``file_path`` with pandas and report the outcome on stdout.\n",
+    "\n",
+    "    Returns the parsed DataFrame on success. Any exception raised while loading\n",
+    "    is printed instead of propagated, and None is returned in that case.\n",
+    "    \"\"\"\n",
+    "    frame = None\n",
+    "    try:\n",
+    "        frame = pd.read_csv(file_path)\n",
+    "        print(f\"Loaded {file_path} successfully.\")\n",
+    "    except Exception as err:\n",
+    "        print(f\"Error loading {file_path}: {err}\")\n",
+    "        # Best-effort loader: the caller receives None rather than an exception.\n",
+    "        frame = None\n",
+    "    return frame\n",
+    "\n",
+    "\n",
+    "def plot_corresponding_values(original_df, updated_df, component, column_name):\n",
+    "    \"\"\"Scatter-plot original vs. updated values of one column for matched rows.\n",
+    "\n",
+    "    Rows of the two frames are paired with ``pd.merge`` on the identifier\n",
+    "    column(s) registered for ``component`` in IDENTIFIER_COLUMNS. Both value\n",
+    "    columns are coerced to numeric, pairs containing NaN are dropped, and pairs\n",
+    "    whose values are exactly equal are excluded. The figure is saved as\n",
+    "    ``<OUTPUT_DIR_BASE>/<column_name>/<component>_corresponding_<column_name>_plot.png``.\n",
+    "\n",
+    "    Args:\n",
+    "        original_df: DataFrame with the original dataset.\n",
+    "        updated_df: DataFrame with the updated dataset.\n",
+    "        component: Key into IDENTIFIER_COLUMNS (e.g. 'transformers' or 'lines').\n",
+    "        column_name: Column to compare; it must be present in both frames so that\n",
+    "            the merge produces the '_original' / '_updated' suffixed columns.\n",
+    "\n",
+    "    Returns:\n",
+    "        None. Prints a message and returns early when no pair differs.\n",
+    "    \"\"\"\n",
+    "    identifier = IDENTIFIER_COLUMNS.get(component)\n",
+    "    output_dir = os.path.join(OUTPUT_DIR_BASE, column_name)\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "\n",
+    "    # `on` accepts a single label as well as a list of labels, so one call covers\n",
+    "    # both kinds of identifier (the former if/else branches were identical).\n",
+    "    merged_df = pd.merge(original_df, updated_df, on=identifier, suffixes=('_original', '_updated'))\n",
+    "\n",
+    "    col_original = f'{column_name}_original'\n",
+    "    col_updated = f'{column_name}_updated'\n",
+    "\n",
+    "    # Convert to numeric and remove NaN values\n",
+    "    merged_df[col_original] = pd.to_numeric(merged_df[col_original], errors='coerce')\n",
+    "    merged_df[col_updated] = pd.to_numeric(merged_df[col_updated], errors='coerce')\n",
+    "    merged_df = merged_df.dropna(subset=[col_original, col_updated])\n",
+    "\n",
+    "    # Keep only the pairs whose value actually changed (exact comparison).\n",
+    "    different_values = merged_df[merged_df[col_original] != merged_df[col_updated]]\n",
+    "\n",
+    "    if different_values.empty:\n",
+    "        print(f\"No different values found for {component} {column_name}. Skipping plot.\")\n",
+    "        return\n",
+    "\n",
+    "    # Common range of both columns, used for the y = x reference line.\n",
+    "    value_pairs = different_values[[col_original, col_updated]]\n",
+    "    min_val = value_pairs.min().min()\n",
+    "    max_val = value_pairs.max().max()\n",
+    "\n",
+    "    plot_path = os.path.join(output_dir, f\"{component}_corresponding_{column_name}_plot.png\")\n",
+    "\n",
+    "    fig, ax = plt.subplots(figsize=(8, 8))\n",
+    "    try:\n",
+    "        # Scatter plot with a diagonal reference line\n",
+    "        ax.scatter(different_values[col_original], different_values[col_updated], alpha=0.6)\n",
+    "        ax.plot([min_val, max_val], [min_val, max_val], 'r--', label='y = x')\n",
+    "\n",
+    "        ax.set_xlabel(f'Original {column_name}')\n",
+    "        ax.set_ylabel(f'Updated {column_name}')\n",
+    "        ax.set_title(f'Changed {column_name} Values for {component.capitalize()} (Excluding Identical Values)')\n",
+    "        ax.legend()\n",
+    "        fig.tight_layout()\n",
+    "\n",
+    "        # Save plot\n",
+    "        fig.savefig(plot_path)\n",
+    "    finally:\n",
+    "        # Release the figure even when plotting or saving fails, so repeated\n",
+    "        # calls do not accumulate open figures.\n",
+    "        plt.close(fig)\n",
+    "    print(f\"Scatter plot saved: {plot_path}\")\n",
+    "\n",
+    "\n",
+    "def plot_histogram_different_values(original, updated, component, column_name):\n",
+    "    \"\"\"Create and save overlapping histograms of the values that differ.\n",
+    "\n",
+    "    ``original`` and ``updated`` are converted to Series, NaN entries are dropped,\n",
+    "    and the two Series are restricted to the index labels they share. Entries\n",
+    "    that are exactly equal under the same label are excluded. The figure is saved as\n",
+    "    ``<OUTPUT_DIR_BASE>/<column_name>/<component>_<column_name>_histogram.png``.\n",
+    "\n",
+    "    Args:\n",
+    "        original: Values of the original dataset (Series or array-like).\n",
+    "        updated: Values of the updated dataset (Series or array-like).\n",
+    "        component: Component name, used in the title and file name.\n",
+    "        column_name: Column name, used for the axis label, title and output path.\n",
+    "\n",
+    "    Returns:\n",
+    "        None. Prints a message and returns early when no value differs.\n",
+    "    \"\"\"\n",
+    "    output_dir = os.path.join(OUTPUT_DIR_BASE, column_name)\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "\n",
+    "    original = pd.Series(original).dropna()\n",
+    "    updated = pd.Series(updated).dropna()\n",
+    "\n",
+    "    # The NaN rows are dropped independently, so the two Series can end up with\n",
+    "    # different index labels (or lengths). Comparing differently-labelled Series\n",
+    "    # with != raises ValueError, so keep only the labels present in both first.\n",
+    "    # When the indexes are already identical this leaves both Series unchanged.\n",
+    "    # NOTE(review): values are paired by row label here, not by the identifier\n",
+    "    # columns used for the scatter plot - confirm both files share the row order.\n",
+    "    original, updated = original.align(updated, join='inner')\n",
+    "\n",
+    "    # Remove identical values before plotting histograms\n",
+    "    mask = original != updated\n",
+    "    original_diff = original[mask]\n",
+    "    updated_diff = updated[mask]\n",
+    "\n",
+    "    if len(original_diff) == 0:\n",
+    "        print(f\"No different values found for {component} {column_name} histogram. Skipping.\")\n",
+    "        return\n",
+    "\n",
+    "    plot_path = os.path.join(output_dir, f\"{component}_{column_name}_histogram.png\")\n",
+    "\n",
+    "    fig, ax = plt.subplots(figsize=(10, 6))\n",
+    "    try:\n",
+    "        sns.histplot(original_diff, color='blue', label='Original', kde=True, alpha=0.6, ax=ax)\n",
+    "        sns.histplot(updated_diff, color='orange', label='Updated', kde=True, alpha=0.6, ax=ax)\n",
+    "        ax.set_xlabel(column_name)\n",
+    "        ax.set_ylabel('Frequency')\n",
+    "        ax.set_title(f\"Distribution of Different {column_name} Values for {component.capitalize()}\")\n",
+    "        ax.legend()\n",
+    "        fig.tight_layout()\n",
+    "\n",
+    "        # Save plot\n",
+    "        fig.savefig(plot_path)\n",
+    "    finally:\n",
+    "        # Release the figure even when plotting or saving fails.\n",
+    "        plt.close(fig)\n",
+    "    print(f\"Histogram saved: {plot_path}\")\n",
+    "\n",
+    "\n",
+    "def main():\n",
+    "    \"\"\"Load every dataset in FILES and plot each configured column per component.\n",
+    "\n",
+    "    For each component in COLUMNS_TO_ANALYZE the '<component>_original' and\n",
+    "    '<component>_updated' frames are compared column by column: a scatter plot of\n",
+    "    matched rows and a histogram of differing values are produced. Components\n",
+    "    whose CSV failed to load, and columns missing from either frame, are reported\n",
+    "    and skipped.\n",
+    "    \"\"\"\n",
+    "    print(\"Starting histogram and scatter plot analysis...\")\n",
+    "\n",
+    "    # Load all datasets; load_csv() yields None for a file that could not be read.\n",
+    "    datasets = {key: load_csv(os.path.join(DATA_DIR, filename)) for key, filename in FILES.items()}\n",
+    "\n",
+    "    # Process each component and column\n",
+    "    for component, column_names in COLUMNS_TO_ANALYZE.items():\n",
+    "        original_df = datasets.get(f\"{component}_original\")\n",
+    "        updated_df = datasets.get(f\"{component}_updated\")\n",
+    "\n",
+    "        for column_name in column_names:\n",
+    "            print(f\"\\nProcessing {column_name} for {component}...\")\n",
+    "\n",
+    "            # Every key of FILES is present in `datasets` even after a failed load,\n",
+    "            # so test the value itself instead of key membership.\n",
+    "            if original_df is None or updated_df is None:\n",
+    "                print(f\"Dataset for {component} could not be loaded. Skipping.\")\n",
+    "                continue\n",
+    "\n",
+    "            if column_name not in original_df.columns or column_name not in updated_df.columns:\n",
+    "                print(f\"Column {column_name} not found in one of the datasets for {component}. Skipping.\")\n",
+    "                continue\n",
+    "\n",
+    "            original_values = pd.to_numeric(original_df[column_name], errors='coerce').dropna()\n",
+    "            updated_values = pd.to_numeric(updated_df[column_name], errors='coerce').dropna()\n",
+    "\n",
+    "            # Generate scatter plots and histograms (excluding identical values)\n",
+    "            plot_corresponding_values(original_df, updated_df, component, column_name)\n",
+    "            plot_histogram_different_values(original_values, updated_values, component, column_name)\n",
+    "\n",
+    "    print(\"\\nAnalysis completed.\")\n",
+    "\n",
+    "\n",
+    "# Entry point: run the full analysis when this code is executed directly.\n",
+    "# The recorded cell output shows main() running under this guard when the cell is executed.\n",
+    "if __name__ == \"__main__\":\n",
+    "    main()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "daa04b26-3c3b-46cb-9b70-c91f474bd18f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}