-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
- Loading branch information
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "04aa58f8-4d0a-47a3-8ac1-d3057cc204d0", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Starting histogram and scatter plot analysis...\n", | ||
"Loaded lightsource/transformers_original.csv successfully.\n", | ||
"Loaded lightsource/transformers_updated.csv successfully.\n", | ||
"Loaded lightsource/lines_original.csv successfully.\n", | ||
"Loaded lightsource/lines_updated.csv successfully.\n", | ||
"\n", | ||
"Processing b for transformers...\n", | ||
"Scatter plot saved: lightsource/analysis_results/b/transformers_corresponding_b_plot.png\n", | ||
"Histogram saved: lightsource/analysis_results/b/transformers_b_histogram.png\n", | ||
"\n", | ||
"Processing x for transformers...\n", | ||
"Scatter plot saved: lightsource/analysis_results/x/transformers_corresponding_x_plot.png\n", | ||
"Histogram saved: lightsource/analysis_results/x/transformers_x_histogram.png\n", | ||
"\n", | ||
"Processing r for transformers...\n", | ||
"Scatter plot saved: lightsource/analysis_results/r/transformers_corresponding_r_plot.png\n", | ||
"Histogram saved: lightsource/analysis_results/r/transformers_r_histogram.png\n", | ||
"\n", | ||
"Processing b for lines...\n", | ||
"Scatter plot saved: lightsource/analysis_results/b/lines_corresponding_b_plot.png\n", | ||
"Histogram saved: lightsource/analysis_results/b/lines_b_histogram.png\n", | ||
"\n", | ||
"Processing x for lines...\n", | ||
"Scatter plot saved: lightsource/analysis_results/x/lines_corresponding_x_plot.png\n", | ||
"Histogram saved: lightsource/analysis_results/x/lines_x_histogram.png\n", | ||
"\n", | ||
"Processing r for lines...\n", | ||
"Scatter plot saved: lightsource/analysis_results/r/lines_corresponding_r_plot.png\n", | ||
"Histogram saved: lightsource/analysis_results/r/lines_r_histogram.png\n", | ||
"\n", | ||
"Analysis completed.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import os\n", | ||
"import pandas as pd\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import seaborn as sns\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"# Configuration\n", | ||
"DATA_DIR = \"lightsource\"\n", | ||
"\n", | ||
"FILES = {\n", | ||
" 'transformers_original': 'transformers_original.csv',\n", | ||
" 'transformers_updated': 'transformers_updated.csv',\n", | ||
" 'lines_original': 'lines_original.csv',\n", | ||
" 'lines_updated': 'lines_updated.csv'\n", | ||
"}\n", | ||
"\n", | ||
"COLUMNS_TO_ANALYZE = {\n", | ||
" 'transformers': ['b', 'x', 'r'],\n", | ||
" 'lines': ['b', 'x', 'r']\n", | ||
"}\n", | ||
"\n", | ||
"IDENTIFIER_COLUMNS = {\n", | ||
" 'transformers': 'Transformer',\n", | ||
" 'lines': ['bus0', 'bus1', 'type']\n", | ||
"}\n", | ||
"\n", | ||
"# Create output directory\n", | ||
"OUTPUT_DIR_BASE = os.path.join(DATA_DIR, \"analysis_results\")\n", | ||
"os.makedirs(OUTPUT_DIR_BASE, exist_ok=True)\n", | ||
"\n", | ||
"\n", | ||
"def load_csv(file_path):\n", | ||
" \"\"\"Load a CSV file into a pandas DataFrame.\"\"\"\n", | ||
" try:\n", | ||
" df = pd.read_csv(file_path)\n", | ||
" print(f\"Loaded {file_path} successfully.\")\n", | ||
" return df\n", | ||
" except Exception as e:\n", | ||
" print(f\"Error loading {file_path}: {e}\")\n", | ||
" return None\n", | ||
"\n", | ||
"\n", | ||
"def plot_corresponding_values(original_df, updated_df, component, column_name):\n", | ||
" \"\"\"Plot corresponding values for original and updated datasets, excluding identical values.\"\"\"\n", | ||
" identifier = IDENTIFIER_COLUMNS.get(component)\n", | ||
" output_dir = os.path.join(OUTPUT_DIR_BASE, column_name)\n", | ||
" os.makedirs(output_dir, exist_ok=True)\n", | ||
"\n", | ||
" if isinstance(identifier, list):\n", | ||
" merged_df = pd.merge(original_df, updated_df, on=identifier, suffixes=('_original', '_updated'))\n", | ||
" else:\n", | ||
" merged_df = pd.merge(original_df, updated_df, on=identifier, suffixes=('_original', '_updated'))\n", | ||
"\n", | ||
" col_original = f'{column_name}_original'\n", | ||
" col_updated = f'{column_name}_updated'\n", | ||
"\n", | ||
" # Convert to numeric and remove NaN values\n", | ||
" merged_df[col_original] = pd.to_numeric(merged_df[col_original], errors='coerce')\n", | ||
" merged_df[col_updated] = pd.to_numeric(merged_df[col_updated], errors='coerce')\n", | ||
" merged_df = merged_df.dropna(subset=[col_original, col_updated])\n", | ||
"\n", | ||
" # **Remove exact identical values**\n", | ||
" different_values = merged_df[merged_df[col_original] != merged_df[col_updated]]\n", | ||
"\n", | ||
" if different_values.empty:\n", | ||
" print(f\"No different values found for {component} {column_name}. Skipping plot.\")\n", | ||
" return\n", | ||
"\n", | ||
" plt.figure(figsize=(8, 8))\n", | ||
"\n", | ||
" # Scatter plot with a diagonal reference line\n", | ||
" plt.scatter(different_values[col_original], different_values[col_updated], alpha=0.6)\n", | ||
" min_val, max_val = different_values[[col_original, col_updated]].min().min(), different_values[[col_original, col_updated]].max().max()\n", | ||
" plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='y = x')\n", | ||
"\n", | ||
" plt.xlabel(f'Original {column_name}')\n", | ||
" plt.ylabel(f'Updated {column_name}')\n", | ||
" plt.title(f'Changed {column_name} Values for {component.capitalize()} (Excluding Identical Values)')\n", | ||
" plt.legend()\n", | ||
" plt.tight_layout()\n", | ||
"\n", | ||
" # Save plot\n", | ||
" plot_path = os.path.join(output_dir, f\"{component}_corresponding_{column_name}_plot.png\")\n", | ||
" plt.savefig(plot_path)\n", | ||
" plt.close()\n", | ||
" print(f\"Scatter plot saved: {plot_path}\")\n", | ||
"\n", | ||
"\n", | ||
"def plot_histogram_different_values(original, updated, component, column_name):\n", | ||
" \"\"\"Create and save overlapping histograms for original and updated values, excluding identical values.\"\"\"\n", | ||
" output_dir = os.path.join(OUTPUT_DIR_BASE, column_name)\n", | ||
" os.makedirs(output_dir, exist_ok=True)\n", | ||
"\n", | ||
" original = pd.Series(original).dropna()\n", | ||
" updated = pd.Series(updated).dropna()\n", | ||
"\n", | ||
" # **Remove identical values before plotting histograms**\n", | ||
" mask = original != updated\n", | ||
" original_diff = original[mask]\n", | ||
" updated_diff = updated[mask]\n", | ||
"\n", | ||
" if len(original_diff) == 0:\n", | ||
" print(f\"No different values found for {component} {column_name} histogram. Skipping.\")\n", | ||
" return\n", | ||
"\n", | ||
" plt.figure(figsize=(10, 6))\n", | ||
" sns.histplot(original_diff, color='blue', label='Original', kde=True, alpha=0.6)\n", | ||
" sns.histplot(updated_diff, color='orange', label='Updated', kde=True, alpha=0.6)\n", | ||
" plt.xlabel(column_name)\n", | ||
" plt.ylabel('Frequency')\n", | ||
" plt.title(f\"Distribution of Different {column_name} Values for {component.capitalize()}\")\n", | ||
" plt.legend()\n", | ||
" plt.tight_layout()\n", | ||
"\n", | ||
" # Save plot\n", | ||
" plot_path = os.path.join(output_dir, f\"{component}_{column_name}_histogram.png\")\n", | ||
" plt.savefig(plot_path)\n", | ||
" plt.close()\n", | ||
" print(f\"Histogram saved: {plot_path}\")\n", | ||
"\n", | ||
"\n", | ||
"def main():\n", | ||
" print(\"Starting histogram and scatter plot analysis...\")\n", | ||
"\n", | ||
" # Load all datasets\n", | ||
" datasets = {key: load_csv(os.path.join(DATA_DIR, filename)) for key, filename in FILES.items()}\n", | ||
"\n", | ||
" # Process each component and column\n", | ||
" for component in ['transformers', 'lines']:\n", | ||
" for column_name in COLUMNS_TO_ANALYZE[component]:\n", | ||
" print(f\"\\nProcessing {column_name} for {component}...\")\n", | ||
"\n", | ||
" original_key = f\"{component}_original\"\n", | ||
" updated_key = f\"{component}_updated\"\n", | ||
"\n", | ||
" if original_key in datasets and updated_key in datasets:\n", | ||
" original_df = datasets[original_key]\n", | ||
" updated_df = datasets[updated_key]\n", | ||
"\n", | ||
" if column_name in original_df.columns and column_name in updated_df.columns:\n", | ||
" original_values = pd.to_numeric(original_df[column_name], errors='coerce').dropna()\n", | ||
" updated_values = pd.to_numeric(updated_df[column_name], errors='coerce').dropna()\n", | ||
"\n", | ||
" # Generate scatter plots and histograms (excluding identical values)\n", | ||
" plot_corresponding_values(original_df, updated_df, component, column_name)\n", | ||
" plot_histogram_different_values(original_values, updated_values, component, column_name)\n", | ||
" else:\n", | ||
" print(f\"Column {column_name} not found in one of the datasets for {component}. Skipping.\")\n", | ||
"\n", | ||
" print(\"\\nAnalysis completed.\")\n", | ||
"\n", | ||
"\n", | ||
"if __name__ == \"__main__\":\n", | ||
" main()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "daa04b26-3c3b-46cb-9b70-c91f474bd18f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.16" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |