Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
mohsenmansouri committed Jan 29, 2025
0 parents commit 16f2cb5
Show file tree
Hide file tree
Showing 99 changed files with 95,922 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/egon-jao-matching.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

435 changes: 435 additions & 0 deletions .ipynb_checkpoints/Untitled-checkpoint.ipynb

Large diffs are not rendered by default.

234 changes: 234 additions & 0 deletions .ipynb_checkpoints/Untitled1-checkpoint.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "04aa58f8-4d0a-47a3-8ac1-d3057cc204d0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting histogram and scatter plot analysis...\n",
"Loaded lightsource/transformers_original.csv successfully.\n",
"Loaded lightsource/transformers_updated.csv successfully.\n",
"Loaded lightsource/lines_original.csv successfully.\n",
"Loaded lightsource/lines_updated.csv successfully.\n",
"\n",
"Processing b for transformers...\n",
"Scatter plot saved: lightsource/analysis_results/b/transformers_corresponding_b_plot.png\n",
"Histogram saved: lightsource/analysis_results/b/transformers_b_histogram.png\n",
"\n",
"Processing x for transformers...\n",
"Scatter plot saved: lightsource/analysis_results/x/transformers_corresponding_x_plot.png\n",
"Histogram saved: lightsource/analysis_results/x/transformers_x_histogram.png\n",
"\n",
"Processing r for transformers...\n",
"Scatter plot saved: lightsource/analysis_results/r/transformers_corresponding_r_plot.png\n",
"Histogram saved: lightsource/analysis_results/r/transformers_r_histogram.png\n",
"\n",
"Processing b for lines...\n",
"Scatter plot saved: lightsource/analysis_results/b/lines_corresponding_b_plot.png\n",
"Histogram saved: lightsource/analysis_results/b/lines_b_histogram.png\n",
"\n",
"Processing x for lines...\n",
"Scatter plot saved: lightsource/analysis_results/x/lines_corresponding_x_plot.png\n",
"Histogram saved: lightsource/analysis_results/x/lines_x_histogram.png\n",
"\n",
"Processing r for lines...\n",
"Scatter plot saved: lightsource/analysis_results/r/lines_corresponding_r_plot.png\n",
"Histogram saved: lightsource/analysis_results/r/lines_r_histogram.png\n",
"\n",
"Analysis completed.\n"
]
}
],
"source": [
"import os\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import numpy as np\n",
"\n",
"# --- Configuration ---\n",
"# Folder holding the input CSV files; results are written beneath it.\n",
"DATA_DIR = 'lightsource'\n",
"\n",
"# Dataset key -> CSV file name inside DATA_DIR.\n",
"FILES = dict(\n",
"    transformers_original='transformers_original.csv',\n",
"    transformers_updated='transformers_updated.csv',\n",
"    lines_original='lines_original.csv',\n",
"    lines_updated='lines_updated.csv',\n",
")\n",
"\n",
"# Columns compared between the original and updated file of each component.\n",
"COLUMNS_TO_ANALYZE = dict(\n",
"    transformers=['b', 'x', 'r'],\n",
"    lines=['b', 'x', 'r'],\n",
")\n",
"\n",
"# Column label(s) used as the merge key when pairing original and updated rows.\n",
"IDENTIFIER_COLUMNS = dict(\n",
"    transformers='Transformer',\n",
"    lines=['bus0', 'bus1', 'type'],\n",
")\n",
"\n",
"# Results directory, created up front when it does not exist yet.\n",
"OUTPUT_DIR_BASE = os.path.join(DATA_DIR, 'analysis_results')\n",
"os.makedirs(OUTPUT_DIR_BASE, exist_ok=True)\n",
"\n",
"\n",
"def load_csv(file_path):\n",
"    \"\"\"Read ``file_path`` with pandas and report the outcome on stdout.\n",
"\n",
"    Returns the resulting DataFrame, or ``None`` when anything in the load\n",
"    step raised; the error is printed instead of being propagated.\n",
"    \"\"\"\n",
"    try:\n",
"        frame = pd.read_csv(file_path)\n",
"        print(f\"Loaded {file_path} successfully.\")\n",
"    except Exception as exc:\n",
"        print(f\"Error loading {file_path}: {exc}\")\n",
"        frame = None\n",
"    return frame\n",
"\n",
"\n",
"def plot_corresponding_values(original_df, updated_df, component, column_name):\n",
"    \"\"\"Scatter-plot original against updated values of one column.\n",
"\n",
"    Rows of the two frames are paired by merging on the identifier column(s)\n",
"    registered for ``component`` in ``IDENTIFIER_COLUMNS``. Pairs with a\n",
"    non-numeric value on either side, and pairs whose two values are exactly\n",
"    equal, are excluded. The figure is saved in the ``column_name``\n",
"    sub-folder of ``OUTPUT_DIR_BASE``.\n",
"\n",
"    Args:\n",
"        original_df: DataFrame holding the original values.\n",
"        updated_df: DataFrame holding the updated values.\n",
"        component: Key into ``IDENTIFIER_COLUMNS`` (e.g. ``'lines'``).\n",
"        column_name: Column present in both frames that is compared.\n",
"\n",
"    Returns:\n",
"        None. Nothing is plotted when no identifier is configured or when\n",
"        no pair differs.\n",
"    \"\"\"\n",
"    identifier = IDENTIFIER_COLUMNS.get(component)\n",
"    if identifier is None:\n",
"        # Without a key pd.merge would fall back to every shared column,\n",
"        # including the compared one, and no suffixed columns would exist.\n",
"        print(f\"No identifier columns configured for {component}. Skipping plot.\")\n",
"        return\n",
"\n",
"    output_dir = os.path.join(OUTPUT_DIR_BASE, column_name)\n",
"    os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"    # `on` accepts a single label or a list of labels, so one call covers\n",
"    # both identifier shapes.\n",
"    merged_df = pd.merge(original_df, updated_df, on=identifier, suffixes=('_original', '_updated'))\n",
"\n",
"    col_original = f'{column_name}_original'\n",
"    col_updated = f'{column_name}_updated'\n",
"    value_columns = [col_original, col_updated]\n",
"\n",
"    # Non-numeric entries become NaN and the whole pair is then dropped.\n",
"    for col in value_columns:\n",
"        merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')\n",
"    merged_df = merged_df.dropna(subset=value_columns)\n",
"\n",
"    # Keep only the pairs whose value changed (exact comparison).\n",
"    different_values = merged_df[merged_df[col_original] != merged_df[col_updated]]\n",
"\n",
"    if different_values.empty:\n",
"        print(f\"No different values found for {component} {column_name}. Skipping plot.\")\n",
"        return\n",
"\n",
"    # Common range of both axes, used for the y = x reference line.\n",
"    changed = different_values[value_columns]\n",
"    min_val = changed.min().min()\n",
"    max_val = changed.max().max()\n",
"\n",
"    plot_path = os.path.join(output_dir, f\"{component}_corresponding_{column_name}_plot.png\")\n",
"    fig = plt.figure(figsize=(8, 8))\n",
"    try:\n",
"        plt.scatter(different_values[col_original], different_values[col_updated], alpha=0.6)\n",
"        plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='y = x')\n",
"\n",
"        plt.xlabel(f'Original {column_name}')\n",
"        plt.ylabel(f'Updated {column_name}')\n",
"        plt.title(f'Changed {column_name} Values for {component.capitalize()} (Excluding Identical Values)')\n",
"        plt.legend()\n",
"        plt.tight_layout()\n",
"        plt.savefig(plot_path)\n",
"    finally:\n",
"        # Release the figure even when drawing or saving fails.\n",
"        plt.close(fig)\n",
"    print(f\"Scatter plot saved: {plot_path}\")\n",
"\n",
"\n",
"def plot_histogram_different_values(original, updated, component, column_name):\n",
"    \"\"\"Overlay histograms of the original and updated values that differ.\n",
"\n",
"    The two inputs are paired by index label; only labels that survive NaN\n",
"    removal on both sides are compared, and pairs with exactly equal values\n",
"    are left out. The figure is saved in the ``column_name`` sub-folder of\n",
"    ``OUTPUT_DIR_BASE``.\n",
"\n",
"    Args:\n",
"        original: Original values (anything ``pd.Series`` accepts).\n",
"        updated: Updated values (anything ``pd.Series`` accepts).\n",
"        component: Component name used in the title and file name.\n",
"        column_name: Column name used for the axis label, title and path.\n",
"\n",
"    Returns:\n",
"        None. Nothing is plotted when no pair differs.\n",
"    \"\"\"\n",
"    output_dir = os.path.join(OUTPUT_DIR_BASE, column_name)\n",
"    os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"    original = pd.Series(original).dropna()\n",
"    updated = pd.Series(updated).dropna()\n",
"\n",
"    # Each series dropped its own NaN rows, so their indexes may differ, and\n",
"    # `!=` between differently-labelled Series raises ValueError. Restrict\n",
"    # both to the labels they share before comparing.\n",
"    original, updated = original.align(updated, join='inner')\n",
"\n",
"    # Keep only the pairs whose value changed (exact comparison).\n",
"    mask = original != updated\n",
"    original_diff = original[mask]\n",
"    updated_diff = updated[mask]\n",
"\n",
"    if original_diff.empty:\n",
"        print(f\"No different values found for {component} {column_name} histogram. Skipping.\")\n",
"        return\n",
"\n",
"    plot_path = os.path.join(output_dir, f\"{component}_{column_name}_histogram.png\")\n",
"    fig = plt.figure(figsize=(10, 6))\n",
"    try:\n",
"        sns.histplot(original_diff, color='blue', label='Original', kde=True, alpha=0.6)\n",
"        sns.histplot(updated_diff, color='orange', label='Updated', kde=True, alpha=0.6)\n",
"        plt.xlabel(column_name)\n",
"        plt.ylabel('Frequency')\n",
"        plt.title(f\"Distribution of Different {column_name} Values for {component.capitalize()}\")\n",
"        plt.legend()\n",
"        plt.tight_layout()\n",
"        plt.savefig(plot_path)\n",
"    finally:\n",
"        # Release the figure even when drawing or saving fails.\n",
"        plt.close(fig)\n",
"    print(f\"Histogram saved: {plot_path}\")\n",
"\n",
"\n",
"def main():\n",
"    \"\"\"Run the scatter-plot and histogram comparison for every configured column.\n",
"\n",
"    Loads every CSV listed in ``FILES`` from ``DATA_DIR`` and, for each\n",
"    component/column pair in ``COLUMNS_TO_ANALYZE``, produces the scatter\n",
"    plot and the histogram of changed values. Components whose files could\n",
"    not be loaded, and columns missing from either file, are reported and\n",
"    skipped.\n",
"    \"\"\"\n",
"    print(\"Starting histogram and scatter plot analysis...\")\n",
"\n",
"    # load_csv returns None for a file that could not be read.\n",
"    datasets = {key: load_csv(os.path.join(DATA_DIR, filename)) for key, filename in FILES.items()}\n",
"\n",
"    for component, column_names in COLUMNS_TO_ANALYZE.items():\n",
"        original_df = datasets.get(f\"{component}_original\")\n",
"        updated_df = datasets.get(f\"{component}_updated\")\n",
"\n",
"        for column_name in column_names:\n",
"            print(f\"\\nProcessing {column_name} for {component}...\")\n",
"\n",
"            # A key exists even when its file failed to load, so test the\n",
"            # value rather than membership.\n",
"            if original_df is None or updated_df is None:\n",
"                print(f\"Datasets for {component} could not be loaded. Skipping.\")\n",
"                continue\n",
"\n",
"            if column_name not in original_df.columns or column_name not in updated_df.columns:\n",
"                print(f\"Column {column_name} not found in one of the datasets for {component}. Skipping.\")\n",
"                continue\n",
"\n",
"            original_values = pd.to_numeric(original_df[column_name], errors='coerce').dropna()\n",
"            updated_values = pd.to_numeric(updated_df[column_name], errors='coerce').dropna()\n",
"\n",
"            # Generate scatter plots and histograms (excluding identical values)\n",
"            plot_corresponding_values(original_df, updated_df, component, column_name)\n",
"            plot_histogram_different_values(original_values, updated_values, component, column_name)\n",
"\n",
"    print(\"\\nAnalysis completed.\")\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"    main()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "daa04b26-3c3b-46cb-9b70-c91f474bd18f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading

0 comments on commit 16f2cb5

Please sign in to comment.