POS_Blockchain_Malicious_Node_Detection

UppuluriKalyani · Oct 24, 2024 · cebf802 · cebf802
1 parent aa3f314
commit cebf802
Show file tree

Hide file tree

Showing 13 changed files with 10,257 additions and 0 deletions.
diff --git a/Neural Networks/POS_Blockchain_Malicious_Node_Detection/README.md b/Neural Networks/POS_Blockchain_Malicious_Node_Detection/README.md
@@ -0,0 +1,61 @@
+# Blockchain Node Classification using LSTM
+
+This project implements a Long Short-Term Memory (LSTM) neural network for classifying blockchain nodes based on various features.
+
+## Project Structure
+
+```
+blockchain_node_classification/
+├── data/               # Data directory
+├── results/            # Results directory
+│   ├── figures/        # Generated plots
+│   └── metrics/        # Performance metrics
+├── src/                # Source code
+└── requirements.txt    # Project dependencies
+```
+
+## Setup
+
+1. Create a virtual environment:
+```bash
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+```
+
+2. Install requirements:
+```bash
+pip install -r requirements.txt
+```
+
+3. Place your dataset (Blockchain101.csv) in the `data/` directory.
+
+## Usage
+
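+Run the main script from inside the `src/` directory (outputs are written to a `results/` folder under the current working directory):
+```bash
+cd src
+python main.py
+```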
+
+## Results
+
+The following results are generated in the `results/` directory:
+
+1. Figures:
+   - Training history plots
+   - ROC curves
+   - Precision-Recall curves
+   - Confusion matrix
+
+2. Metrics:
+   - Classification report (precision, recall, F1-score)
+   - Confusion matrix
+
+## Model Architecture
+
+The LSTM model consists of:
+- LSTM layer with 64 units
+- Dense output layer with softmax activation
+- Adam optimizer with learning rate 0.001
+- Categorical crossentropy loss function
+
+Training is performed using 5-fold cross-validation.
diff --git a/Neural Networks/POS_Blockchain_Malicious_Node_Detection/data/Blockchain101.csv b/Neural Networks/POS_Blockchain_Malicious_Node_Detection/data/Blockchain101.csv
diff --git a/Neural Networks/POS_Blockchain_Malicious_Node_Detection/requirements.txt b/Neural Networks/POS_Blockchain_Malicious_Node_Detection/requirements.txt
@@ -0,0 +1,6 @@
+pandas==2.0.0
+numpy==1.24.3
+scikit-learn==1.2.2
+tensorflow==2.13.0
+matplotlib==3.7.1
+seaborn==0.12.2
diff --git a/...al Networks/POS_Blockchain_Malicious_Node_Detection/src/__pycache__/model.cpython-310.pyc b/...al Networks/POS_Blockchain_Malicious_Node_Detection/src/__pycache__/model.cpython-310.pyc
diff --git a/...al Networks/POS_Blockchain_Malicious_Node_Detection/src/__pycache__/utils.cpython-310.pyc b/...al Networks/POS_Blockchain_Malicious_Node_Detection/src/__pycache__/utils.cpython-310.pyc
diff --git a/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/main.py b/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/main.py
@@ -0,0 +1,91 @@
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import LabelEncoder, StandardScaler
+from sklearn.model_selection import StratifiedKFold
+from sklearn.metrics import classification_report, confusion_matrix
+from utils import plot_training_history, plot_roc_curve, plot_pr_curve, plot_confusion_matrix
+from model import create_lstm_model
+import os
+from tensorflow.keras.utils import to_categorical
+
+def main():
+
+    os.makedirs('results/figures', exist_ok=True)
+    os.makedirs('results/metrics', exist_ok=True)
+
+
+    data = pd.read_csv("../data/Blockchain101.csv")
+    data.fillna(method='ffill', inplace=True)
+
+
+    label_encoder = LabelEncoder()
+    data['TxnFee (Binary)'] = label_encoder.fit_transform(data['TxnFee (Binary)'])
+
+    target = 'Node Label'
+    features = data.columns.drop(target)
+
+
+    scaler = StandardScaler()
+    data[features] = scaler.fit_transform(data[features])
+
+    X = data[features].values
+    y = label_encoder.fit_transform(data[target].values)
+    n_classes = len(np.unique(y))
+    y_categorical = to_categorical(y)
+
+
+    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
+    histories = []
+    all_y_test = []
+    all_y_pred = []
+    all_y_pred_proba = []
+
+
+    for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):
+        print(f"Training Fold {fold}")
+        X_train, X_val = X[train_index], X[val_index]
+        y_train, y_val = y_categorical[train_index], y_categorical[val_index]
+
+
+        X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
+        X_val = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))
+        input_shape = (1, X.shape[1])
+
+
+        model = create_lstm_model(input_shape, n_classes)
+        history = model.fit(X_train, y_train, epochs=50, batch_size=32, 
+                          validation_data=(X_val, y_val), verbose=2)
+        histories.append(history)
+
+        # Make predictions
+        y_pred_proba = model.predict(X_val)
+        y_pred = np.argmax(y_pred_proba, axis=1)
+
+        all_y_test.extend(np.argmax(y_val, axis=1))
+        all_y_pred.extend(y_pred)
+        all_y_pred_proba.extend(y_pred_proba)
+
+
+    all_y_test = np.array(all_y_test)
+    all_y_pred = np.array(all_y_pred)
+    all_y_pred_proba = np.array(all_y_pred_proba)
+
+
+    plot_training_history(histories, 'LSTM')
+    plot_roc_curve(to_categorical(all_y_test), all_y_pred_proba, 'LSTM')
+    plot_pr_curve(to_categorical(all_y_test), all_y_pred_proba, 'LSTM')
+
+
+    classification_rep = classification_report(all_y_test, all_y_pred)
+    cm = confusion_matrix(all_y_test, all_y_pred)
+    plot_confusion_matrix(cm, 'LSTM')
+
+
+    with open('results/metrics/lstm_metrics.txt', 'w') as f:
+        f.write("LSTM Model Classification Report:\n\n")
+        f.write(classification_rep)
+
+    print("Training completed. Results saved in results directory.")
+
+if __name__ == "__main__":
+    main()
diff --git a/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/model.py b/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/model.py
@@ -0,0 +1,19 @@
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, LSTM
+from tensorflow.keras.optimizers import Adam
+
+
+def create_lstm_model(input_shape, n_classes):
+
+    model = Sequential([
+        LSTM(64, input_shape=input_shape),
+        Dense(n_classes, activation='softmax')
+    ])
+
+    model.compile(
+        optimizer=Adam(learning_rate=0.001),
+        loss='categorical_crossentropy',
+        metrics=['accuracy']
+    )
+
+    return model
diff --git a/...ockchain_Malicious_Node_Detection/src/results/figures/lstm_confusion_matrix.png b/...ockchain_Malicious_Node_Detection/src/results/figures/lstm_confusion_matrix.png
diff --git a/...s/POS_Blockchain_Malicious_Node_Detection/src/results/figures/lstm_pr_curve.png b/...s/POS_Blockchain_Malicious_Node_Detection/src/results/figures/lstm_pr_curve.png
diff --git a/.../POS_Blockchain_Malicious_Node_Detection/src/results/figures/lstm_roc_curve.png b/.../POS_Blockchain_Malicious_Node_Detection/src/results/figures/lstm_roc_curve.png
diff --git a/...ockchain_Malicious_Node_Detection/src/results/figures/lstm_training_history.png b/...ockchain_Malicious_Node_Detection/src/results/figures/lstm_training_history.png
diff --git a/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/results/metrics/lstm_metrics.txt b/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/results/metrics/lstm_metrics.txt
@@ -0,0 +1,10 @@
+LSTM Model Classification Report:
+
+              precision    recall  f1-score   support
+
+           0       0.87      0.89      0.88      5771
+           1       0.84      0.83      0.83      4229
+
+    accuracy                           0.86     10000
+   macro avg       0.86      0.86      0.86     10000
+weighted avg       0.86      0.86      0.86     10000
diff --git a/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/utils.py b/Neural Networks/POS_Blockchain_Malicious_Node_Detection/src/utils.py
@@ -0,0 +1,69 @@
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import roc_curve, auc, precision_recall_curve
+
+def plot_training_history(histories, model_name):
+
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12))
+
+    for i, history in enumerate(histories):
+        ax1.plot(history.history['loss'], label=f'Fold {i+1}')
+        ax2.plot(history.history['accuracy'], label=f'Fold {i+1}')
+
+    ax1.set_title(f'{model_name} - Training Loss per Epoch')
+    ax1.set_xlabel('Epoch')
+    ax1.set_ylabel('Loss')
+    ax1.legend()
+
+    ax2.set_title(f'{model_name} - Training Accuracy per Epoch')
+    ax2.set_xlabel('Epoch')
+    ax2.set_ylabel('Accuracy')
+    ax2.legend()
+
+    plt.tight_layout()
+    plt.savefig(f'results/figures/{model_name.lower()}_training_history.png')
+    plt.close()
+
+def plot_roc_curve(y_test, y_pred_proba, model_name):
+    """Plot ROC curve for each class"""
+    plt.figure(figsize=(8, 6))
+
+    for i in range(y_test.shape[1]):
+        fpr, tpr, _ = roc_curve(y_test[:, i], y_pred_proba[:, i])
+        roc_auc = auc(fpr, tpr)
+        plt.plot(fpr, tpr, label=f'Class {i} (AUC = {roc_auc:.2f})')
+
+    plt.plot([0, 1], [0, 1], 'k--')
+    plt.xlim([0.0, 1.0])
+    plt.ylim([0.0, 1.05])
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title(f'{model_name} - ROC Curve')
+    plt.legend(loc="lower right")
+    plt.savefig(f'results/figures/{model_name.lower()}_roc_curve.png')
+    plt.close()
+
+def plot_pr_curve(y_test, y_pred_proba, model_name):
+    """Plot Precision-Recall curve for each class"""
+    plt.figure(figsize=(8, 6))
+
+    for i in range(y_test.shape[1]):
+        precision, recall, _ = precision_recall_curve(y_test[:, i], y_pred_proba[:, i])
+        plt.plot(recall, precision, label=f'Class {i}')
+
+    plt.xlabel('Recall')
+    plt.ylabel('Precision')
+    plt.title(f'{model_name} - Precision-Recall Curve')
+    plt.legend()
+    plt.savefig(f'results/figures/{model_name.lower()}_pr_curve.png')
+    plt.close()
+
+def plot_confusion_matrix(cm, model_name):
+    """Plot confusion matrix heatmap"""
+    plt.figure(figsize=(8, 6))
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
+    plt.title(f'{model_name} - Confusion Matrix')
+    plt.ylabel('True label')
+    plt.xlabel('Predicted label')
+    plt.savefig(f'results/figures/{model_name.lower()}_confusion_matrix.png')
+    plt.close()