Skip to content

Commit

Permalink
POS_Blockchain_Malicious_Node_Detection
Browse files Browse the repository at this point in the history
  • Loading branch information
Karm-Dave authored Oct 24, 2024
1 parent aa3f314 commit cebf802
Show file tree
Hide file tree
Showing 13 changed files with 10,257 additions and 0 deletions.
61 changes: 61 additions & 0 deletions Neural Networks/POS_Blockchain_Malicious_Node_Detection/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Blockchain Node Classification using LSTM

This project implements a Long Short-Term Memory (LSTM) neural network for classifying blockchain nodes based on various features.

## Project Structure

```
blockchain_node_classification/
├── data/ # Data directory
├── results/ # Results directory
│ ├── figures/ # Generated plots
│ └── metrics/ # Performance metrics
├── src/ # Source code
└── requirements.txt # Project dependencies
```

## Setup

1. Create a virtual environment:
```bash
python -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```

2. Install requirements:
```bash
pip install -r requirements.txt
```

3. Place your dataset (Blockchain101.csv) in the `data/` directory.

## Usage

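Run the main script from inside the `src/` directory (output is written to a `results/` folder under the current working directory):
```bash
cd src
python main.py
```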

## Results

The following results are generated in the `results/` directory:

1. Figures:
- Training history plots
- ROC curves
- Precision-Recall curves
- Confusion matrix

2. Metrics:
- Classification report (precision, recall, F1-score)
- Confusion matrix

## Model Architecture

The LSTM model consists of:
- LSTM layer with 64 units
- Dense output layer with softmax activation
- Adam optimizer with learning rate 0.001
- Categorical crossentropy loss function

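Training is performed using stratified 5-fold cross-validation (shuffled with a fixed random seed of 42); a fresh model is trained for each fold and the validation predictions of all folds are pooled for the final report.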
10,001 changes: 10,001 additions & 0 deletions Neural Networks/POS_Blockchain_Malicious_Node_Detection/data/Blockchain101.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pandas==2.0.0
numpy==1.24.3
scikit-learn==1.2.2
tensorflow==2.13.0
matplotlib==3.7.1
seaborn==0.12.2
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from utils import plot_training_history, plot_roc_curve, plot_pr_curve, plot_confusion_matrix
from model import create_lstm_model
import os
from tensorflow.keras.utils import to_categorical

def main():
    """Train and evaluate the LSTM node classifier with stratified 5-fold CV.

    Loads ``Blockchain101.csv``, forward-fills missing values, label-encodes
    the ``'TxnFee (Binary)'`` column and the ``'Node Label'`` target, then
    trains a fresh model per fold. Validation predictions from every fold are
    pooled to produce the training-history, ROC, precision-recall and
    confusion-matrix figures plus a text classification report, all written
    under ``results/`` relative to the current working directory.
    """
    os.makedirs('results/figures', exist_ok=True)
    os.makedirs('results/metrics', exist_ok=True)

    # Keep the original working-directory-relative location, but fall back to
    # a path anchored on this script so the run also works from the project
    # root (e.g. ``python src/main.py``).
    data_path = os.path.join('..', 'data', 'Blockchain101.csv')
    if not os.path.exists(data_path):
        script_dir = os.path.dirname(os.path.abspath(__file__))
        data_path = os.path.join(script_dir, '..', 'data', 'Blockchain101.csv')

    data = pd.read_csv(data_path)
    # ``fillna(method='ffill')`` is deprecated in newer pandas; ``ffill`` is
    # the equivalent dedicated method.
    data = data.ffill()

    # Separate encoders for the feature column and the target so that neither
    # silently overwrites the other's fitted classes.
    fee_encoder = LabelEncoder()
    data['TxnFee (Binary)'] = fee_encoder.fit_transform(data['TxnFee (Binary)'])

    target = 'Node Label'
    features = data.columns.drop(target)

    # Features stay unscaled here: scaling is fitted inside each fold (below)
    # so validation rows never influence the training statistics.
    X = data[features].to_numpy(dtype=float)
    target_encoder = LabelEncoder()
    y = target_encoder.fit_transform(data[target].values)
    n_classes = len(target_encoder.classes_)
    y_categorical = to_categorical(y, num_classes=n_classes)

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    histories = []
    all_y_test = []
    all_y_pred = []
    all_y_pred_proba = []

    # Each sample is fed to the LSTM as a sequence of length 1.
    input_shape = (1, X.shape[1])

    for fold, (train_index, val_index) in enumerate(skf.split(X, y), 1):
        print(f"Training Fold {fold}")

        # Fit the scaler on the training split only, then apply the same
        # transformation to the validation split (avoids data leakage).
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X[train_index])
        X_val = scaler.transform(X[val_index])
        y_train, y_val = y_categorical[train_index], y_categorical[val_index]

        # Reshape to (samples, timesteps=1, features) as required by the LSTM.
        X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
        X_val = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))

        model = create_lstm_model(input_shape, n_classes)
        history = model.fit(X_train, y_train, epochs=50, batch_size=32,
                            validation_data=(X_val, y_val), verbose=2)
        histories.append(history)

        # Class probabilities and hard predictions for the held-out fold.
        y_pred_proba = model.predict(X_val)
        y_pred = np.argmax(y_pred_proba, axis=1)

        all_y_test.extend(y[val_index])
        all_y_pred.extend(y_pred)
        all_y_pred_proba.extend(y_pred_proba)

    all_y_test = np.array(all_y_test)
    all_y_pred = np.array(all_y_pred)
    all_y_pred_proba = np.array(all_y_pred_proba)

    # One-hot encode the pooled labels with a fixed width so the per-class
    # curve functions always see one column per class.
    y_test_onehot = to_categorical(all_y_test, num_classes=n_classes)

    plot_training_history(histories, 'LSTM')
    plot_roc_curve(y_test_onehot, all_y_pred_proba, 'LSTM')
    plot_pr_curve(y_test_onehot, all_y_pred_proba, 'LSTM')

    classification_rep = classification_report(all_y_test, all_y_pred)
    cm = confusion_matrix(all_y_test, all_y_pred)
    plot_confusion_matrix(cm, 'LSTM')

    with open('results/metrics/lstm_metrics.txt', 'w') as f:
        f.write("LSTM Model Classification Report:\n\n")
        f.write(classification_rep)

    print("Training completed. Results saved in results directory.")


if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import Adam


def create_lstm_model(input_shape, n_classes):
    """Build and compile a single-layer LSTM classifier.

    Args:
        input_shape: Shape of one input sample, passed to the LSTM layer as
            its ``input_shape``.
        n_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model: a 64-unit LSTM followed by a softmax
        ``Dense`` layer, using Adam (learning rate 0.001), categorical
        cross-entropy loss and the accuracy metric.
    """
    network = Sequential()
    network.add(LSTM(64, input_shape=input_shape))
    network.add(Dense(n_classes, activation='softmax'))

    optimizer = Adam(learning_rate=0.001)
    network.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
LSTM Model Classification Report:

precision recall f1-score support

0 0.87 0.89 0.88 5771
1 0.84 0.83 0.83 4229

accuracy 0.86 10000
macro avg 0.86 0.86 0.86 10000
weighted avg 0.86 0.86 0.86 10000
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, precision_recall_curve

def plot_training_history(histories, model_name):
    """Plot per-fold training loss and accuracy and save them as a PNG.

    Args:
        histories: Iterable of objects whose ``history`` attribute is a
            mapping with ``'loss'`` and ``'accuracy'`` sequences (one entry
            per fold).
        model_name: Label used in the plot titles; its lower-cased form is
            used in the output file name.

    The figure is written to
    ``results/figures/<model_name.lower()>_training_history.png``.
    """
    figure, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(10, 12))

    # One line per fold on each panel; folds are numbered from 1.
    for fold_number, fold_history in enumerate(histories, start=1):
        curves = fold_history.history
        loss_ax.plot(curves['loss'], label=f'Fold {fold_number}')
        acc_ax.plot(curves['accuracy'], label=f'Fold {fold_number}')

    panels = (
        (loss_ax, f'{model_name} - Training Loss per Epoch', 'Loss'),
        (acc_ax, f'{model_name} - Training Accuracy per Epoch', 'Accuracy'),
    )
    for axis, title, y_label in panels:
        axis.set_title(title)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.legend()

    figure.tight_layout()
    figure.savefig(f'results/figures/{model_name.lower()}_training_history.png')
    plt.close(figure)

def plot_roc_curve(y_test, y_pred_proba, model_name):
    """Draw one ROC curve per class and save the figure as a PNG.

    Args:
        y_test: 2-D array of true labels with one column per class.
        y_pred_proba: 2-D array of predicted scores, indexed by the same
            class columns as ``y_test``.
        model_name: Label used in the plot title; its lower-cased form is
            used in the output file name.

    The figure is written to
    ``results/figures/<model_name.lower()>_roc_curve.png``.
    """
    figure, axis = plt.subplots(figsize=(8, 6))

    class_count = y_test.shape[1]
    for class_idx in range(class_count):
        false_pos, true_pos, _ = roc_curve(
            y_test[:, class_idx], y_pred_proba[:, class_idx]
        )
        area = auc(false_pos, true_pos)
        axis.plot(false_pos, true_pos,
                  label=f'Class {class_idx} (AUC = {area:.2f})')

    # Diagonal reference line from (0, 0) to (1, 1).
    axis.plot([0, 1], [0, 1], 'k--')
    axis.set_xlim([0.0, 1.0])
    axis.set_ylim([0.0, 1.05])
    axis.set_xlabel('False Positive Rate')
    axis.set_ylabel('True Positive Rate')
    axis.set_title(f'{model_name} - ROC Curve')
    axis.legend(loc="lower right")

    figure.savefig(f'results/figures/{model_name.lower()}_roc_curve.png')
    plt.close(figure)

def plot_pr_curve(y_test, y_pred_proba, model_name):
    """Draw one precision-recall curve per class and save the figure as a PNG.

    Args:
        y_test: 2-D array of true labels with one column per class.
        y_pred_proba: 2-D array of predicted scores, indexed by the same
            class columns as ``y_test``.
        model_name: Label used in the plot title; its lower-cased form is
            used in the output file name.

    The figure is written to
    ``results/figures/<model_name.lower()>_pr_curve.png``.
    """
    figure, axis = plt.subplots(figsize=(8, 6))

    class_count = y_test.shape[1]
    for class_idx in range(class_count):
        prec, rec, _ = precision_recall_curve(
            y_test[:, class_idx], y_pred_proba[:, class_idx]
        )
        # Recall on the x-axis, precision on the y-axis.
        axis.plot(rec, prec, label=f'Class {class_idx}')

    axis.set_xlabel('Recall')
    axis.set_ylabel('Precision')
    axis.set_title(f'{model_name} - Precision-Recall Curve')
    axis.legend()

    figure.savefig(f'results/figures/{model_name.lower()}_pr_curve.png')
    plt.close(figure)

def plot_confusion_matrix(cm, model_name):
    """Render a confusion matrix as an annotated heatmap and save it as a PNG.

    Args:
        cm: Confusion matrix of integer counts (cells are formatted with
            ``'d'``).
        model_name: Label used in the plot title; its lower-cased form is
            used in the output file name.

    The figure is written to
    ``results/figures/<model_name.lower()>_confusion_matrix.png``.
    """
    figure, axis = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axis)

    axis.set_title(f'{model_name} - Confusion Matrix')
    axis.set_ylabel('True label')
    axis.set_xlabel('Predicted label')

    output_path = f'results/figures/{model_name.lower()}_confusion_matrix.png'
    figure.savefig(output_path)
    plt.close(figure)

0 comments on commit cebf802

Please sign in to comment.