-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmain.py
57 lines (46 loc) · 2.49 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
"""
Feature Extraction Main Script
This script serves as the main entry point for the PhishSense feature extraction system.
It utilizes argparse to handle command-line arguments for input and output file paths.
The script reads a CSV file containing URLs and their types, extracts features from each URL,
and saves the results to another CSV file. The extraction process involves two steps: reading the
CSV file and then extracting features from the URLs. The resulting CSV file can be used as a labeled
dataset for training machine learning models.
Usage:
python main.py --input input_csv_file.csv --output output_features_csv_file.csv
Arguments:
--input (-i): Path to the input CSV file containing URLs and types.
--output (-o): Path to the output CSV file where features will be saved.
--start-line (-s): Start line for reading data from the input CSV (default: 0).
--end-line (-e): End line for reading data from the input CSV (default: None).
The script performs the following steps:
1. Parses command-line arguments using argparse.
2. Reads the input CSV file and extracts URLs with their corresponding types.
3. Calls 'read_csv' from the 'read_csv' module and 'process_urls' from the 'extract_features' module.
4. Saves the extracted features to the specified output CSV file.
Author: Ahmaad Ansari
Date: March 10, 2024
"""
import argparse
import sys

import pandas as pd

from read_csv import read_csv
from extract_features import process_urls
def parse_arguments(argv=None):
    """Parse the command-line options of the feature extraction script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default) argparse
            reads ``sys.argv[1:]``, so existing zero-argument callers are
            unaffected.

    Returns:
        argparse.Namespace with the attributes ``input``, ``output``,
        ``start_line`` (int, default 0) and ``end_line`` (int or None).

    Raises:
        SystemExit: Raised by argparse when a required option is missing
            or a value cannot be converted to ``int``.
    """
    parser = argparse.ArgumentParser(description='Extract features from URLs in a CSV file.')
    parser.add_argument('--input', '-i', required=True, help='Input CSV file with URLs and types')
    parser.add_argument('--output', '-o', required=True, help='Output CSV file for extracted features')
    parser.add_argument('--start-line', '-s', type=int, default=0, help='Start line for reading data from the input CSV')
    parser.add_argument('--end-line', '-e', type=int, default=None, help='End line for reading data from the input CSV')
    # Forwarding argv lets tests and other modules drive the parser without
    # mutating sys.argv; None keeps the original command-line behaviour.
    return parser.parse_args(argv)
def main():
    """Run the pipeline: parse arguments, read the input CSV, extract features.

    Returns:
        int: Process exit status. 0 on success, 130 when interrupted with
        Ctrl+C, 1 when any other exception is raised.
    """
    try:
        # Parse command-line arguments. argparse reports usage errors by
        # raising SystemExit, which is not an Exception subclass and so
        # propagates past the handlers below.
        args = parse_arguments()

        # Step 1: Read CSV and get URLs with types
        urls_and_types = read_csv(args.input, start_line=args.start_line, end_line=args.end_line)

        # Step 2: Extract features and save to CSV
        process_urls(urls_and_types, args.output)
    except KeyboardInterrupt:
        print("Ctrl+C pressed. Exiting gracefully.", file=sys.stderr)
        # 128 + SIGINT(2): the conventional shell status for an interrupt.
        return 130
    except Exception as e:
        # Top-level boundary: report on stderr and signal failure through
        # the exit status so calling scripts can detect it.
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())