-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvisualize.py
125 lines (105 loc) · 3.99 KB
/
visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import pandas as pd
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.decomposition import FastICA
import matplotlib.pyplot as plt
import argparse
from config import *
import util
def visualize_pca():
    """
    Function to visualize dataset using PCA to reduce the dimensions
    Saves the visualization to plots directory

    The path components ['data', 'raw', 'stock.csv'] are handed to
    util.get_data, rows whose 'date' lies in (START_DATE, END_DATE] are kept,
    the 'date' column is removed and every remaining column is rescaled to
    [-1, 1] before being projected onto two principal components.
    The figure is written to plots/PCA_Visualization.png and shown on screen.
    Args:
    Returns:
    """
    path = ['data', 'raw', 'stock.csv']
    data = util.get_data(path)
    in_range = (data['date'] > START_DATE) & (data['date'] <= END_DATE)
    # drop=True is required here: a plain reset_index() keeps the old row
    # labels as an extra 'index' column, which would then be scaled and fed
    # to PCA as if it were a feature of the dataset.
    data = data[in_range].reset_index(drop=True)
    del data['date']

    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    np_scaled = min_max_scaler.fit_transform(data)
    reduced_data = PCA(n_components=2).fit_transform(np_scaled)

    # Leave a margin of 1 around the projected points on both axes.
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1

    fig = plt.figure(1)
    plt.clf()
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    # One plot() call per point, so the markers keep cycling through the
    # default colour sequence exactly as they did before.
    for x_value, y_value in reduced_data:
        plt.plot(x_value, y_value, '.', markersize=4)
    plt.xticks(())
    plt.yticks(())
    plt.title('Visualization of dataset after PCA')

    # Write the file before show(): show() may block until the window is
    # closed, and saving first means the image exists even if the session is
    # interrupted while the window is still open.
    os.makedirs('plots', exist_ok=True)
    fig.savefig('plots/PCA_Visualization.png')
    plt.show()
    plt.close(fig)
def visualize_ica():
    """
    Function to visualize dataset using ICA to reduce the dimensions
    Saves the visualization to plots directory

    The path components ['data', 'raw', 'stock.csv'] are handed to
    util.get_data, rows whose 'date' lies in (START_DATE, END_DATE] are kept,
    the 'date' column is removed and every remaining column is rescaled to
    [-1, 1] before FastICA reduces the data to two components.
    The figure is written to plots/ICA_Visualization.png and shown on screen.
    Args:
    Returns:
    """
    path = ['data', 'raw', 'stock.csv']
    data = util.get_data(path)
    in_range = (data['date'] > START_DATE) & (data['date'] <= END_DATE)
    # drop=True is required here: a plain reset_index() keeps the old row
    # labels as an extra 'index' column, which would then be scaled and fed
    # to FastICA as if it were a feature of the dataset.
    data = data[in_range].reset_index(drop=True)
    del data['date']

    min_max_scaler = preprocessing.MinMaxScaler(feature_range=(-1, 1))
    np_scaled = min_max_scaler.fit_transform(data)
    # NOTE(review): no random_state is passed, so the components may differ
    # between runs - confirm whether reproducible plots are wanted.
    reduced_data = FastICA(n_components=2).fit_transform(np_scaled)

    # Leave a margin of 0.1 around the projected points on both axes.
    x_min, x_max = reduced_data[:, 0].min() - 0.1, reduced_data[:, 0].max() + 0.1
    y_min, y_max = reduced_data[:, 1].min() - 0.1, reduced_data[:, 1].max() + 0.1

    fig = plt.figure(1)
    plt.clf()
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    # One plot() call per point, so the markers keep cycling through the
    # default colour sequence exactly as they did before.
    for x_value, y_value in reduced_data:
        plt.plot(x_value, y_value, '.', markersize=3)
    plt.xticks(())
    plt.yticks(())
    plt.title('Visualization of dataset after ICA')

    # Write the file before show(): show() may block until the window is
    # closed, and saving first means the image exists even if the session is
    # interrupted while the window is still open.
    os.makedirs('plots', exist_ok=True)
    fig.savefig('plots/ICA_Visualization.png')
    plt.show()
    plt.close(fig)
def visualize_tsne():
    """
    Placeholder for the t-SNE visualization
    Only prints a notice that the technique is not implemented yet
    Args:
    Returns:
    """
    notice = 'To be implemented'
    print(notice)
def parse():
    """
    Read the dimension reduction flags from the command line
    Exits through the parser's error handler when no flag is set
    Args:
    Returns:
        dictionary<str,bool>
            flag name ('pca', 'ica', 'tsne') mapped to whether it was given
    """
    parser = argparse.ArgumentParser(
        description='Process and visualization of dataset using Dimensionality Reduction'
                    ' Techniques like PCA, ICA and t-SNE.'
    )
    # Registration order fixes the key order of the returned dictionary.
    flag_help = (
        ('--pca', 'Using PCA to reduce dimensions'),
        ('--ica', 'Using ICA to reduce dimensions'),
        ('--tsne', 'Using t-SNE to reduce dimensions'),
    )
    for flag, help_text in flag_help:
        parser.add_argument(flag, action='store_true', help=help_text)

    arguments = vars(parser.parse_args())
    chosen = [name for name, enabled in arguments.items() if enabled]
    if not chosen:
        parser.error('No arguments provided, please set the flag at least one of the following: pca, ica, tsne')
    return arguments
if __name__ == '__main__':
    # Script entry point: read the command-line flags and run the
    # visualization for every dimension reduction technique switched on.
    args = parse()
    for technique, enabled in args.items():
        if not enabled:
            continue
        print('Visualizing using ' + technique.upper())
        # Handlers are named visualize_<flag>, so the flag name is enough to
        # look the function up in the module namespace.
        handler = globals()['visualize_' + technique]
        handler()