change as_matrix to values
bob7783 committed Aug 4, 2018
1 parent 94c4328 commit b8b97e3
Showing 16 changed files with 34 additions and 25 deletions.
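
Background for the repeated change: pandas deprecated DataFrame.as_matrix() in version 0.23 and removed it in 1.0, so .values (or .to_numpy(), the preferred spelling since pandas 0.24) is the drop-in replacement this commit applies throughout. A minimal sketch of the migration, using a made-up frame:

import pandas as pd

df = pd.DataFrame({'action': [0, 1, 1, 0]})  # hypothetical data

# old, deprecated in pandas 0.23 and removed in 1.0:
# a = df['action'].as_matrix()

# new: both return a NumPy array
a = df['action'].values
a = df['action'].to_numpy()  # preferred since pandas 0.24
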
ab_testing/client.py (4 changes: 2 additions & 2 deletions)
@@ -16,8 +16,8 @@
df = pd.read_csv('advertisement_clicks.csv')
a = df[df['advertisement_id'] == 'A']
b = df[df['advertisement_id'] == 'B']
-a = a['action'].as_matrix()
-b = b['action'].as_matrix()
+a = a['action'].values
+b = b['action'].values

print("a.mean:", a.mean())
print("b.mean:", b.mean())
ann_logistic_extra/process.py (2 changes: 1 addition & 1 deletion)
@@ -21,7 +21,7 @@ def get_data():
# df.head()

# easier to work with numpy array
-data = df.as_matrix()
+data = df.values

# shuffle it
np.random.shuffle(data)
cnn_class2/class_activation_maps.py (7 changes: 2 additions & 5 deletions)
@@ -6,14 +6,10 @@
# Note: you may need to update your version of future
# sudo pip install -U future

-from keras.layers import Input, Lambda, Dense, Flatten
-from keras.models import Model
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
# from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing import image
-from keras.preprocessing.image import ImageDataGenerator

-from sklearn.metrics import confusion_matrix
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
@@ -24,7 +20,7 @@



-# useful for getting number of files
+# get the image files
image_files = glob('../large_files/256_ObjectCategories/*/*.jp*g')
image_files += glob('../large_files/101_ObjectCategories/*/*.jp*g')

@@ -72,6 +68,7 @@
cam = fmaps.dot(w)

# upsample to 224 x 224
+# 7 x 32 = 224
cam = sp.ndimage.zoom(cam, (32, 32), order=1)

plt.subplot(1,2,1)
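
The new 7 x 32 = 224 comment documents the zoom factor: ResNet50 reduces a 224 x 224 input to 7 x 7 feature maps, so a bilinear zoom by 32 brings the class activation map back to input resolution. A minimal sketch with random stand-ins for the real feature maps and weights:

import numpy as np
import scipy.ndimage

fmaps = np.random.randn(7, 7, 2048)  # stand-in for ResNet50's final conv features
w = np.random.randn(2048)            # stand-in for one class's dense-layer weights

cam = fmaps.dot(w)                                # (7, 7) class activation map
cam = scipy.ndimage.zoom(cam, (32, 32), order=1)  # bilinear upsample, 7 * 32 = 224
print(cam.shape)                                  # (224, 224)
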
cnn_class2/fashion.py (2 changes: 1 addition & 1 deletion)
@@ -26,7 +26,7 @@ def y2indicator(Y):
# get the data
# https://www.kaggle.com/zalando-research/fashionmnist
data = pd.read_csv('../large_files/fashionmnist/fashion-mnist_train.csv')
-data = data.as_matrix()
+data = data.values
np.random.shuffle(data)

X = data[:, 1:].reshape(-1, 28, 28, 1) / 255.0
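
Each CSV row holds the label in column 0 followed by 784 pixel values, so the slice and reshape above yield the N x 28 x 28 x 1 tensor Keras expects. A self-contained sketch with random stand-in rows:

import numpy as np

# assumed layout: column 0 = label, columns 1..784 = pixels in 0..255
data = np.random.randint(0, 256, size=(8, 785))

X = data[:, 1:].reshape(-1, 28, 28, 1) / 255.0  # N x height x width x channels
Y = data[:, 0]
print(X.shape, Y.shape)  # (8, 28, 28, 1) (8,)
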
cnn_class2/fashion2.py (2 changes: 1 addition & 1 deletion)
@@ -26,7 +26,7 @@ def y2indicator(Y):
# get the data
# https://www.kaggle.com/zalando-research/fashionmnist
data = pd.read_csv('../large_files/fashionmnist/fashion-mnist_train.csv')
-data = data.as_matrix()
+data = data.values
np.random.shuffle(data)

X = data[:, 1:].reshape(-1, 28, 28, 1) / 255.0
hmm_class/hmmd.py (4 changes: 4 additions & 0 deletions)
@@ -10,6 +10,7 @@

import numpy as np
import matplotlib.pyplot as plt
+from datetime import datetime


def random_normalized(d1, d2):
@@ -22,6 +23,7 @@ def __init__(self, M):
self.M = M # number of hidden states

def fit(self, X, max_iter=30):
+t0 = datetime.now()
np.random.seed(123)
# train the HMM model using the Baum-Welch algorithm
# a specific instance of the expectation-maximization algorithm
@@ -136,6 +138,8 @@ def fit(self, X, max_iter=30):
print("B:", self.B)
print("pi:", self.pi)

print("Fit duration:", (datetime.now() - t0))

plt.plot(costs)
plt.show()
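
The timing code added here is a general-purpose pattern; in isolation (the summing loop is just a stand-in for the Baum-Welch iterations):

from datetime import datetime

t0 = datetime.now()
total = sum(i * i for i in range(10**6))  # stand-in for the training loop
print("Fit duration:", (datetime.now() - t0))
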

linear_regression_class/systolic.py (2 changes: 1 addition & 1 deletion)
@@ -20,7 +20,7 @@
import pandas as pd

df = pd.read_excel('mlr02.xls')
-X = df.as_matrix()
+X = df.values

# using age to predict systolic blood pressure
plt.scatter(X[:,1], X[:,0])
nlp_class/nb.py (2 changes: 1 addition & 1 deletion)
@@ -18,7 +18,7 @@
# it will work for other types of "counts", like tf-idf, so it should
# also work for our "word proportions"

-data = pd.read_csv('spambase.data').as_matrix() # use pandas for convenience
+data = pd.read_csv('spambase.data').values # use pandas for convenience
np.random.shuffle(data) # shuffle each row in-place, but preserve the row

X = data[:,:48]
nlp_class/spam2.py (5 changes: 3 additions & 2 deletions)
@@ -14,6 +14,7 @@
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
+from sklearn.svm import SVC
from wordcloud import WordCloud


@@ -32,7 +33,7 @@

# create binary labels
df['b_labels'] = df['labels'].map({'ham': 0, 'spam': 1})
-Y = df['b_labels'].as_matrix()
+Y = df['b_labels'].values

# try multiple ways of calculating features
# tfidf = TfidfVectorizer(decode_error='ignore')
@@ -49,7 +50,7 @@
model.fit(Xtrain, Ytrain)
print("train score:", model.score(Xtrain, Ytrain))
print("test score:", model.score(Xtest, Ytest))

exit()
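
The new SVC import is not used in the visible hunks, so presumably an SVM gets compared against MultinomialNB further down the file. A self-contained sketch of such a comparison on synthetic count data (the data and the SVC settings are assumptions, not part of this commit):

import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC

# made-up non-negative "count" features standing in for the tf-idf matrix
rng = np.random.RandomState(0)
X = rng.poisson(1.0, size=(200, 30)).astype(float)
Y = (X[:, 0] + X[:, 1] > 2).astype(int)
Xtrain, Xtest, Ytrain, Ytest = X[:150], X[150:], Y[:150], Y[150:]

for model in (MultinomialNB(), SVC(kernel='linear')):
    model.fit(Xtrain, Ytrain)
    print(model.__class__.__name__, "test score:", model.score(Xtest, Ytest))
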


# visualize the data
nlp_class3/bilstm_mnist.py (2 changes: 1 addition & 1 deletion)
@@ -28,7 +28,7 @@ def get_mnist(limit=None):

print("Reading in and transforming data...")
df = pd.read_csv('../large_files/train.csv')
-data = df.as_matrix()
+data = df.values
np.random.shuffle(data)
X = data[:, 1:].reshape(-1, 28, 28) / 255.0 # data is from 0..255
Y = data[:, 0]
supervised_class/bayes.py (7 changes: 7 additions & 0 deletions)
@@ -9,6 +9,7 @@


import numpy as np
+import matplotlib.pyplot as plt
from util import get_data
from datetime import datetime
from scipy.stats import norm
@@ -60,3 +61,9 @@ def predict(self, X):
t0 = datetime.now()
print("Test accuracy:", model.score(Xtest, Ytest))
print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(Ytest))

# plot the mean of each class
for c, g in iteritems(model.gaussians):
plt.imshow(g['mean'].reshape(28, 28))
plt.title(c)
plt.show()
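
Note that iteritems is not imported in the visible hunks; in this repository it usually comes from future.utils for Python 2/3 compatibility, and plain dict.items() is the Python 3 equivalent. A self-contained sketch of the new visualization with a stand-in for model.gaussians:

import numpy as np
import matplotlib.pyplot as plt

# stand-in for model.gaussians: {class_label: {'mean': 784-vector, ...}}
gaussians = {c: {'mean': np.random.rand(784)} for c in range(3)}

for c, g in gaussians.items():  # iteritems(gaussians) under future.utils
    plt.imshow(g['mean'].reshape(28, 28))
    plt.title(c)
    plt.show()
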
supervised_class/util.py (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@
def get_data(limit=None):
print("Reading in and transforming data...")
df = pd.read_csv('../large_files/train.csv')
-data = df.as_matrix()
+data = df.values
np.random.shuffle(data)
X = data[:, 1:] / 255.0 # data is from 0..255
Y = data[:, 0]
supervised_class2/rf_classification.py (4 changes: 2 additions & 2 deletions)
@@ -55,7 +55,7 @@ def transform(self, df):
X = np.zeros((N, self.D))
i = 0
for col, scaler in iteritems(self.scalers):
-X[:,i] = scaler.transform(df[col].as_matrix().reshape(-1, 1)).flatten()
+X[:,i] = scaler.transform(df[col].values.reshape(-1, 1)).flatten()
i += 1

for col, encoder in iteritems(self.labelEncoders):
@@ -98,7 +98,7 @@ def get_data():
transformer = DataTransformer()

X = transformer.fit_transform(df)
-Y = df[0].as_matrix()
+Y = df[0].values
return X, Y
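
The .values.reshape(-1, 1) idiom appears because sklearn scalers expect 2-D input of shape (n_samples, n_features): a single column is reshaped to (n, 1) for the scaler, then flattened back to 1-D for the output matrix. A minimal sketch:

import pandas as pd
from sklearn.preprocessing import StandardScaler

df = pd.DataFrame({'x': [1.0, 2.0, 3.0, 4.0]})  # hypothetical numeric column

scaler = StandardScaler()
scaler.fit(df['x'].values.reshape(-1, 1))                        # 2-D: (n, 1)
col = scaler.transform(df['x'].values.reshape(-1, 1)).flatten()  # back to 1-D
print(col.mean(), col.std())  # approximately 0 and 1
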


supervised_class2/rf_regression.py (8 changes: 4 additions & 4 deletions)
@@ -44,7 +44,7 @@ def fit(self, df):
self.scalers = {}
for col in NUMERICAL_COLS:
scaler = StandardScaler()
-scaler.fit(df[col].as_matrix().reshape(-1, 1))
+scaler.fit(df[col].values.reshape(-1, 1))
self.scalers[col] = scaler

def transform(self, df):
@@ -53,7 +53,7 @@ def transform(self, df):
X = np.zeros((N, D))
i = 0
for col, scaler in iteritems(self.scalers):
-X[:,i] = scaler.transform(df[col].as_matrix().reshape(-1, 1)).flatten()
+X[:,i] = scaler.transform(df[col].values.reshape(-1, 1)).flatten()
i += 1
for col in NO_TRANSFORM:
X[:,i] = df[col]
@@ -96,9 +96,9 @@ def get_data():
df_test = df.loc[test_idx]

Xtrain = transformer.fit_transform(df_train)
-Ytrain = np.log(df_train['medv'].as_matrix())
+Ytrain = np.log(df_train['medv'].values)
Xtest = transformer.transform(df_test)
-Ytest = np.log(df_test['medv'].as_matrix())
+Ytest = np.log(df_test['medv'].values)
return Xtrain, Ytrain, Xtest, Ytest


unsupervised_class/kmeans_mnist.py (4 changes: 2 additions & 2 deletions)
@@ -16,13 +16,13 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
-from .kmeans import plot_k_means, get_simple_data
+from kmeans import plot_k_means, get_simple_data
from datetime import datetime

def get_data(limit=None):
print("Reading in and transforming data...")
df = pd.read_csv('../large_files/train.csv')
-data = df.as_matrix()
+data = df.values
np.random.shuffle(data)
X = data[:, 1:] / 255.0 # data is from 0..255
Y = data[:, 0]
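
The import change in this file is unrelated to as_matrix: a relative import such as from .kmeans import ... only works when the file is loaded as part of a package, so running kmeans_mnist.py directly as a script fails with an ImportError. The absolute form succeeds because the script's own directory is on sys.path:

# fails when the file runs as a top-level script (no parent package):
#   from .kmeans import plot_k_means, get_simple_data
# works, since the script's directory is on sys.path:
from kmeans import plot_k_means, get_simple_data
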
unsupervised_class2/util.py (2 changes: 1 addition & 1 deletion)
@@ -23,7 +23,7 @@ def getKaggleMNIST():
# column 0 is labels
# column 1-785 is data, with values 0 .. 255
# total size of CSV: (42000, 1, 28, 28)
-train = pd.read_csv('../large_files/train.csv').as_matrix().astype(np.float32)
+train = pd.read_csv('../large_files/train.csv').values.astype(np.float32)
train = shuffle(train)

Xtrain = train[:-1000,1:] / 255
