diff --git a/ML_Proj b/ML_Proj index c7a5500..d166bd6 100644 --- a/ML_Proj +++ b/ML_Proj @@ -1,48 +1,4 @@ -def model_train_save(train_data): - Train_df = pd.read_csv(train_data,header = 0, delimiter = ',', encoding="iso-8859-1") - X_train = Train_df.text - y_train = Train_df.IndustryCategory - tokenizer = Tokenizer(num_words=5000) - tokenizer.fit_on_texts(X_train) - X_train = tokenizer.texts_to_sequences(X_train) - vocab_size = len(tokenizer.word_index) + 1 # Adding 1 because of reserved 0 index - maxlen = 100 - X_train = pad_sequences(X_train, padding='post', maxlen=maxlen) - encoder = LabelEncoder() - encoder.fit(y_train) - y_train = encoder.transform(y_train) - num_classes = np.max(y_train) + 1 - y_train = utils.to_categorical(y_train, num_classes) - embedding_dim = 50 - model = Sequential() - model.add(layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen)) - model.add(layers.Flatten()) - model.add(layers.Dense(10, activation='relu')) - model.add(layers.Dense(num_classes, activation='sigmoid')) - model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy']) - model.summary() - history = model.fit(np.array(X_train), np.array(y_train),epochs=2,verbose=False,batch_size=10) - loss, Training_accuracy = model.evaluate(X_train, y_train, verbose=False) - print("Training Accuracy: {:.4f}".format(Training_accuracy)) - # serialize model to JSON - # the keras model which is trained is defined as 'model' - model_json = model.to_json() - with open("model_num.json", "w") as json_file: - json_file.write(model_json) - - # serialize weights to HDF5 - model.save_weights("model_num.h5") - # load json and create model - json_file = open('model_num.json', 'r') - loaded_model_json = json_file.read() - json_file.close() - loaded_model = model_from_json(loaded_model_json) - # load weights into new model - loaded_model.load_weights("model_num.h5") - #print("Loaded model from disk") - return (loaded_model.save('model_num.hdf5')) - def predict(test_data): Test_df = pd.read_csv(test_data,header = 0, delimiter = ',', encoding="iso-8859-1") X_test = Test_df.text