reverted notebook file

DSCI-310 · Apr 11, 2023 · b781953 · b781953
1 parent 112a812
commit b781953
Show file tree

Hide file tree

Showing 7 changed files with 134 additions and 446 deletions.
diff --git a/src/cleansplit.py b/src/cleansplit.py
@@ -18,6 +18,13 @@ def clean_data(input_path):
     train_df = preprocessor(df, 0)
     test_df = preprocessor(df, 1)
 
+    # # Splitting into X and y train and test sets
+    # X_train = train_df.drop(columns=["target", "quality"])
+    # y_train = train_df["target"]
+
+    # X_test = test_df.drop(columns=["target", "quality"])
+    # y_test = test_df["target"]
+
     # Write the cleaned data to the output path
     train_df.to_csv(output_train_path, index=False)
     test_df.to_csv(output_test_path, index=False)

diff --git a/src/coefficients.py b/src/coefficients.py
@@ -13,7 +13,9 @@ def coefficients(input_path, output_path):
     y_train = input_training["target"]
 
     input_pipe = pipe_build('lr', X_train, y_train)
-
+    # df = show_coefficients(input_pipe, X_train)
+    # df_styled = df.style.background_gradient()
+    # dfi.export(df_styled, output_path + '.png')
 
 if __name__ == "__main__":
     # Set up command-line argument parsing

diff --git a/src/cor_ratio.py b/src/cor_ratio.py
@@ -19,7 +19,8 @@ def analysis(training_path, test_path, input_model, output_path):
     y_test = input_test["target"]
 
     input_pipe = pipe_build(input_model, X_train, y_train)
-
+    # df = show_correct(input_pipe, X_test, y_test).to_frame(name='Positive vs. Negative')
+    # dfi.export(df, output_path + input_model + 'correct.png')
 
     fig = class_report(input_pipe, X_test, y_test)
     fig.savefig(output_path + input_model + "report.png", bbox_inches="tight")

diff --git a/src/eda.py b/src/eda.py
@@ -19,6 +19,11 @@ def eda(input_path, output_path):
     with open(output_path + "barcount.png", "wb") as f:
         f.write(bar_png_data)
 
+    # # The integer specifies whether to return the train or test dataframe
+    # train_df = preprocessor(df, 0)
+    # dfi.export(train_df["target"].value_counts().to_frame(), output_path + 'dfclasses.png', fontsize=12, table_conversion='chrome')
+
+
 if __name__ == "__main__":
     # Set up command-line argument parsing
     parser = argparse.ArgumentParser()

diff --git a/src/grapher.py b/src/grapher.py
@@ -82,8 +82,10 @@ def compare_scores(lst):
     # RETURNS: The bar chart where the highlighted bar is the highest score.
     cscores = lst
 
-    report = pd.DataFrame(
-        [cscores], columns=['Baseline', 'LR', 'SVC', 'DT', 'NB'])
+    report = pd.DataFrame()
+    report = report.append(pd.DataFrame(
+        [cscores], columns=['Baseline', 'LR', 'SVC', 'DT', 'NB']),
+                            ignore_index=True)
 
     report.index = ['Score']
     report = report.T.reset_index()
@@ -99,7 +101,7 @@ def compare_scores(lst):
             'steelblue')  # And if it's not true it sets the bar steelblue.
     )).properties(width=500, height=200).configure(background='lightgrey')
 
-    # y = y.to_json('scores.json')
+    y = y.to_json('scores.json')
     return y
 
 def show_coefficients(pipe, X_train):
@@ -142,4 +144,4 @@ def show_correct(pipe, X_test, y_test):
     ax = pd.DataFrame(data={'actual': y_test, 'predicted': pipe.predict(X_test)})
     ax['correct'] = ax['actual'] == ax['predicted']
 
-    return ax.correct.value_counts()
+    return ax.correct.value_counts()
diff --git a/src/hpfigures.py b/src/hpfigures.py
@@ -11,6 +11,10 @@ def hpfigures(input_path, input_model, output_path):
     X_train = input_training.drop(columns=["target", "quality"])
     y_train = input_training["target"]
 
+    # df = hp_optimizer(input_model, X_train, y_train)
+    # df_styled = df.style.background_gradient()
+    # dfi.export(df_styled, output_path + input_model + 'table.png')
+
 if __name__ == "__main__":
     # Set up command-line argument parsing
     parser = argparse.ArgumentParser()

diff --git a/wineclassification.ipynb b/wineclassification.ipynb