-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
7aa49ae
commit 61ae1f2
Showing
1 changed file
with
124 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
import sys | ||
from dataclasses import dataclass | ||
|
||
import numpy as np | ||
import pandas as pd | ||
from sklearn.compose import ColumnTransformer | ||
from sklearn.impute import SimpleImputer | ||
from sklearn.pipeline import Pipeline | ||
from sklearn.preprocessing import OneHotEncoder,StandardScaler | ||
|
||
from src.exception import CustomException | ||
from src.logger import logging | ||
import os | ||
|
||
from src.utils import save_object | ||
|
||
@dataclass
class DataTransformationConfig:
    """Configuration for the data-transformation step.

    Attributes:
        preprocessor_obj_file_path: Path the fitted preprocessing object is
            written to.
    """

    # The type annotation is what turns this into a real dataclass field;
    # without it ``@dataclass`` ignores the attribute, so it is missing from
    # ``__init__``/``repr`` and cannot be overridden per instance.
    # NOTE(review): "proprocessor.pkl" looks like a typo for
    # "preprocessor.pkl"; kept unchanged because other code may load the
    # artifact under this exact name -- confirm before renaming.
    preprocessor_obj_file_path: str = os.path.join('artifacts', "proprocessor.pkl")
|
||
class DataTransformation:
    """Build, fit and persist the feature preprocessor for the train/test CSVs."""

    def __init__(self):
        self.data_transformation_config = DataTransformationConfig()

    @staticmethod
    def _as_dense(features):
        """Return *features* as a dense array, densifying sparse matrices."""
        # ColumnTransformer may return a SciPy sparse matrix when the stacked
        # output is sparse enough; ``np.c_`` cannot concatenate one with a
        # dense target column.
        return features.toarray() if hasattr(features, "toarray") else features

    def get_data_transformer_object(self):
        """Create the unfitted column preprocessor.

        Numerical columns are median-imputed and standard-scaled. Categorical
        columns are imputed with the most frequent value, one-hot encoded and
        scaled without centering.

        Returns:
            An unfitted ``ColumnTransformer`` combining both pipelines.

        Raises:
            CustomException: If building the pipelines fails.
        """
        try:
            numerical_columns = ["writing_score", "reading_score"]
            categorical_columns = [
                "gender",
                "race_ethnicity",
                "parental_level_of_education",
                "lunch",
                "test_preparation_course",
            ]

            num_pipeline = Pipeline(
                steps=[
                    ("imputer", SimpleImputer(strategy="median")),
                    ("scaler", StandardScaler()),
                ]
            )

            cat_pipeline = Pipeline(
                steps=[
                    ("imputer", SimpleImputer(strategy="most_frequent")),
                    # A category that only shows up in the test split (or at
                    # inference time) must not abort ``transform``; it is
                    # encoded as an all-zero row instead of raising.
                    ("one_hot_encoder", OneHotEncoder(handle_unknown="ignore")),
                    # with_mean=False because the encoder output is sparse by
                    # default and StandardScaler cannot center sparse input.
                    ("scaler", StandardScaler(with_mean=False)),
                ]
            )

            logging.info(f"Categorical columns: {categorical_columns}")
            logging.info(f"Numerical columns: {numerical_columns}")

            preprocessor = ColumnTransformer(
                [
                    ("num_pipeline", num_pipeline, numerical_columns),
                    ("cat_pipelines", cat_pipeline, categorical_columns),
                ]
            )

            return preprocessor

        except Exception as e:
            raise CustomException(e, sys) from e

    def initiate_data_transformation(self, train_path, test_path):
        """Fit the preprocessor on the training CSV and transform both splits.

        Args:
            train_path: Path of the training CSV file.
            test_path: Path of the test CSV file.

        Returns:
            A tuple ``(train_arr, test_arr, preprocessor_path)``. Each array
            holds the transformed features with the ``math_score`` target
            appended as the last column; ``preprocessor_path`` is where the
            fitted preprocessor was saved.

        Raises:
            CustomException: If reading, transforming or saving fails.
        """
        try:
            train_df = pd.read_csv(train_path)
            test_df = pd.read_csv(test_path)

            logging.info("Read train and test data completed")

            logging.info("Obtaining preprocessing object")

            preprocessing_obj = self.get_data_transformer_object()

            target_column_name = "math_score"

            input_feature_train_df = train_df.drop(columns=[target_column_name])
            target_feature_train_df = train_df[target_column_name]

            input_feature_test_df = test_df.drop(columns=[target_column_name])
            target_feature_test_df = test_df[target_column_name]

            logging.info(
                "Applying preprocessing object on training dataframe and testing dataframe."
            )

            # Fit on the training split only; the test split is transformed
            # with the statistics learned from training data.
            input_feature_train_arr = self._as_dense(
                preprocessing_obj.fit_transform(input_feature_train_df)
            )
            input_feature_test_arr = self._as_dense(
                preprocessing_obj.transform(input_feature_test_df)
            )

            train_arr = np.c_[
                input_feature_train_arr, np.array(target_feature_train_df)
            ]
            test_arr = np.c_[input_feature_test_arr, np.array(target_feature_test_df)]

            save_object(
                file_path=self.data_transformation_config.preprocessor_obj_file_path,
                obj=preprocessing_obj,
            )

            # Logged only after the object has actually been written.
            logging.info("Saved preprocessing object.")

            return (
                train_arr,
                test_arr,
                self.data_transformation_config.preprocessor_obj_file_path,
            )
        except Exception as e:
            raise CustomException(e, sys) from e