# used car prediction model.py

# -*- coding: utf-8 -*-
"""project.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1PwytcE2uzMNSkrZX2AsEYriF_2k-7Yd9
"""

# Commented out IPython magic to ensure Python compatibility.
#import required libraries
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import ipywidgets as widgets
from IPython.display import display

# Load the used-car dataset from the Colab content directory into a DataFrame.
car_data = pd.read_csv('/content/car_data.csv')

# Outside a notebook the value of a bare expression is thrown away, so each
# inspection result is printed explicitly to stay visible when run as a script.

# First five rows of the frame.
print(car_data.head())

# Column dtypes and non-null counts (info() writes to stdout by itself).
car_data.info()

# Number of missing values per column.
print(car_data.isnull().sum())

# Summary statistics of the numeric columns.
print(car_data.describe())

# Column labels.
print(car_data.columns)

# Print the frequency of every category in each categorical column, with a
# dashed rule before each table and one more after the last table.
for column in ('Fuel_Type', 'Seller_Type', 'Transmission'):
    print("---------------------------")
    print(car_data[column].value_counts())
print("---------------------------")

from matplotlib import style

# Use the 'ggplot' look for the figures drawn below.
style.use('ggplot')

# Columns that feed the three bar charts.
fuel_type = car_data['Fuel_Type']
seller_type = car_data['Seller_Type']
transmission_type = car_data['Transmission']
selling_price = car_data['Selling_Price']

# One wide figure holding a 1x3 grid of bar charts.
fig = plt.figure(figsize=(15,5))
fig.suptitle('Visualizing categorical data columns')

# (categories on the x axis, bar colour, x-axis label) for each panel,
# listed left to right.
panels = [
    (fuel_type, 'royalblue', "Fuel Type"),
    (seller_type, 'red', "Seller Type"),
    (transmission_type, 'purple', 'Transmission type'),
]

for position, (categories, bar_color, axis_label) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    plt.bar(categories, selling_price, color=bar_color)
    plt.xlabel(axis_label)
    # Only the left-most panel carries the y-axis label.
    if position == 1:
        plt.ylabel("Selling Price")

# Render the figure.
plt.show()

# Manual (ordinal) encoding of the fuel type: Petrol=0, Diesel=1, CNG=2.
# Values not listed in the mapping are left untouched by replace().
car_data = car_data.replace({'Fuel_Type':{'Petrol':0, 'Diesel':1, 'CNG':2}})

# One-hot encode the remaining categorical columns. drop_first=True removes the
# first level of each column, leaving k-1 indicator columns per k categories.
car_data = pd.get_dummies(car_data, columns=['Seller_Type', 'Transmission'], drop_first=True)

# Print the encoded frame; as a bare expression the preview would be discarded
# when this file runs as a plain script.
print(car_data.head())

# Heat-map of the pairwise correlations between columns.
# car_data still contains Car_Name (presumably free text) at this point, and
# since pandas 2.0 corr() no longer silently skips non-numeric columns, so the
# computation is limited to numeric/boolean columns explicitly.
plt.figure(figsize=(10,7))
sns.heatmap(car_data.corr(numeric_only=True), annot=True)
plt.title('Correlation between the columns')
plt.show()

# Scatter plot with a fitted regression line: present price vs selling price.
fig=plt.figure(figsize=(7,5))
plt.title('Correlation between present price and selling price')
sns.regplot(x='Present_Price', y='Selling_Price', data=car_data)
# Show this figure now; otherwise it stays the current figure and the next
# regplot in the script is drawn on top of it.
plt.show()

# Build the target (y) and the feature matrix (X). X is every column except
# the car name and the selling price itself.
y = car_data['Selling_Price']
X = car_data.drop(columns=['Car_Name', 'Selling_Price'])

# Report the dimensions of both.
print("Shape of X is: ", X.shape)
print("Shape of y is: ", y.shape)

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Report the dimensions of each part of the split.
for label, part in (
    ("X_test shape:", X_test),
    ("X_train shape:", X_train),
    ("y_test shape: ", y_test),
    ("y_train shape:", y_train),
):
    print(label, part.shape)

# Learn the scaling statistics from the training features only, then apply the
# same transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train a linear regression on the scaled training data (fit() returns the
# fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Predicted selling prices for the held-out test rows.
pred = model.predict(X_test)

# Evaluate the predictions on the test split.
# scikit-learn metrics take (y_true, y_pred) in that order. MAE and MSE are
# symmetric, but R2 is not: it normalises by the variance of its first
# argument, so the ground truth must come first.
print("MAE: ", (metrics.mean_absolute_error(y_test, pred)))
print("MSE: ", (metrics.mean_squared_error(y_test, pred)))
print("R2 score: ", (metrics.r2_score(y_test, pred)))

# Scatter the predictions (x) against the true test prices (y) in blue, with
# the fitted regression line in red, then label the axes returned by regplot.
ax = sns.regplot(
    x=pred,
    y=y_test,
    scatter_kws={'color': 'blue'},
    line_kws={'color': 'red'},
)
ax.set_xlabel("Predicted Price")
ax.set_ylabel('Actual Price')
ax.set_title("ACtual vs predicted price")
plt.show()

# Interactive controls, one per model feature.

# Numeric features use sliders.
year_widget = widgets.IntSlider(
    description='Year:',
    min=1970,
    max=2023,
    step=1,
    value=2010,
)
present_price_widget = widgets.FloatSlider(
    description='Present Price:',
    min=0.2,
    max=100,
    step=0.1,
    value=9.83,
)
kilometer_driven_widget = widgets.IntSlider(
    description='Kilometer Driven:',
    min=1000,
    max=100000,
    step=1000,
    value=2071,
)

# Categorical features use drop-downs; the first option is the default.
fuel_type_widget = widgets.Dropdown(
    description='Fuel Type:',
    options=['Petrol', 'Diesel', 'CNG'],
    value='Petrol',
)
owner_widget = widgets.Dropdown(
    description='Owner:',
    options=['First', 'Second', 'Third'],
    value='First',
)
seller_type_widget = widgets.Dropdown(
    description='Seller Type:',
    options=['Individual', 'Dealer'],
    value='Individual',
)
transmission_widget = widgets.Dropdown(
    description='Transmission:',
    options=['Manual', 'Automatic'],
    value='Manual',
)

# Function to get input values and make a prediction
def make_prediction(button):
    """Predict a selling price from the current widget values and print it.

    The widget values are encoded the same way the training data was, scaled
    with the fitted ``scaler`` and passed to the trained ``model``.

    Args:
        button: Argument supplied by the click callback; it is not used.
    """
    # Same codes as the manual Fuel_Type encoding applied to the training data.
    fuel_type_codes = {'Petrol': 0, 'Diesel': 1, 'CNG': 2}
    owner_codes = {'First': 0, 'Second': 1, 'Third': 2}

    fuel_type_encoded = fuel_type_codes[fuel_type_widget.value]
    owner_encoded = owner_codes[owner_widget.value]

    # The training frame was one-hot encoded with drop_first=True, which drops
    # the first level of each column. Assuming the CSV holds only the
    # categories offered by the widgets (TODO confirm against the data), the
    # kept indicators are "is Individual" and "is Manual", so those values
    # map to 1 and the dropped levels (Dealer, Automatic) map to 0.
    seller_type_encoded = 1 if seller_type_widget.value == 'Individual' else 0
    transmission_encoded = 1 if transmission_widget.value == 'Manual' else 0

    new_data_point = [
        [year_widget.value, present_price_widget.value, kilometer_driven_widget.value,
         fuel_type_encoded, owner_encoded, seller_type_encoded, transmission_encoded]
    ]

    # The model was trained on standardised features, so the new row must go
    # through the same fitted scaler. Reusing the scaler's feature names, when
    # it recorded them, keeps the column labels consistent with training.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    scaled_point = scaler.transform(pd.DataFrame(new_data_point, columns=feature_names))

    prediction = model.predict(scaled_point)
    print("Selling Price in Lakhs:", prediction)

# Button that runs make_prediction each time it is clicked.
predict_button = widgets.Button(description='Predict')
predict_button.on_click(make_prediction)

# Render the input controls in feature order, followed by the button.
input_controls = [
    year_widget,
    present_price_widget,
    kilometer_driven_widget,
    fuel_type_widget,
    owner_widget,
    seller_type_widget,
    transmission_widget,
]
display(*input_controls, predict_button)