End-to-End Machine Learning using Streamlit
If you are not familiar with Streamlit, we suggest you first go through this link.
In this article, we use a used-car dataset to build a model that predicts a car's selling price, and then use Streamlit to serve that model as a web app.
Also, Check out our Article on:
StreamLit - Data Scientists tool for developing web apps
Data Description
Installing and Importing Packages
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
Data Acquisition and description
# Load the car dataset from GitHub into a DataFrame, then show its shape
# and first rows.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/insaid2018/Term-2/master/car%20data.csv'
)
print('Data Shape:', df.shape)
df.head()
Data Description
# Show pandas' summary statistics for df.
df.describe()
Data Information
# Print a concise summary of df (columns, dtypes, non-null counts).
df.info()
Data Preprocessing
# Keep only the columns used for modelling. `.copy()` makes final_dataset an
# independent frame, so the column assignments and in-place drops performed
# on it later do not trigger pandas' SettingWithCopyWarning.
final_dataset = df[['Year', 'Selling_Price', 'Present_Price', 'Kms_Driven',
                    'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner']].copy()
final_dataset.head()
Feature Engineering
# Derive the car's age in years relative to 2020 (the reference year hard-coded
# here) and drop the raw 'Year' column it replaces. 'Current Year' is a helper
# column that stays in the frame for now.
final_dataset['Current Year'] = 2020
final_dataset['no_year'] = final_dataset['Current Year'] - final_dataset['Year']
final_dataset.drop(['Year'], axis=1, inplace=True)
final_dataset.head()
Encoding Categories
# Label-encode the three categorical columns. `apply` calls le.fit_transform
# once per column, so the single encoder is refit each time and afterwards
# only holds the classes of the last column ('Transmission').
le = LabelEncoder()
final = final_dataset[['Fuel_Type', 'Seller_Type', 'Transmission']].apply(le.fit_transform)
# Remove the original string columns; their encoded versions live in `final`.
final_dataset.drop(['Fuel_Type', 'Seller_Type', 'Transmission'], inplace=True, axis=1)
Finalizing Data
# Attach the encoded categorical columns to the numeric features. `join`
# aligns on the index and appends each column on the right.
Fuel = final['Fuel_Type']
Seller = final['Seller_Type']
Transmission = final['Transmission']
final_dataset = final_dataset.join(Fuel)
final_dataset = final_dataset.join(Seller)
final_dataset = final_dataset.join(Transmission)
# 'Current Year' was only needed to compute 'no_year'.
final_dataset = final_dataset.drop(['Current Year'], axis=1)
final_dataset.head()
Data Preparation
# Column 0 is 'Selling_Price' (the target); every remaining column is a
# feature. After the preprocessing steps the feature columns are, in order:
#   Present_Price, Kms_Driven, Owner, no_year,
#   Fuel_Type, Seller_Type, Transmission
# Any plain array later passed to the model's predict() must use this order.
X = final_dataset.iloc[:, 1:]
y = final_dataset.iloc[:, 0]
# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
Model Development and Evaluation
# Randomized Search CV: candidate hyperparameter values to sample from.

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start=100, stop=1200, num=12)]
# Number of features to consider at every split.
# 1.0 (use all features) is what 'auto' meant for RandomForestRegressor; the
# 'auto' string itself was deprecated in scikit-learn 1.1 and removed in 1.3,
# so the explicit equivalent is used instead.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(5, 30, num=6)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10, 15, 100]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 5, 10]

# Create the random grid
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
}
print(random_grid)
Model Building
# Base estimator; its hyperparameters are chosen by the random search below.
rf = RandomForestRegressor()
# Evaluate 10 random combinations drawn from random_grid with 5-fold
# cross-validation on all available cores. Scoring is negative MSE, so a
# higher score means a lower error.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    scoring='neg_mean_squared_error',
    n_iter=10,
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
rf_random.fit(X_train, y_train)
Best parameters
# Hyperparameter combination that scored best during the random search.
rf_random.best_params_
Model Evaluation
# Score the tuned model on the held-out test set.
predictions = rf_random.predict(X_test)
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', metrics.mean_squared_error(y_test, predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
Checking Prediction Visually
# Scatter plot of actual test-set targets (x) against the model's predictions (y).
plt.scatter(y_test,predictions)
Saving Model
# Open the file where the model will be stored and dump the fitted search
# object into it. The `with` block guarantees the file is flushed and closed
# even if pickling fails.
with open('random_forest_regression_model.pkl', 'wb') as file:
    pickle.dump(rf_random, file)
Building WebApp using Streamlit
Importing the libraries in our py file
import streamlit as st
import pickle
import numpy as np
Loading our pickled model
# Load the model saved by the training script. The `with` block closes the
# file handle once the model has been read.
# NOTE: unpickling can execute arbitrary code, so only load a .pkl file that
# you produced yourself.
with open('random_forest_regression_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
Defining function to predict the value
def predict_price(Present_Price, Kms_Driven, Fuel_Type, Seller_Type,
                  Transmission, Owner, no_year):
    """Predict the selling price of a car with the loaded model.

    Args:
        Present_Price: Current market value of the car.
        Kms_Driven: Kilometres driven.
        Fuel_Type: Encoded fuel type (0 CNG, 1 Diesel, 2 Petrol).
        Seller_Type: Encoded seller type (0 Dealer, 1 Individual).
        Transmission: Encoded transmission (0 Automatic, 1 Manual).
        Owner: Number of previous owners.
        no_year: Age of the car in years.

    Each argument may be a number or a numeric string.

    Returns:
        The predicted selling price as a float.

    Raises:
        ValueError: If any argument cannot be converted to a float.
    """
    # The model was fitted on columns in the order
    #   Present_Price, Kms_Driven, Owner, no_year,
    #   Fuel_Type, Seller_Type, Transmission
    # so the row must be assembled in that order, not in the order of this
    # function's parameters; otherwise the values land in the wrong features.
    features = np.array([[Present_Price, Kms_Driven, Owner, no_year,
                          Fuel_Type, Seller_Type,
                          Transmission]]).astype(np.float64)
    prediction = model.predict(features)
    # predict() returns a one-element array; index it explicitly because
    # calling float() on a non-0-d array is deprecated in recent NumPy.
    return float(prediction[0])
Defining function that takes in user inputs from the web page
def main():
    """Render the Streamlit page: collect car details and show the prediction.

    Seven text inputs are shown; when the "Predict" button is pressed their
    values are passed to predict_price() and the result is displayed. If any
    field is not numeric (for example, it still holds its placeholder text),
    an error message is shown instead of crashing the app.
    """
    st.title("Car Price Prediction")
    html_temp = """
<div style="background-color:#025246 ;padding:10px">
<h2 style="color:white;text-align:center;">
Used Car Price Prediction ML App </h2>
</div>
"""
    st.markdown(html_temp, unsafe_allow_html=True)

    Present_Price = st.text_input(
        "What is the current market value of the car?", "In Lakhs")
    Kms_Driven = st.text_input(
        "How much kilometers the car has driven?", "Type Here")
    Fuel_Type = st.text_input(
        "What is the type of fuel used?",
        "Please Type 0 for CNG/ 1 for Diesel/ 2 for Petrol")
    Seller_Type = st.text_input(
        "What is the type of seller?",
        "Please Type 0 for Dealer/ 1 for Individual")
    Transmission = st.text_input(
        "What is the type of Transmission?",
        "Please type 0 for Automatic/ 1 for manual")
    Owner = st.text_input("What is the no. of owners?",
                          "Please type 0/1/3")
    no_year = st.text_input("How many years old?", "Type here")

    if st.button("Predict"):
        try:
            output = predict_price(Present_Price, Kms_Driven,
                                   Fuel_Type, Seller_Type,
                                   Transmission, Owner, no_year)
        except ValueError:
            # Raised by the float conversion when a field is empty, still
            # contains its placeholder text, or is otherwise non-numeric.
            st.error('Please enter a numeric value in every field.')
        else:
            st.success(
                'The selling price of this vehicle will be '
                'approximately {} lakhs'.format(round(output, 2)))
Looking at the full script
import pickle

import numpy as np
import streamlit as st

# Load the model saved by the training script. The `with` block closes the
# file handle once the model has been read.
# NOTE: unpickling can execute arbitrary code, so only load a .pkl file that
# you produced yourself.
with open('random_forest_regression_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)


def predict_price(Present_Price, Kms_Driven, Fuel_Type, Seller_Type,
                  Transmission, Owner, no_year):
    """Predict the selling price of a car with the loaded model.

    Args:
        Present_Price: Current market value of the car.
        Kms_Driven: Kilometres driven.
        Fuel_Type: Encoded fuel type (0 CNG, 1 Diesel, 2 Petrol).
        Seller_Type: Encoded seller type (0 Dealer, 1 Individual).
        Transmission: Encoded transmission (0 Automatic, 1 Manual).
        Owner: Number of previous owners.
        no_year: Age of the car in years.

    Each argument may be a number or a numeric string.

    Returns:
        The predicted selling price as a float.

    Raises:
        ValueError: If any argument cannot be converted to a float.
    """
    # The model was fitted on columns in the order
    #   Present_Price, Kms_Driven, Owner, no_year,
    #   Fuel_Type, Seller_Type, Transmission
    # so the row must be assembled in that order, not in the order of this
    # function's parameters; otherwise the values land in the wrong features.
    features = np.array([[Present_Price, Kms_Driven, Owner, no_year,
                          Fuel_Type, Seller_Type,
                          Transmission]]).astype(np.float64)
    prediction = model.predict(features)
    # predict() returns a one-element array; index it explicitly because
    # calling float() on a non-0-d array is deprecated in recent NumPy.
    return float(prediction[0])


def main():
    """Render the Streamlit page: collect car details and show the prediction.

    Seven text inputs are shown; when the "Predict" button is pressed their
    values are passed to predict_price() and the result is displayed. If any
    field is not numeric (for example, it still holds its placeholder text),
    an error message is shown instead of crashing the app.
    """
    st.title("Car Price Prediction")
    html_temp = """
<div style="background-color:#025246 ;padding:10px">
<h2 style="color:white;text-align:center;">
Used Car Price Prediction ML App </h2>
</div>
"""
    st.markdown(html_temp, unsafe_allow_html=True)

    Present_Price = st.text_input(
        "What is the current market value of the car?", "In Lakhs")
    Kms_Driven = st.text_input(
        "How much kilometers the car has driven?", "Type Here")
    Fuel_Type = st.text_input(
        "What is the type of fuel used?",
        "Please Type 0 for CNG/ 1 for Diesel/ 2 for Petrol")
    Seller_Type = st.text_input(
        "What is the type of seller?",
        "Please Type 0 for Dealer/ 1 for Individual")
    Transmission = st.text_input(
        "What is the type of Transmission?",
        "Please type 0 for Automatic/ 1 for manual")
    Owner = st.text_input("What is the no. of owners?",
                          "Please type 0/1/3")
    no_year = st.text_input("How many years old?", "Type here")

    if st.button("Predict"):
        try:
            output = predict_price(Present_Price, Kms_Driven,
                                   Fuel_Type, Seller_Type,
                                   Transmission, Owner, no_year)
        except ValueError:
            # Raised by the float conversion when a field is empty, still
            # contains its placeholder text, or is otherwise non-numeric.
            st.error('Please enter a numeric value in every field.')
        else:
            st.success(
                'The selling price of this vehicle will be '
                'approximately {} lakhs'.format(round(output, 2)))


if __name__ == '__main__':
    main()
Result
Also, Check out our Article on:
Follow us for more upcoming articles on Data Science, Machine Learning, and Artificial Intelligence.
Also, do give us a clap 👏 if you found this article useful; your encouragement inspires us and helps us create more cool stuff like this.