End-to-End Machine Learning using Streamlit

5 min readMar 12, 2021


By Hemanka Sarmah

If you are not familiar with Streamlit, we suggest you first go through this link.

In this article, we are going to use a used-cars dataset to build a model that predicts the selling price of a car, and then use Streamlit to serve that model as a web app.

Data Description

Installing and Importing Packages

import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt

Data Acquisition and description

# Load the car listings CSV straight from GitHub into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/insaid2018/Term-2/master/car%20data.csv'
)
print('Data Shape:', df.shape)
# In a notebook this displays the first rows; in a plain script it is a no-op.
df.head()

Data Description


Data Information


Data Preprocessing


Feature Engineering

# Derive the car's age in years relative to a fixed reference year (2020).
final_dataset['Current Year'] = 2020
final_dataset['no_year'] = final_dataset['Current Year'] - final_dataset['Year']

Encoding Categories

# Integer-encode the three categorical columns. The same encoder instance is
# re-fitted for each column by `apply`, so `le` itself only remembers the
# classes of the last column it saw.
le = LabelEncoder()
final = final_dataset[['Fuel_Type', 'Seller_Type', 'Transmission']].apply(le.fit_transform)
# Remove the original text columns from the working frame.
final_dataset.drop(['Fuel_Type', 'Seller_Type', 'Transmission'], inplace=True, axis=1)

Finalizing Data

# Pull each encoded categorical column out of `final` as its own Series.
Fuel, Seller, Transmission = (
    final[column] for column in ('Fuel_Type', 'Seller_Type', 'Transmission')
)
# Drop the helper 'Current Year' column from the working frame.
final_dataset = final_dataset.drop(columns=['Current Year'])

Data Preparation

# Hold out 30% of the samples as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

Model Development and Evaluation

# Randomized Search CV: candidate values for each hyperparameter.

# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start=100, stop=1200, num=12)]
# Number of features to consider at every split.
# 1.0 means "use all features"; it replaces the string 'auto', which had the
# same meaning for regressors but was removed in scikit-learn 1.3.
max_features = [1.0, 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(5, 30, num=6)]
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10, 15, 100]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 5, 10]
# Create the random grid
random_grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
}

Model Building

# Base estimator whose hyperparameters are being tuned.
rf = RandomForestRegressor()
# Sample 10 parameter combinations from `random_grid`, scoring each with
# 5-fold cross-validation and using all available CPU cores.
# NOTE: the search still has to be fitted (rf_random.fit(X_train, y_train))
# before it can be used for prediction.
rf_random = RandomizedSearchCV(
    estimator=rf,
    param_distributions=random_grid,
    n_iter=10,
    cv=5,
    n_jobs=-1,
)

Best parameters


Model Evaluation

# `metrics` is not among the imports at the top of the script, so bring it in here.
from sklearn import metrics

# Score the tuned model on the held-out test set.
predictions = rf_random.predict(X_test)
print('MAE:', metrics.mean_absolute_error(y_test, predictions))
print('MSE:', metrics.mean_squared_error(y_test, predictions))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))

Checking Prediction Visually


Saving Model

# Open the file where you want to store the model; the `with` block guarantees
# the handle is flushed and closed even if pickling fails.
with open('random_forest_regression_model.pkl', 'wb') as file:
    # Dump the search object to that file.
    pickle.dump(rf_random, file)

Building WebApp using Streamlit

Importing the libraries in our py file

import streamlit as st
import pickle
import numpy as np

Loading our pickled model


Defining function to predict the value

def predict_price(Present_Price, Kms_Driven, Fuel_Type, Seller_Type,
                  Transmission, Owner, no_year):
    """Return the predicted selling price for a single car.

    The seven arguments are packed, in this order, into one feature row and
    passed to the module-level ``model``. Values may be numbers or numeric
    strings (as returned by ``st.text_input``); they are converted to float.
    NOTE(review): the column order is assumed to match the order used when
    the model was trained -- confirm against the training script.

    Raises:
        ValueError: if any argument cannot be converted to a float.
    """
    # One row, seven columns; astype() turns numeric strings into floats.
    features = np.array([[Present_Price, Kms_Driven, Fuel_Type,
                          Seller_Type, Transmission, Owner,
                          no_year]]).astype(np.float64)
    prediction = model.predict(features)
    # predict() returns one value per row; take the single element explicitly
    # rather than calling float() on an array.
    return float(prediction[0])

Defining function that takes in user inputs from the web page

def main():
    """Render the car-price page and show a prediction on request.

    Draws the title and banner, collects the seven model inputs as free-text
    fields, and when the "Predict" button is pressed calls ``predict_price``
    and displays the rounded result. Non-numeric input is reported to the
    user instead of raising.
    """
    st.title("Car Price Prediction")
    # The HTML lines are kept unindented so the string content is unchanged.
    html_temp = """
<div style="background-color:#025246 ;padding:10px">
<h2 style="color:white;text-align:center;">
Used Car Price Prediction ML App </h2>
</div>
"""
    st.markdown(html_temp, unsafe_allow_html=True)

    # The second argument of each text_input is the field's initial value,
    # used here as a hint.
    Present_Price = st.text_input(
        "What is the current market value of the car?", "In Lakhs")
    Kms_Driven = st.text_input(
        "How much kilometers the car has driven?", "Type Here")
    Fuel_Type = st.text_input(
        "What is the type of fuel used?",
        "Please Type 0 for CNG/ 1 for Diesel/ 2 for Petrol")
    Seller_Type = st.text_input(
        "What is the type of seller?",
        "Please Type 0 for Dealer/ 1 for Individual")
    Transmission = st.text_input(
        "What is the type of Transmission?",
        "Please type 0 for Automatic/ 1 for manual")
    Owner = st.text_input("What is the no. of owners?",
                          "Please type 0/1/3")
    no_year = st.text_input("How many years old?", "Type here")

    if st.button("Predict"):
        try:
            output = predict_price(Present_Price, Kms_Driven,
                                   Fuel_Type, Seller_Type,
                                   Transmission, Owner, no_year)
        except ValueError:
            # Raised when a field still holds its hint text or other
            # non-numeric input.
            st.error("Please enter numeric values for all fields.")
        else:
            st.success(
                'The selling price of this vehicle will be approximately {} lakhs'
                .format(round(output, 2)))

Looking at the full script

import streamlit as st
import pickle
import numpy as np
# Load the trained model saved by the training script.
# NOTE: unpickling can execute arbitrary code; only load model files you trust.
with open('random_forest_regression_model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)


def predict_price(Present_Price, Kms_Driven, Fuel_Type, Seller_Type,
                  Transmission, Owner, no_year):
    """Return the predicted selling price for a single car.

    The seven arguments are packed, in this order, into one feature row and
    passed to the module-level ``model``. Values may be numbers or numeric
    strings (as returned by ``st.text_input``); they are converted to float.
    NOTE(review): the column order is assumed to match the order used when
    the model was trained -- confirm against the training script.

    Raises:
        ValueError: if any argument cannot be converted to a float.
    """
    # One row, seven columns; astype() turns numeric strings into floats.
    features = np.array([[Present_Price, Kms_Driven, Fuel_Type,
                          Seller_Type, Transmission, Owner,
                          no_year]]).astype(np.float64)
    prediction = model.predict(features)
    # predict() returns one value per row; take the single element explicitly
    # rather than calling float() on an array.
    return float(prediction[0])


def main():
    """Render the car-price page and show a prediction on request.

    Draws the title and banner, collects the seven model inputs as free-text
    fields, and when the "Predict" button is pressed calls ``predict_price``
    and displays the rounded result. Non-numeric input is reported to the
    user instead of raising.
    """
    st.title("Car Price Prediction")
    # The HTML lines are kept unindented so the string content is unchanged.
    html_temp = """
<div style="background-color:#025246 ;padding:10px">
<h2 style="color:white;text-align:center;">
Used Car Price Prediction ML App </h2>
</div>
"""
    st.markdown(html_temp, unsafe_allow_html=True)

    # The second argument of each text_input is the field's initial value,
    # used here as a hint.
    Present_Price = st.text_input(
        "What is the current market value of the car?", "In Lakhs")
    Kms_Driven = st.text_input(
        "How much kilometers the car has driven?", "Type Here")
    Fuel_Type = st.text_input(
        "What is the type of fuel used?",
        "Please Type 0 for CNG/ 1 for Diesel/ 2 for Petrol")
    Seller_Type = st.text_input(
        "What is the type of seller?",
        "Please Type 0 for Dealer/ 1 for Individual")
    Transmission = st.text_input(
        "What is the type of Transmission?",
        "Please type 0 for Automatic/ 1 for manual")
    Owner = st.text_input("What is the no. of owners?",
                          "Please type 0/1/3")
    no_year = st.text_input("How many years old?", "Type here")

    if st.button("Predict"):
        try:
            output = predict_price(Present_Price, Kms_Driven,
                                   Fuel_Type, Seller_Type,
                                   Transmission, Owner, no_year)
        except ValueError:
            # Raised when a field still holds its hint text or other
            # non-numeric input.
            st.error("Please enter numeric values for all fields.")
        else:
            st.success(
                'The selling price of this vehicle will be approximately {} lakhs'
                .format(round(output, 2)))


# Build the page only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()


