Spaces:
Sleeping
Sleeping
Muhammad Fariz Firdaus
committed on
Commit
·
39a6292
1
Parent(s):
4b04264
Upload 9 files
Browse files- app.py +41 -0
- bquxjob_24b82863_18c2d385ce5.csv +0 -0
- eda.py +54 -0
- list_cat_cols_o.txt +1 -0
- list_num_cols_s.txt +1 -0
- model_best.pkl +3 -0
- model_encoder_ord.pkl +3 -0
- model_scaler_skew.pkl +3 -0
- prediction.py +99 -0
app.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import eda
|
4 |
+
import prediction
|
5 |
+
|
6 |
+
# Entry script of the Streamlit app: configures the page, offers a sidebar
# page selector, and dispatches to the page chosen by the user.

# Set the browser-tab title, favicon and overall layout.
st.set_page_config(
    page_title="Default Credit Card Client Prediction",
    page_icon="💳",
    layout='wide',
    initial_sidebar_state='expanded',
)

# Sidebar navigation: the title acts as the visible caption of the select box.
st.sidebar.title("Choose a page:")
# An empty widget label is discouraged by Streamlit (accessibility), so a real
# label is supplied and hidden instead of passing "".
page = st.sidebar.selectbox(
    "Page",
    ('Landing Page', 'Data Exploration', 'Data Prediction'),
    label_visibility="collapsed",
)

# Display different content depending on the selected page.
if page == 'Data Exploration':
    eda.run()
elif page == 'Data Prediction':
    prediction.run()
else:
    # Landing page: headline, illustration and three collapsible info panels.
    st.title("Will You Default on Your Credit Card Payment?")
    st.subheader("Find out with this web app that uses machine learning to predict your defaulting risk.")

    # Illustration for the case.
    st.image("https://images.emojiterra.com/twitter/512px/1f4b3.png", width=300)

    with st.expander("Background dataset"):
        st.caption("""
        This web app uses a dataset from Taiwanese Bank that contains information on default payments, demographic factors, credit data, payment history, and bill statements of credit card clients in Taiwan from April 2005 to September 2005.
        """)

    with st.expander("Problem statements"):
        st.caption("The goal is to predict whether a client will default on their payment next month based on the given features.")

    with st.expander("Sponsor"):
        st.caption("""
        This web app is sponsored by **XYZ Bank**, a leading financial institution that offers credit cards, loans, and other services to its customers.

        XYZ Bank is committed to providing the best customer experience and ensuring responsible lending practices.
        """)
        st.image("https://bankxyz.com/wp-content/uploads/2021/06/bankxyz-logo-1.png")
|
bquxjob_24b82863_18c2d385ce5.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eda.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import libraries
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import seaborn as sns
|
7 |
+
|
8 |
+
def run():
    """Render the exploratory-data-analysis page.

    Reads the credit-card CSV, lets the user pick one feature and any
    combination of the "Statistics" / "Distribution" views from the sidebar,
    and draws the selected views in the main area. When neither view is
    ticked, a short welcome message is shown instead.
    """
    # Load the dataset
    df = pd.read_csv("bquxjob_24b82863_18c2d385ce5.csv")

    # Sidebar: feature selection (exactly one feature via radio buttons)
    st.sidebar.title("EDA Options")
    st.sidebar.subheader("Choose the features (Only choose 1)")
    selected_feature = st.sidebar.radio(
        "Select Feature",
        ["limit_balance", "sex", "education_level", "marital_status", "age",
         "pay_1", "bill_amt_1", "pay_amt_1", "default_payment_next_month"],
    )

    # Sidebar: which views to render (checkboxes return booleans)
    st.sidebar.subheader("Choose the metrics for visualization (can choose multiple)")
    stats = st.sidebar.checkbox("Statistics")
    dist = st.sidebar.checkbox("Distribution")

    # Main title
    st.title("Exploratory Data Analysis on Default of Credit Card Clients Dataset")

    # Nothing ticked yet: show the default text and stop here.
    if not (stats or dist):
        st.write("## Welcome to the EDA.")
        st.write("Please select the features and metrics that you want to explore from the sidebar.")
        return

    # Summary statistics of the selected column
    if stats:
        st.subheader("Statistics")
        st.write(df[selected_feature].describe())

    # Histogram (with KDE overlay) of the selected column
    if dist:
        st.subheader("Distribution")
        fig, ax = plt.subplots(figsize=(10, 6))
        # Draw on the axes created above rather than on pyplot's implicit
        # "current axes", which is shared global state.
        sns.histplot(df[selected_feature], kde=True, bins=20, ax=ax)
        st.pyplot(fig)
        # Figures made through pyplot stay registered until closed; close it
        # so they do not pile up over repeated script reruns.
        plt.close(fig)


if __name__ == '__main__':
    run()
|
list_cat_cols_o.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["education_level", "pay_6", "pay_5", "pay_4", "pay_3", "pay_2", "pay_1"]
|
list_num_cols_s.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["limit_balance", "pay_amt_6", "pay_amt_5", "pay_amt_4", "pay_amt_3", "pay_amt_2", "pay_amt_1"]
|
model_best.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbcd37c8e30508146a530931a5f8ea3cc463131fd9b39fb800002aa323027eff
|
3 |
+
size 116510
|
model_encoder_ord.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:159203e2237ea4dae8a16a2474e8f31f3cb54e3f84abdab5e959ac06ed817729
|
3 |
+
size 2108
|
model_scaler_skew.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:085d483e8191b64854be09814c44e5b0d99534064c0413d25f09ad61154a5d3f
|
3 |
+
size 936
|
prediction.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import libraries
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import pickle
|
6 |
+
import json
|
7 |
+
|
8 |
+
def _load_pickle(path):
    """Return the object unpickled from the file at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _load_json(path):
    """Return the value parsed from the JSON text file at *path*."""
    with open(path, 'r') as handle:
        return json.load(handle)


# Artifacts used by run(): the object whose predict() is called, plus the two
# transformers applied to the categorical and numeric inputs respectively.
model = _load_pickle('model_best.pkl')
encoder_o = _load_pickle('model_encoder_ord.pkl')
scaler_s = _load_pickle('model_scaler_skew.pkl')

# Column-name lists used to slice the input frame before transforming.
num_col = _load_json('list_num_cols_s.txt')
cat_col = _load_json('list_cat_cols_o.txt')
|
23 |
+
|
24 |
+
def run():
    """Render the prediction page.

    Shows a legend for the input fields, collects one value per feature from
    sidebar widgets, and - once the "Predict" button is pressed - transforms
    the inputs with the loaded scaler/encoder, queries the loaded model and
    reports whether a default is predicted.
    """
    months = range(1, 7)

    # Every input shown in the sidebar, in display order.
    features = (
        ["limit_balance", "sex", "education_level", "marital_status", "age"]
        + [f"pay_{m}" for m in months]
        + [f"bill_amt_{m}" for m in months]
        + [f"pay_amt_{m}" for m in months]
    )

    # Allowed values for the categorical inputs; any feature that is not a
    # key here is entered as a number instead.
    repayment_codes = [-2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    choices = {
        "sex": [1, 2],
        "education_level": [1, 2, 3],
        "marital_status": [1, 2],
    }
    for m in months:
        choices[f"pay_{m}"] = repayment_codes

    # Legend explaining the input fields.
    st.header('Options description')
    legend = (
        '1. sex: Gender (1 = male; 2 = female)',
        '2. education_level: Education Level (1=graduate school, 2=university, 3=high school)',
        '3. marital_status: Marital status (1 = married; 2 = single)',
        '4. age: Age (year)',
        '5. limit_balance: The amount of the given credit (NT dollar)',
        '6. pay_6 to pay_1: The repayment status in April to September 2005',
        '7. pay_amt_6 to pay_amt_1: The amount of previous payment in April to September 2005 (NT dollar)',
        '8. bill_amt_6 to bill_amt_1: The amount of bill statement in April to September 2005 (NT dollar)',
    )
    for entry in legend:
        st.write(entry)

    # Sidebar with one widget per feature.
    sidebar = st.sidebar
    sidebar.title("Prediction Options")
    sidebar.subheader("Enter the values for the features")

    inputs = {}
    for name in features:
        if name in choices:
            inputs[name] = sidebar.selectbox(name, choices[name])
        else:
            inputs[name] = sidebar.number_input(name, min_value=0)

    predict = sidebar.button("Predict")

    st.title("Click the `Predict` button to start")

    # Nothing more to do until the button has been pressed.
    if not predict:
        return

    # Single-row frame holding the submitted values.
    frame = pd.DataFrame([inputs])

    # Transform the numeric and categorical column groups separately, then
    # join them side by side in that order.
    scaled_num = scaler_s.transform(frame[num_col])
    encoded_cat = encoder_o.transform(frame[cat_col])
    model_input = np.concatenate([scaled_num, encoded_cat], axis=1)

    predicted_label = model.predict(model_input)[0]

    # Label 0 is reported as "not likely to default"; anything else as likely.
    if predicted_label != 0:
        st.error("# The client is likely to default on their payment next month.")
    else:
        st.success("# The client is not likely to default on their payment next month.")


if __name__ == '__main__':
    run()
|