update
Browse files- .gitignore +26 -0
- LICENSE +21 -0
- README.md +0 -0
- abc.txt +0 -0
- app.py +164 -0
- backend/__init__.py +0 -0
- backend/bot.py +5 -0
- backend/model.py +8 -0
- data/processed_data.csv +0 -0
- data/sample_data.csv +101 -0
- draft.ipynb +420 -0
- git.sh +25 -0
- lib/conversion_ref.xlsx +0 -0
- lib/scaling_ref.xlsx.xlsx +0 -0
- model/model.pkl +3 -0
- pages/batch-processing.py +76 -0
- pages/documentation.py +49 -0
- reference.md +38 -0
- requirements.txt +5 -0
- run.sh +2 -0
.gitignore
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Python cache files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# Jupyter Notebook checkpoints
|
7 |
+
.ipynb_checkpoints
|
8 |
+
|
9 |
+
# Environment variables
|
10 |
+
.env
|
11 |
+
|
12 |
+
# Streamlit specific files
|
13 |
+
.streamlit/
|
14 |
+
|
15 |
+
|
16 |
+
|
17 |
+
# Model files
|
18 |
+
|
19 |
+
*.h5
|
20 |
+
|
21 |
+
# Logs
|
22 |
+
*.log
|
23 |
+
|
24 |
+
# Virtual environment
|
25 |
+
venv/
|
26 |
+
env/
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2025 keanteng
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
Binary files a/README.md and b/README.md differ
|
|
abc.txt
ADDED
File without changes
|
app.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from backend.model import load_model
|
4 |
+
import time
|
5 |
+
import google.generativeai as genai
|
6 |
+
|
7 |
+
# Page title
|
8 |
+
st.set_page_config(page_title='Employee Turnover Predictor', layout='wide')
|
9 |
+
|
10 |
+
# add sidebar
|
11 |
+
#st.sidebar.title("Employee Turnover Predictor")
|
12 |
+
|
13 |
+
# ~~~~ Title ~~~~
|
14 |
+
st.title("🧿 Employee Turnover Prediction")
|
15 |
+
st.markdown("""An example app powered by Streamlit to predict the likelihood of employee turnover using machine learning model""")
|
16 |
+
|
17 |
+
# ~~~~ Add Guidelines to the App ~~~~
|
18 |
+
with st.sidebar:
|
19 |
+
toggle = st.toggle("Show Guidelines", True)
|
20 |
+
st.write("**Input Details**")
|
21 |
+
|
22 |
+
if toggle:
|
23 |
+
with st.expander("💡 Guidelines", expanded=True):
|
24 |
+
st.write("Follow the steps below to predict the likelihood of employee turnover:")
|
25 |
+
st.write("1. Fill in the employee details, job-related information, salary information and satisfactory information in the form.")
|
26 |
+
st.write("2. Click on the 'Predict' button to get the prediction results.")
|
27 |
+
st.write("3. The prediction results will show whether the employee is likely to leave the company or not.")
|
28 |
+
st.write("4. The 'Project Information' section provides details about employee turnover, statistics, case study, and data source.")
|
29 |
+
st.write("5. The prediction results will be displayed below the 'Predict' button.")
|
30 |
+
|
31 |
+
# ~~~~ Layout: 2 Columns ~~~~
|
32 |
+
|
33 |
+
|
34 |
+
# ~~~~ Column 1 ~~~~
|
35 |
+
# ~~~~ Employee Details Input ~~~~
|
36 |
+
with st.sidebar:
|
37 |
+
with st.expander("👤 Employee Details", expanded=False):
|
38 |
+
age = st.number_input("Age", min_value=18, max_value=65, value=25)
|
39 |
+
marital_status = st.selectbox("Marital Status", ["Single", "Married", "Divorced"])
|
40 |
+
total_working_years = st.number_input("Total Working Years", min_value=0, max_value=50, value=5)
|
41 |
+
|
42 |
+
# ~~~~ Employee Job Related Information ~~~~
|
43 |
+
with st.expander("🏢 Job Related Information", expanded=False):
|
44 |
+
job_involvement = st.slider("Job Involvement", min_value=1, max_value=4, value=3)
|
45 |
+
overtime = st.radio("Overtime", ["Yes", "No"])
|
46 |
+
years_at_company = st.number_input("Years at Company", min_value=0, max_value=50, value=3)
|
47 |
+
years_in_current_role = st.number_input("Years in Current Role", min_value=0, max_value=50, value=2)
|
48 |
+
|
49 |
+
# ~~~~ Employee Salary Information ~~~~
|
50 |
+
with st.expander("💰 Salary Information (RM)", expanded=False):
|
51 |
+
monthly_income = st.number_input("Monthly Income", min_value=1000, max_value=20000, value=5000)
|
52 |
+
daily_rate = st.number_input("Daily Rate", min_value=100, max_value=1500, value=500)
|
53 |
+
hourly_rate = st.number_input("Hourly Rate", min_value=5, max_value=100, value=20)
|
54 |
+
percent_salary_hike = st.number_input("Percent Salary Hike", min_value=0, max_value=50, value=12)
|
55 |
+
|
56 |
+
# ~~~~ Employee Satisfaction Information ~~~~
|
57 |
+
with st.expander("😊 Satisfaction Information", expanded=False):
|
58 |
+
job_satisfaction = st.slider("Job Satisfaction", min_value=1, max_value=4, value=3)
|
59 |
+
environment_satisfaction = st.slider("Environment Satisfaction", min_value=1, max_value=4, value=3)
|
60 |
+
|
61 |
+
submit = st.button("Compute", type="primary")
|
62 |
+
st.divider()
|
63 |
+
|
64 |
+
with st.expander("🧪 Experimental Features", expanded=False):
|
65 |
+
st.caption("API token can be obtained at https://aistudio.google.com/.")
|
66 |
+
gemini_api = st.text_input("Gemini Token", "", type='password')
|
67 |
+
try:
|
68 |
+
genai.configure(api_key=gemini_api)
|
69 |
+
ai_model = genai.GenerativeModel("gemini-1.5-flash")
|
70 |
+
test = ai_model.generate_content("Explain how AI works")
|
71 |
+
st.success("API key is valid. Experimental feature access granted.")
|
72 |
+
except Exception as e:
|
73 |
+
st.error("API key is invalid. You don't have access to experimental features.")
|
74 |
+
|
75 |
+
|
76 |
+
# ~~~~ Column 2 ~~~~
|
77 |
+
# ~~~~ Display the project information (license and authors) ~~~~
|
78 |
+
with st.sidebar:
|
79 |
+
st.caption("MIT License © 2025 Khor Kean Teng, Ng Jing Wen, Lim Sze Chie, Tan Yee Thong, Yee See Marn")
|
80 |
+
|
81 |
+
model = load_model('model/model.pkl')
|
82 |
+
|
83 |
+
# Do data transformation here
|
84 |
+
monthly_income = (monthly_income - 1000)/(20000 - 1000)
|
85 |
+
daily_rate = (daily_rate - 100)/(1500 - 100)
|
86 |
+
hourly_rate = (hourly_rate - 5)/(100 - 5)
|
87 |
+
marital_status = 1 if marital_status == "Divorced" else 2 if marital_status == "Married" else 3
|
88 |
+
overtime = 1 if overtime == "Yes" else 0
|
89 |
+
|
90 |
+
business_travel = 1
|
91 |
+
department = 1
|
92 |
+
distance_from_home = 2
|
93 |
+
education = 3
|
94 |
+
education_field = 1
|
95 |
+
gender = 1
|
96 |
+
job_role = 1
|
97 |
+
monthly_rate = 1
|
98 |
+
num_companies_worked = 2
|
99 |
+
performance_rating = 3
|
100 |
+
relationship_satisfaction = 3
|
101 |
+
stock_option_level = 0
|
102 |
+
training_times_last_year = 2
|
103 |
+
work_life_balance = 3
|
104 |
+
years_since_last_promotion = 0
|
105 |
+
years_with_curr_manager = 0
|
106 |
+
|
107 |
+
input_data = pd.DataFrame({
|
108 |
+
"Age": [age],
|
109 |
+
"BusinessTravel": [business_travel],
|
110 |
+
"DailyRate": [daily_rate],
|
111 |
+
"Department": [department],
|
112 |
+
"DistanceFromHome": [distance_from_home],
|
113 |
+
"Education": [education],
|
114 |
+
"EducationField": [education_field],
|
115 |
+
"EnvironmentSatisfaction": [environment_satisfaction],
|
116 |
+
"Gender": [gender],
|
117 |
+
"HourlyRate": [hourly_rate],
|
118 |
+
"JobInvolvement": [job_involvement],
|
119 |
+
"JobRole": [job_role],
|
120 |
+
"JobSatisfaction": [job_satisfaction],
|
121 |
+
"MaritalStatus": [marital_status],
|
122 |
+
"MonthlyIncome": [monthly_income],
|
123 |
+
"MonthlyRate": [monthly_rate],
|
124 |
+
"NumCompaniesWorked": [num_companies_worked],
|
125 |
+
"OverTime": [overtime],
|
126 |
+
"PercentSalaryHike": [percent_salary_hike],
|
127 |
+
"PerformanceRating": [performance_rating],
|
128 |
+
"RelationshipSatisfaction": [relationship_satisfaction],
|
129 |
+
"StockOptionLevel": [stock_option_level],
|
130 |
+
"TotalWorkingYears": [total_working_years],
|
131 |
+
"TrainingTimesLastYear": [training_times_last_year],
|
132 |
+
"WorkLifeBalance": [work_life_balance],
|
133 |
+
"YearsAtCompany": [years_at_company],
|
134 |
+
"YearsInCurrentRole": [years_in_current_role],
|
135 |
+
"YearsSinceLastPromotion": [years_since_last_promotion],
|
136 |
+
"YearsWithCurrManager": [years_with_curr_manager]
|
137 |
+
})
|
138 |
+
|
139 |
+
# ~~~~ Run the prediction when the "Compute" button is pressed ~~~~
|
140 |
+
if submit:
|
141 |
+
prediction = model.predict(input_data)
|
142 |
+
if prediction[0] == 0:
|
143 |
+
message = "The employee is not likely to leave the company."
|
144 |
+
else:
|
145 |
+
message = "The employee is likely to leave the company."
|
146 |
+
|
147 |
+
with st.status("Predicting...", expanded = True) as status:
|
148 |
+
# Short pause before showing the result (the prediction was already computed above)
|
149 |
+
time.sleep(1)
|
150 |
+
status.update(
|
151 |
+
label = "Prediction Results", state="complete", expanded = True
|
152 |
+
)
|
153 |
+
# Display the prediction
|
154 |
+
if prediction[0] == 0:
|
155 |
+
st.error(message)
|
156 |
+
else:
|
157 |
+
st.success(message)
|
158 |
+
|
159 |
+
with st.status("AI Opinion", expanded=True):
|
160 |
+
try:
|
161 |
+
response = ai_model.generate_content(f"Give some opinions in about 100 word based on the prediction results where the employee is {message}")
|
162 |
+
st.write(response.text)
|
163 |
+
except Exception as e:
|
164 |
+
st.write("You don't have access to this feature. Please authenticate to use this feature.")
|
backend/__init__.py
ADDED
File without changes
|
backend/bot.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
def get_data(file_path):
|
4 |
+
data = pd.read_csv(file_path)
|
5 |
+
return data
|
backend/model.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import joblib
|
2 |
+
import sys
|
3 |
+
|
4 |
+
def load_model(file_path):
|
5 |
+
with open(file_path, 'rb') as f:
|
6 |
+
model = joblib.load(f)
|
7 |
+
print(f"Model loaded from {file_path}", file=sys.stderr)
|
8 |
+
return model
|
data/processed_data.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/sample_data.csv
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Age,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,HourlyRate,JobInvolvement,JobRole,JobSatisfaction,MaritalStatus,MonthlyIncome,MonthlyRate,NumCompaniesWorked,OverTime,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
|
2 |
+
18,2,0.884896126713833,2,6,2,1,2,2,79.0638746727023,3.67096695622934,4,1,1,0.0,0.245421584941066,0,0,22.7837960009114,4,4,1,6,0,1,0,2,0,0
|
3 |
+
35,3,0.528299633583058,1,10,2,3,2,1,44.462381645364,4.83823641694839,1,3,1,0.354165796688055,0.521718940362603,0,0,10.7537434714825,3,4,1,8,0,3,9,8,0,4
|
4 |
+
46,3,0.225415889690312,2,1,2,2,2,1,31.8216855199669,2.21343541755574,9,2,1,0.261410600966966,0.824330446815363,4,0,13.092579722152,3,3,2,8,6,1,7,5,4,4
|
5 |
+
26,2,0.300147564337888,1,1,2,3,3,2,82.7363207740769,2.99713469482815,9,4,3,0.156042789492239,0.0953940534594883,1,0,11.0152615045712,3,3,1,11,0,1,7,1,1,4
|
6 |
+
40,3,0.387151473602023,3,5,3,2,1,1,85.7357395806325,1.94312035578636,8,1,1,0.161743663345642,0.0,0,0,13.7655367802936,3,3,1,12,2,3,0,7,0,4
|
7 |
+
18,2,0.292969687020957,1,14,3,1,3,1,51.0496193264179,3.13436603524644,3,2,3,0.0,0.946398296355741,2,0,14.0517770562183,3,3,1,0,3,1,0,1,0,0
|
8 |
+
39,3,0.247270982549977,3,1,3,5,2,1,30.0,2.79237143648608,7,2,3,0.390047682966701,0.0,1,0,23.4496029983759,4,1,1,15,0,1,16,5,5,0
|
9 |
+
42,2,0.787214143098539,1,13,2,1,1,1,95.4470929054933,2.42684909943778,3,1,2,0.0,0.187015556131792,4,0,3.47247565458359,3,1,1,7,2,1,14,2,0,4
|
10 |
+
39,2,0.360353272174383,3,24,4,4,2,1,84.7859793942981,3.3814830915685,5,1,1,0.576933175065172,0.844341727191312,2,0,15.193892585657,3,3,1,6,2,1,20,3,1,0
|
11 |
+
41,2,0.318283622419431,2,30,3,2,1,1,98.2748549218772,2.81126043226691,3,4,1,0.290980421971081,0.725236083577489,1,0,6.64013154341647,3,4,1,6,1,2,11,8,4,6
|
12 |
+
27,1,0.673707321188431,1,16,2,5,4,1,70.1303647171047,2.91339726786792,8,2,1,0.51164053240056,0.0,5,0,16.2257498300301,3,4,2,6,1,1,8,8,1,9
|
13 |
+
44,3,0.0,2,10,3,4,2,1,61.8812825977229,3.43351682699186,2,4,1,0.0,0.383693356002619,0,0,13.308798149351,3,1,1,15,3,2,2,0,3,0
|
14 |
+
20,1,0.577365171755751,1,1,3,2,1,2,63.1887682855909,2.30938877125004,8,3,1,0.0,0.0,1,0,10.9347322984555,3,1,1,3,4,1,0,0,0,0
|
15 |
+
32,3,0.477765221569139,2,20,3,3,3,2,50.7958051233783,3.71673211590062,9,4,3,0.213189245431854,0.0121357517148391,1,0,19.058285037905,3,4,1,1,2,2,6,2,5,1
|
16 |
+
30,3,0.275420197043712,2,4,1,3,1,1,36.0995676794035,1.92814957752323,6,4,1,0.274737404991838,0.316514367598619,4,0,15.4375055694529,3,1,1,2,2,2,1,1,0,8
|
17 |
+
40,1,0.108375677644769,1,16,1,3,3,1,100.0,2.82942953155278,7,2,1,0.673287394667207,0.328312344263964,5,0,12.9250698257048,3,2,3,15,2,3,9,0,0,2
|
18 |
+
32,1,0.420267107662114,3,17,4,5,3,1,48.5248053925291,4.00327292025545,9,1,2,0.512551434407615,0.046466061825807,0,0,8.93587059743023,3,2,1,1,2,2,3,5,10,10
|
19 |
+
60,2,0.987539186490136,1,1,1,1,3,1,57.1100774153506,3.04215376140908,3,1,1,0.56241860397887,0.784536722141436,4,0,18.5114978949955,3,1,1,21,1,1,5,1,2,4
|
20 |
+
31,2,0.397234116968643,3,11,3,2,1,1,56.690794804316,3.73321798999671,7,1,3,0.60864556197485,0.601344112666162,0,0,17.4563649716418,3,1,1,7,4,3,0,0,0,2
|
21 |
+
34,2,0.631252327121418,2,1,2,3,2,1,61.2714432624009,4.06239428666384,3,3,2,0.0894032560941022,0.272017374963247,1,0,25.2964724360263,3,2,1,19,2,3,12,11,0,6
|
22 |
+
32,3,0.583080060423501,2,6,3,3,4,1,38.3009778493344,2.58678373810886,7,4,3,0.141481564463505,0.266929456859619,2,0,17.1257055277748,3,1,1,19,0,2,4,9,6,7
|
23 |
+
60,2,0.60886256413886,2,8,3,3,2,1,100.0,3.44946119445524,5,1,1,0.0,0.361824377958698,4,1,20.8668249436963,3,1,3,15,1,1,4,1,0,0
|
24 |
+
43,3,1.0,1,5,2,3,2,1,69.8285383801696,0.144217082745642,3,1,1,0.0159625917405128,0.501361456095993,5,0,11.5691420209477,3,1,1,8,2,1,12,0,0,3
|
25 |
+
32,3,0.283429576600065,1,15,3,4,4,2,52.9136230986694,0.465063230277119,1,2,2,0.0,0.0,0,0,18.5161622315807,3,4,2,0,1,3,7,0,0,3
|
26 |
+
41,1,1.0,1,2,4,4,3,2,31.1532111448524,2.92135005637192,9,2,2,0.227615827551701,0.539244222417047,4,1,16.3499812754728,3,3,1,0,2,2,0,0,0,0
|
27 |
+
52,1,0.101237935874057,1,13,1,4,2,1,30.0,1.31146351394951,1,1,1,0.504183987510003,1.0,3,0,14.8607847204962,3,4,1,33,1,3,40,14,15,0
|
28 |
+
23,3,0.102684704723007,2,1,3,4,1,1,30.0,2.96416504673855,2,3,2,0.556808230844202,0.632677675411816,0,0,22.6700902921069,4,3,1,8,5,3,6,7,5,7
|
29 |
+
25,3,0.491411456966071,3,1,2,3,3,2,41.9430711663668,0.24382800536667,7,1,3,0.292643444071101,0.520450903013562,4,0,16.3645194480684,3,2,1,13,4,3,0,1,0,0
|
30 |
+
41,2,1.0,1,28,3,1,1,1,99.5163725714313,2.46998261191354,8,1,3,0.308352455339601,0.791677085623735,0,0,19.1087816462321,4,1,1,17,1,2,2,4,0,4
|
31 |
+
47,3,1.0,1,8,3,1,4,1,100.0,1.93007107949463,4,2,1,1.0,0.808357336353662,0,0,8.05348716425844,3,3,1,29,4,2,14,7,12,10
|
32 |
+
31,3,0.538726517040128,2,11,2,5,1,1,63.8730474293125,3.40768674158798,1,1,2,0.0304966971320197,0.114538667573307,3,0,25.9631396946108,3,3,1,11,1,4,0,1,0,2
|
33 |
+
48,2,0.0,3,1,2,3,1,2,64.5341473958719,2.2905912521993,6,4,2,0.219366025848428,0.772960667494781,6,0,17.7292168228217,3,2,1,8,2,1,11,3,8,3
|
34 |
+
34,3,0.139834906667606,1,18,2,1,2,1,54.0020408294524,1.27054952647828,5,3,1,0.506594986970618,0.737462667728527,7,0,13.7890171740499,3,1,1,14,0,2,5,0,0,5
|
35 |
+
40,2,0.459288876683091,1,1,3,2,1,1,31.5010433871168,1.26774037991838,5,4,2,0.0478860782391292,0.665789974132911,0,0,7.74524304113271,3,1,1,19,4,3,18,15,15,5
|
36 |
+
40,1,0.733826685803661,1,17,2,4,1,2,75.7008577598748,3.04269222471344,3,1,1,0.934884523021577,0.394679557552056,6,0,14.3137991644419,3,4,2,12,3,2,12,7,12,6
|
37 |
+
51,3,1.0,1,2,4,3,3,1,53.7435032675141,2.9794360959042,4,3,1,0.927690541233592,0.585487118852776,1,0,12.6149477519526,3,3,1,22,4,3,6,4,5,2
|
38 |
+
41,1,0.299939404080079,1,5,2,1,3,1,38.2363792591174,3.35728228528459,3,2,1,0.390666807436103,0.982111229858157,0,0,5.82668934597605,3,2,1,19,4,2,14,8,2,11
|
39 |
+
40,3,0.347830860312821,2,25,3,1,2,1,100.0,3.44844464252123,4,1,2,0.410669433949105,0.285220975922328,0,1,18.8378149985021,4,4,1,10,0,1,7,0,1,0
|
40 |
+
25,2,0.122405696923293,2,26,1,2,4,1,100.0,2.24115423983947,1,1,2,0.311746154220245,0.323694753892343,0,0,16.7614404831178,3,1,2,5,1,1,0,0,0,0
|
41 |
+
30,1,0.0,3,16,4,6,1,1,53.5548830320373,3.36337156776314,9,1,3,0.447219301876704,0.691174250033538,8,0,13.2182391923076,3,2,1,10,4,1,0,6,1,0
|
42 |
+
22,2,0.245186418295391,1,16,4,1,3,1,71.2252921762933,1.86915070056365,3,3,3,0.310870109100547,0.263397961192169,3,0,12.508787466875,3,4,1,0,3,1,6,3,3,5
|
43 |
+
39,3,1.0,3,1,2,2,1,1,30.0,2.50267653614494,1,1,2,0.124353700607106,0.138761602977062,5,0,20.1699344610357,3,1,1,9,1,4,0,4,0,3
|
44 |
+
55,1,0.457494732134314,2,8,2,2,2,1,39.9519944162056,3.2186290108445,3,3,3,0.0,0.809749024984892,8,0,8.41488603710065,3,3,1,20,4,2,0,2,0,0
|
45 |
+
27,3,0.0683477075858986,1,1,1,2,1,2,87.1954841532308,2.55611347216872,5,4,1,0.470759327649574,1.0,0,0,15.1087293762466,3,2,1,4,2,3,14,4,0,2
|
46 |
+
26,2,0.73948976814775,2,5,3,4,2,2,33.2284847405531,2.4796973838769,6,2,1,0.230683283081594,0.504880583632711,0,0,17.0764735976111,3,3,1,14,1,2,0,0,0,4
|
47 |
+
29,3,0.0354388603265388,3,28,1,1,2,1,72.4159982559238,4.64794277009462,8,3,2,0.148891114835579,0.313020820976981,6,1,12.0672870246404,3,4,1,17,4,3,0,3,2,2
|
48 |
+
19,3,0.991076195418594,3,21,2,4,2,2,75.2957551794003,3.44356466556024,9,4,3,0.0,0.753080589338407,1,0,10.9077198221586,3,2,1,6,3,2,0,0,0,0
|
49 |
+
45,3,0.698863171850152,3,6,1,1,1,1,78.4980587048639,3.53464492500628,9,4,1,0.609884984478493,0.210101592914315,6,0,12.4065335784653,3,3,1,19,2,1,13,11,1,9
|
50 |
+
29,2,0.666588067523572,2,6,1,2,4,1,54.6325888225537,3.8593961787976,5,3,3,0.109258449018128,0.721956790831691,0,0,17.1969208088674,3,1,1,11,0,2,6,0,1,2
|
51 |
+
29,2,0.296397784083832,1,1,3,4,4,1,66.8239432425046,2.27399341963712,5,4,2,0.0586137895938363,0.177570657414424,0,0,12.9746107406301,3,3,1,2,1,1,4,15,6,9
|
52 |
+
30,2,0.305896614627132,2,17,1,2,4,1,30.0,1.90490455758757,6,4,1,0.0651803014460173,0.0280282360137817,0,0,14.673909416404,3,4,1,16,3,1,7,0,5,0
|
53 |
+
48,1,1.0,2,1,4,1,1,1,57.6621380496683,2.41613139942679,9,1,1,1.0,0.45071568192453,5,1,19.9419474090085,3,2,1,29,2,1,16,2,0,9
|
54 |
+
24,3,0.549572887781486,3,11,2,4,1,1,56.6395204758942,1.17522926861091,7,4,1,0.166056841559839,0.618516627740679,7,0,16.1571036955847,3,4,1,7,4,1,0,0,0,3
|
55 |
+
29,1,0.64423923596827,2,3,1,2,2,1,65.187684331889,2.97869476126021,4,1,1,0.0,0.931236025796003,5,0,11.8244446312614,3,4,3,0,3,3,4,0,0,0
|
56 |
+
35,1,0.0497980808132505,2,1,3,2,2,1,81.0442037386244,3.40818216001324,7,1,3,0.342044656675025,1.0,0,0,13.4796891124763,3,1,1,10,2,2,1,2,5,0
|
57 |
+
20,3,0.217032971741176,3,17,1,1,2,1,84.2319305900174,2.04535070025609,9,1,1,0.092220413281171,0.624169845063327,3,0,10.4384078905024,3,1,1,6,0,3,0,2,0,0
|
58 |
+
45,2,0.628001563945607,1,1,1,3,1,1,56.631294242497,1.39765248312659,3,4,1,0.0468385069932922,0.175953189712635,5,0,8.94177165598518,3,1,3,9,1,3,0,5,0,0
|
59 |
+
18,3,0.809748808057304,1,15,1,5,2,1,100.0,1.87820726361762,4,3,3,0.188433671375755,0.477661776734141,0,0,14.7236868239689,3,1,1,14,2,2,5,3,0,3
|
60 |
+
57,3,0.50242134536307,2,26,2,2,1,2,73.2182912787534,1.31611544273862,3,3,1,0.687797758716077,1.0,4,0,16.8875858430876,3,3,1,17,3,1,2,0,0,0
|
61 |
+
45,3,0.289683870986498,3,1,1,3,1,2,90.2975851774595,2.66088514427624,8,4,2,0.0,0.121820636964127,1,0,12.6582388222199,3,2,1,6,0,2,6,3,0,8
|
62 |
+
34,3,0.848964499186754,1,1,4,1,4,1,100.0,4.01727491210708,5,3,2,0.386490117426162,0.657500820389643,3,0,13.508891426895,3,1,1,18,2,1,5,2,3,2
|
63 |
+
29,1,0.350201779529346,2,3,1,2,4,1,86.126543361023,3.58486432511929,6,3,1,0.109813581837882,0.69446148475871,0,0,15.3044579048155,3,1,1,5,2,3,2,0,0,0
|
64 |
+
31,1,0.138291373005619,2,3,3,5,1,1,91.2425090523438,2.44173960411085,4,2,1,0.406516546539312,0.977431858004463,3,0,17.5252083440194,3,2,1,7,6,3,10,11,9,1
|
65 |
+
27,2,0.184926211218149,3,5,1,2,3,1,69.2413466445254,2.13140231345731,4,3,3,0.401596815933898,0.991180483799566,9,0,9.8701671297013,3,1,1,2,2,3,0,2,0,1
|
66 |
+
41,1,0.301660900545997,2,5,1,4,1,1,69.1251020961862,3.303702721918,1,3,2,0.170758897572894,0.926567099081853,1,1,15.3486479229393,3,1,1,13,0,2,13,5,5,6
|
67 |
+
53,2,0.127929389514159,2,1,1,4,1,1,30.0,3.64056139246181,7,3,3,0.0350563706070154,0.932185707564198,6,0,19.9410388749096,3,3,1,5,2,1,8,6,4,5
|
68 |
+
32,2,0.537218139779456,2,12,1,2,2,2,30.2121676237593,1.02389538327548,5,3,2,0.0003468097307804,1.0,3,0,16.3527029513081,3,3,3,5,0,3,1,3,0,5
|
69 |
+
51,3,0.420620019752982,1,5,4,1,1,1,41.1665536426107,4.16841998425722,6,1,3,0.718600021277349,1.0,9,0,12.2907775208906,3,3,1,39,3,2,35,17,7,4
|
70 |
+
36,1,0.000539911682434,3,7,1,3,3,1,43.1831637554264,0.856880800663716,6,3,1,0.0,0.280222813691602,0,0,13.9889177591685,3,4,1,2,2,2,5,0,1,0
|
71 |
+
55,3,0.612000043966685,1,5,2,2,3,1,79.1880042545104,2.32890009430343,1,1,1,0.277643535334797,0.057370855046233,8,0,24.5947889701218,4,4,1,17,2,3,0,5,0,0
|
72 |
+
60,2,0.264359784514337,2,1,4,1,3,1,44.2619392221251,3.49485914568419,4,1,3,1.0,1.0,9,0,9.73156887226829,3,4,1,39,0,2,30,16,13,7
|
73 |
+
18,1,0.1864210027337,2,1,3,1,2,1,36.3641373587089,1.53364965745007,6,3,2,0.313299665299516,0.724344242987018,0,1,11.3117576847644,3,3,1,0,2,3,6,7,0,5
|
74 |
+
35,1,0.149976915447758,2,4,3,1,4,1,30.0,3.45596879546415,3,4,2,0.0,0.109758123967461,1,0,23.5388294679342,3,1,1,9,4,3,6,4,0,6
|
75 |
+
24,3,0.0,1,16,1,6,1,1,88.7380368066718,1.68120875556832,1,2,2,0.329572045338643,0.463364701591192,2,0,13.0980864479692,3,1,1,0,2,4,0,0,0,0
|
76 |
+
36,2,0.785280352606463,2,1,1,1,4,1,67.9733720965838,2.03641662262983,2,1,2,0.161104494928546,0.286160743077737,5,0,18.6238434791828,3,4,1,18,4,3,9,13,5,7
|
77 |
+
56,1,0.678837697167519,2,7,4,4,1,1,100.0,3.9674499156043,2,3,1,0.956517844734314,1.0,0,0,23.7956385496542,4,1,1,35,1,2,16,7,2,12
|
78 |
+
37,2,0.0448115878220008,1,1,4,3,1,2,69.2190755146291,1.96632893104638,6,3,3,0.0,0.601621016986509,2,1,16.2632826546757,3,2,1,7,2,4,1,6,2,0
|
79 |
+
50,2,0.68223104952878,3,11,2,3,2,2,78.2108011094033,0.929492133143435,8,3,1,0.193048805427952,0.815493553880714,4,0,18.678728566695,3,4,2,0,1,1,2,1,2,7
|
80 |
+
38,2,0.188203692922371,2,1,1,3,1,1,46.5605404327037,3.66448881961734,8,1,2,0.366177878589499,0.217528004179393,9,0,13.9243538955,3,4,1,7,3,4,14,7,9,8
|
81 |
+
20,1,0.0,1,2,4,1,3,1,64.5251665412344,1.76556005965621,3,3,1,0.311859436922276,0.261422565519306,2,0,17.776323495146,3,1,1,0,0,2,0,0,9,1
|
82 |
+
34,2,0.873721197014631,1,1,2,2,3,1,52.2047593206339,2.570714165024,5,4,2,0.0,0.911560641788863,5,0,20.1237936782554,3,1,2,7,1,1,4,0,3,6
|
83 |
+
30,3,0.668962504357545,2,1,1,1,3,1,53.2983550932715,2.65479448412058,4,1,1,0.044336263162694,0.310761644954177,2,1,10.3410029768022,3,3,1,0,0,3,0,1,0,0
|
84 |
+
25,2,0.111599733309324,2,27,5,6,2,1,94.3932821309932,1.01142455202132,6,1,2,0.0588144620147767,0.470976435692039,2,0,12.4208996400604,3,1,1,2,1,4,4,2,0,1
|
85 |
+
36,2,0.410798507963005,2,16,2,3,2,1,69.0075924589165,1.09835353624233,5,3,2,0.0411839918915098,0.858893237497417,1,0,11.601284079293,3,1,1,10,0,3,7,3,1,0
|
86 |
+
33,2,0.242771746453078,1,27,5,4,1,1,84.9500319582508,2.2432390572045,4,2,1,0.0,0.618578424455441,6,0,11.8760849400806,3,2,1,0,0,4,0,3,0,1
|
87 |
+
32,3,0.946139773872123,3,1,1,2,2,1,100.0,-0.336825713563831,3,2,3,0.78903958519151,0.0441426774173903,6,1,11.5019593445149,3,1,1,22,2,2,25,6,6,11
|
88 |
+
38,2,0.522810206298461,3,10,4,2,1,2,74.3198915239166,2.53817696550786,5,1,1,0.526351935083306,0.534526160221444,0,0,15.5567377220441,3,1,1,25,2,2,0,0,0,3
|
89 |
+
18,2,0.371064218713859,1,1,2,6,1,2,71.8537357448658,1.96133220572076,1,2,2,0.13784008421834,0.906992789835979,2,0,9.22344448485286,3,2,1,6,2,4,0,2,0,0
|
90 |
+
25,1,0.0657451757033434,2,2,1,2,2,1,55.8465917211613,2.19250413755175,6,1,3,0.0,0.212225378389616,0,1,21.8426378390144,3,1,1,1,0,3,0,0,0,1
|
91 |
+
42,2,0.661593340762081,2,4,2,1,1,1,69.5572052280453,3.6703326924946,1,4,1,0.180865366477182,0.567356920305377,0,0,17.3035019856311,3,3,1,14,3,2,20,2,0,6
|
92 |
+
58,3,0.344378940901919,2,17,1,3,4,2,82.0612589389906,1.65624537577875,7,3,3,0.349772749037473,0.43878636124656,0,0,10.5182540387197,3,3,1,7,4,2,13,8,0,8
|
93 |
+
26,2,0.294054755831915,1,1,2,6,2,2,100.0,3.13016282768429,5,1,2,0.131897323187452,0.741690258031523,0,0,13.0864158725073,3,3,1,10,1,3,2,4,0,8
|
94 |
+
53,3,0.327562997022995,1,1,2,2,1,2,70.3339039309627,1.48484842567305,5,3,2,0.39913440519942,0.808863366009971,8,1,10.2361495536742,3,4,1,21,2,3,0,1,0,7
|
95 |
+
51,3,0.486897160786256,3,5,1,5,2,1,30.1517064036679,3.60970410790475,7,4,2,0.0,1.0,4,0,13.3261020569129,3,3,1,8,3,3,5,10,9,5
|
96 |
+
24,2,0.0,2,17,2,5,1,1,89.0549816298444,4.07250236820328,9,2,2,0.390311119543518,0.773158909831957,7,0,15.8333971478341,3,1,1,9,0,2,0,1,0,5
|
97 |
+
37,2,1.0,1,19,1,4,2,1,37.5283621503371,3.17505887239783,4,2,1,0.777472128406974,1.0,6,1,14.6097655989193,3,3,1,18,2,4,4,5,3,9
|
98 |
+
27,2,0.638254653640545,3,3,2,3,2,1,61.4959253207427,3.5292024815491,6,2,2,0.0,0.128339818239367,0,1,13.6956921640298,3,2,1,0,1,3,3,1,1,0
|
99 |
+
37,1,0.970191770062497,2,17,4,4,1,1,37.7331286954416,3.36149064009386,8,3,3,0.164442325636724,0.338528130628634,3,0,24.8483016116964,4,1,1,7,0,3,11,15,8,8
|
100 |
+
31,3,0.794106942470392,2,7,3,3,4,1,47.1348275942282,2.44470116605974,6,3,3,0.0775067148711385,0.383947622952954,2,0,13.940247858847,3,1,1,1,2,3,2,0,0,0
|
101 |
+
32,3,0.0872029008877171,1,18,3,4,1,1,100.0,2.95470712013349,5,1,1,0.359983918742085,0.0,0,0,11.5807832892493,3,2,1,9,2,4,10,8,5,3
|
draft.ipynb
ADDED
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"## Data Deployment Draft"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"# import libraries\n",
|
17 |
+
"import pandas as pd"
|
18 |
+
]
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"cell_type": "code",
|
22 |
+
"execution_count": 2,
|
23 |
+
"metadata": {},
|
24 |
+
"outputs": [
|
25 |
+
{
|
26 |
+
"data": {
|
27 |
+
"text/html": [
|
28 |
+
"<div>\n",
|
29 |
+
"<style scoped>\n",
|
30 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
31 |
+
" vertical-align: middle;\n",
|
32 |
+
" }\n",
|
33 |
+
"\n",
|
34 |
+
" .dataframe tbody tr th {\n",
|
35 |
+
" vertical-align: top;\n",
|
36 |
+
" }\n",
|
37 |
+
"\n",
|
38 |
+
" .dataframe thead th {\n",
|
39 |
+
" text-align: right;\n",
|
40 |
+
" }\n",
|
41 |
+
"</style>\n",
|
42 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
43 |
+
" <thead>\n",
|
44 |
+
" <tr style=\"text-align: right;\">\n",
|
45 |
+
" <th></th>\n",
|
46 |
+
" <th>Age</th>\n",
|
47 |
+
" <th>Attrition</th>\n",
|
48 |
+
" <th>BusinessTravel</th>\n",
|
49 |
+
" <th>DailyRate</th>\n",
|
50 |
+
" <th>Department</th>\n",
|
51 |
+
" <th>DistanceFromHome</th>\n",
|
52 |
+
" <th>Education</th>\n",
|
53 |
+
" <th>EducationField</th>\n",
|
54 |
+
" <th>EnvironmentSatisfaction</th>\n",
|
55 |
+
" <th>Gender</th>\n",
|
56 |
+
" <th>...</th>\n",
|
57 |
+
" <th>PerformanceRating</th>\n",
|
58 |
+
" <th>RelationshipSatisfaction</th>\n",
|
59 |
+
" <th>StockOptionLevel</th>\n",
|
60 |
+
" <th>TotalWorkingYears</th>\n",
|
61 |
+
" <th>TrainingTimesLastYear</th>\n",
|
62 |
+
" <th>WorkLifeBalance</th>\n",
|
63 |
+
" <th>YearsAtCompany</th>\n",
|
64 |
+
" <th>YearsInCurrentRole</th>\n",
|
65 |
+
" <th>YearsSinceLastPromotion</th>\n",
|
66 |
+
" <th>YearsWithCurrManager</th>\n",
|
67 |
+
" </tr>\n",
|
68 |
+
" </thead>\n",
|
69 |
+
" <tbody>\n",
|
70 |
+
" <tr>\n",
|
71 |
+
" <th>0</th>\n",
|
72 |
+
" <td>39</td>\n",
|
73 |
+
" <td>0</td>\n",
|
74 |
+
" <td>3</td>\n",
|
75 |
+
" <td>0.318958</td>\n",
|
76 |
+
" <td>2</td>\n",
|
77 |
+
" <td>3</td>\n",
|
78 |
+
" <td>1</td>\n",
|
79 |
+
" <td>5</td>\n",
|
80 |
+
" <td>2</td>\n",
|
81 |
+
" <td>1</td>\n",
|
82 |
+
" <td>...</td>\n",
|
83 |
+
" <td>3</td>\n",
|
84 |
+
" <td>2</td>\n",
|
85 |
+
" <td>1</td>\n",
|
86 |
+
" <td>7</td>\n",
|
87 |
+
" <td>3</td>\n",
|
88 |
+
" <td>2</td>\n",
|
89 |
+
" <td>16</td>\n",
|
90 |
+
" <td>11</td>\n",
|
91 |
+
" <td>0</td>\n",
|
92 |
+
" <td>9</td>\n",
|
93 |
+
" </tr>\n",
|
94 |
+
" <tr>\n",
|
95 |
+
" <th>1</th>\n",
|
96 |
+
" <td>29</td>\n",
|
97 |
+
" <td>0</td>\n",
|
98 |
+
" <td>2</td>\n",
|
99 |
+
" <td>0.845930</td>\n",
|
100 |
+
" <td>1</td>\n",
|
101 |
+
" <td>21</td>\n",
|
102 |
+
" <td>3</td>\n",
|
103 |
+
" <td>3</td>\n",
|
104 |
+
" <td>2</td>\n",
|
105 |
+
" <td>1</td>\n",
|
106 |
+
" <td>...</td>\n",
|
107 |
+
" <td>3</td>\n",
|
108 |
+
" <td>3</td>\n",
|
109 |
+
" <td>1</td>\n",
|
110 |
+
" <td>8</td>\n",
|
111 |
+
" <td>3</td>\n",
|
112 |
+
" <td>2</td>\n",
|
113 |
+
" <td>2</td>\n",
|
114 |
+
" <td>5</td>\n",
|
115 |
+
" <td>3</td>\n",
|
116 |
+
" <td>5</td>\n",
|
117 |
+
" </tr>\n",
|
118 |
+
" <tr>\n",
|
119 |
+
" <th>2</th>\n",
|
120 |
+
" <td>40</td>\n",
|
121 |
+
" <td>0</td>\n",
|
122 |
+
" <td>2</td>\n",
|
123 |
+
" <td>0.153782</td>\n",
|
124 |
+
" <td>2</td>\n",
|
125 |
+
" <td>1</td>\n",
|
126 |
+
" <td>1</td>\n",
|
127 |
+
" <td>5</td>\n",
|
128 |
+
" <td>2</td>\n",
|
129 |
+
" <td>1</td>\n",
|
130 |
+
" <td>...</td>\n",
|
131 |
+
" <td>3</td>\n",
|
132 |
+
" <td>1</td>\n",
|
133 |
+
" <td>1</td>\n",
|
134 |
+
" <td>24</td>\n",
|
135 |
+
" <td>2</td>\n",
|
136 |
+
" <td>2</td>\n",
|
137 |
+
" <td>0</td>\n",
|
138 |
+
" <td>0</td>\n",
|
139 |
+
" <td>0</td>\n",
|
140 |
+
" <td>0</td>\n",
|
141 |
+
" </tr>\n",
|
142 |
+
" <tr>\n",
|
143 |
+
" <th>3</th>\n",
|
144 |
+
" <td>24</td>\n",
|
145 |
+
" <td>0</td>\n",
|
146 |
+
" <td>2</td>\n",
|
147 |
+
" <td>0.785534</td>\n",
|
148 |
+
" <td>1</td>\n",
|
149 |
+
" <td>6</td>\n",
|
150 |
+
" <td>3</td>\n",
|
151 |
+
" <td>1</td>\n",
|
152 |
+
" <td>4</td>\n",
|
153 |
+
" <td>2</td>\n",
|
154 |
+
" <td>...</td>\n",
|
155 |
+
" <td>3</td>\n",
|
156 |
+
" <td>3</td>\n",
|
157 |
+
" <td>1</td>\n",
|
158 |
+
" <td>4</td>\n",
|
159 |
+
" <td>0</td>\n",
|
160 |
+
" <td>2</td>\n",
|
161 |
+
" <td>7</td>\n",
|
162 |
+
" <td>8</td>\n",
|
163 |
+
" <td>0</td>\n",
|
164 |
+
" <td>6</td>\n",
|
165 |
+
" </tr>\n",
|
166 |
+
" <tr>\n",
|
167 |
+
" <th>4</th>\n",
|
168 |
+
" <td>44</td>\n",
|
169 |
+
" <td>0</td>\n",
|
170 |
+
" <td>2</td>\n",
|
171 |
+
" <td>1.000000</td>\n",
|
172 |
+
" <td>1</td>\n",
|
173 |
+
" <td>5</td>\n",
|
174 |
+
" <td>5</td>\n",
|
175 |
+
" <td>5</td>\n",
|
176 |
+
" <td>1</td>\n",
|
177 |
+
" <td>1</td>\n",
|
178 |
+
" <td>...</td>\n",
|
179 |
+
" <td>3</td>\n",
|
180 |
+
" <td>4</td>\n",
|
181 |
+
" <td>1</td>\n",
|
182 |
+
" <td>0</td>\n",
|
183 |
+
" <td>2</td>\n",
|
184 |
+
" <td>3</td>\n",
|
185 |
+
" <td>0</td>\n",
|
186 |
+
" <td>5</td>\n",
|
187 |
+
" <td>1</td>\n",
|
188 |
+
" <td>2</td>\n",
|
189 |
+
" </tr>\n",
|
190 |
+
" </tbody>\n",
|
191 |
+
"</table>\n",
|
192 |
+
"<p>5 rows × 30 columns</p>\n",
|
193 |
+
"</div>"
|
194 |
+
],
|
195 |
+
"text/plain": [
|
196 |
+
" Age Attrition BusinessTravel DailyRate Department DistanceFromHome \\\n",
|
197 |
+
"0 39 0 3 0.318958 2 3 \n",
|
198 |
+
"1 29 0 2 0.845930 1 21 \n",
|
199 |
+
"2 40 0 2 0.153782 2 1 \n",
|
200 |
+
"3 24 0 2 0.785534 1 6 \n",
|
201 |
+
"4 44 0 2 1.000000 1 5 \n",
|
202 |
+
"\n",
|
203 |
+
" Education EducationField EnvironmentSatisfaction Gender ... \\\n",
|
204 |
+
"0 1 5 2 1 ... \n",
|
205 |
+
"1 3 3 2 1 ... \n",
|
206 |
+
"2 1 5 2 1 ... \n",
|
207 |
+
"3 3 1 4 2 ... \n",
|
208 |
+
"4 5 5 1 1 ... \n",
|
209 |
+
"\n",
|
210 |
+
" PerformanceRating RelationshipSatisfaction StockOptionLevel \\\n",
|
211 |
+
"0 3 2 1 \n",
|
212 |
+
"1 3 3 1 \n",
|
213 |
+
"2 3 1 1 \n",
|
214 |
+
"3 3 3 1 \n",
|
215 |
+
"4 3 4 1 \n",
|
216 |
+
"\n",
|
217 |
+
" TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany \\\n",
|
218 |
+
"0 7 3 2 16 \n",
|
219 |
+
"1 8 3 2 2 \n",
|
220 |
+
"2 24 2 2 0 \n",
|
221 |
+
"3 4 0 2 7 \n",
|
222 |
+
"4 0 2 3 0 \n",
|
223 |
+
"\n",
|
224 |
+
" YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager \n",
|
225 |
+
"0 11 0 9 \n",
|
226 |
+
"1 5 3 5 \n",
|
227 |
+
"2 0 0 0 \n",
|
228 |
+
"3 8 0 6 \n",
|
229 |
+
"4 5 1 2 \n",
|
230 |
+
"\n",
|
231 |
+
"[5 rows x 30 columns]"
|
232 |
+
]
|
233 |
+
},
|
234 |
+
"execution_count": 2,
|
235 |
+
"metadata": {},
|
236 |
+
"output_type": "execute_result"
|
237 |
+
}
|
238 |
+
],
|
239 |
+
"source": [
|
240 |
+
"# load the data\n",
|
241 |
+
"data = pd.read_csv('data/processed_data.csv')\n",
|
242 |
+
"\n",
|
243 |
+
"# preview the data\n",
|
244 |
+
"data.head()"
|
245 |
+
]
|
246 |
+
},
|
247 |
+
{
|
248 |
+
"cell_type": "code",
|
249 |
+
"execution_count": 3,
|
250 |
+
"metadata": {},
|
251 |
+
"outputs": [],
|
252 |
+
"source": [
|
253 |
+
"# sampling data\n",
|
254 |
+
"# sample 10% of the data and save as sample_data.csv\n",
|
255 |
+
"sample_data = data.sample(frac=0.1, random_state=1)\n",
|
256 |
+
"\n",
|
257 |
+
"# remove the Attrition column\n",
|
258 |
+
"sample_data = sample_data.drop(columns='Attrition')\n",
|
259 |
+
"sample_data.to_csv('data/sample_data.csv', index=False)"
|
260 |
+
]
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"cell_type": "code",
|
264 |
+
"execution_count": 100,
|
265 |
+
"metadata": {},
|
266 |
+
"outputs": [
|
267 |
+
{
|
268 |
+
"data": {
|
269 |
+
"text/html": [
|
270 |
+
"<style>#sk-container-id-25 {color: black;}#sk-container-id-25 pre{padding: 0;}#sk-container-id-25 div.sk-toggleable {background-color: white;}#sk-container-id-25 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-25 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-25 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-25 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-25 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-25 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-25 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-25 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-25 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-25 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-25 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-25 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-25 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-25 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-25 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-25 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-25 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-25 div.sk-item {position: relative;z-index: 1;}#sk-container-id-25 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-25 div.sk-item::before, #sk-container-id-25 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-25 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-25 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-25 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-25 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-25 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-25 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-25 div.sk-label-container {text-align: center;}#sk-container-id-25 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-25 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-25\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
|
271 |
+
" colsample_bylevel=None, colsample_bynode=None,\n",
|
272 |
+
" colsample_bytree=None, early_stopping_rounds=None,\n",
|
273 |
+
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
|
274 |
+
" gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
|
275 |
+
" interaction_constraints=None, learning_rate=None, max_bin=None,\n",
|
276 |
+
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
|
277 |
+
" max_delta_step=None, max_depth=4, max_leaves=None,\n",
|
278 |
+
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
|
279 |
+
" n_estimators=250, n_jobs=None, num_parallel_tree=None,\n",
|
280 |
+
" predictor=None, random_state=None, ...)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-25\" type=\"checkbox\" checked><label for=\"sk-estimator-id-25\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
|
281 |
+
" colsample_bylevel=None, colsample_bynode=None,\n",
|
282 |
+
" colsample_bytree=None, early_stopping_rounds=None,\n",
|
283 |
+
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
|
284 |
+
" gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
|
285 |
+
" interaction_constraints=None, learning_rate=None, max_bin=None,\n",
|
286 |
+
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
|
287 |
+
" max_delta_step=None, max_depth=4, max_leaves=None,\n",
|
288 |
+
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
|
289 |
+
" n_estimators=250, n_jobs=None, num_parallel_tree=None,\n",
|
290 |
+
" predictor=None, random_state=None, ...)</pre></div></div></div></div></div>"
|
291 |
+
],
|
292 |
+
"text/plain": [
|
293 |
+
"XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
|
294 |
+
" colsample_bylevel=None, colsample_bynode=None,\n",
|
295 |
+
" colsample_bytree=None, early_stopping_rounds=None,\n",
|
296 |
+
" enable_categorical=False, eval_metric=None, feature_types=None,\n",
|
297 |
+
" gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
|
298 |
+
" interaction_constraints=None, learning_rate=None, max_bin=None,\n",
|
299 |
+
" max_cat_threshold=None, max_cat_to_onehot=None,\n",
|
300 |
+
" max_delta_step=None, max_depth=4, max_leaves=None,\n",
|
301 |
+
" min_child_weight=None, missing=nan, monotone_constraints=None,\n",
|
302 |
+
" n_estimators=250, n_jobs=None, num_parallel_tree=None,\n",
|
303 |
+
" predictor=None, random_state=None, ...)"
|
304 |
+
]
|
305 |
+
},
|
306 |
+
"execution_count": 100,
|
307 |
+
"metadata": {},
|
308 |
+
"output_type": "execute_result"
|
309 |
+
}
|
310 |
+
],
|
311 |
+
"source": [
|
312 |
+
"# train a xgboost model\n",
|
313 |
+
"from xgboost import XGBClassifier\n",
|
314 |
+
"\n",
|
315 |
+
"# the target is the Attrition\n",
|
316 |
+
"y = data['Attrition']\n",
|
317 |
+
"x = data.drop(['Attrition'], axis=1)\n",
|
318 |
+
"\n",
|
319 |
+
"# train the model\n",
|
320 |
+
"model = XGBClassifier()\n",
|
321 |
+
"\n",
|
322 |
+
"# parameter tuning\n",
|
323 |
+
"# from sklearn.model_selection import GridSearchCV\n",
|
324 |
+
"\n",
|
325 |
+
"# # more in depth search\n",
|
326 |
+
"# param_grid = {\n",
|
327 |
+
"# 'n_estimators': [100, 200, 300],\n",
|
328 |
+
"# 'max_depth': [2, 3, 4],\n",
|
329 |
+
"# 'learning_rate': [0.1, 0.01, 0.001]\n",
|
330 |
+
"# }\n",
|
331 |
+
"\n",
|
332 |
+
"# grid_search = GridSearchCV(model, param_grid, cv=10, n_jobs=-1)\n",
|
333 |
+
"# grid_search.fit(x_train, y_train)\n",
|
334 |
+
"\n",
|
335 |
+
"# # best parameters\n",
|
336 |
+
"# print(grid_search.best_params_)\n",
|
337 |
+
"\n",
|
338 |
+
"# train the model with the best parameters\n",
|
339 |
+
"model = XGBClassifier(n_estimators=250, max_depth=4)\n",
|
340 |
+
"\n",
|
341 |
+
"# fit the model\n",
|
342 |
+
"model.fit(x, y)"
|
343 |
+
]
|
344 |
+
},
|
345 |
+
{
|
346 |
+
"cell_type": "code",
|
347 |
+
"execution_count": 101,
|
348 |
+
"metadata": {},
|
349 |
+
"outputs": [
|
350 |
+
{
|
351 |
+
"name": "stdout",
|
352 |
+
"output_type": "stream",
|
353 |
+
"text": [
|
354 |
+
"Accuracy: 1.0\n",
|
355 |
+
"Confusion matrix: \n",
|
356 |
+
"[[507 0]\n",
|
357 |
+
" [ 0 493]]\n"
|
358 |
+
]
|
359 |
+
}
|
360 |
+
],
|
361 |
+
"source": [
|
362 |
+
"# test the model\n",
|
363 |
+
"y_pred = model.predict(x)\n",
|
364 |
+
"\n",
|
365 |
+
"# evaluate the model\n",
|
366 |
+
"from sklearn.metrics import accuracy_score\n",
|
367 |
+
"accuracy = accuracy_score(y, y_pred)\n",
|
368 |
+
"print(f'Accuracy: {accuracy}')\n",
|
369 |
+
"\n",
|
370 |
+
"# confusion matrix\n",
|
371 |
+
"from sklearn.metrics import confusion_matrix\n",
|
372 |
+
"conf_matrix = confusion_matrix(y, y_pred)\n",
|
373 |
+
"print(f'Confusion matrix: \\n{conf_matrix}')"
|
374 |
+
]
|
375 |
+
},
|
376 |
+
{
|
377 |
+
"cell_type": "code",
|
378 |
+
"execution_count": 102,
|
379 |
+
"metadata": {},
|
380 |
+
"outputs": [
|
381 |
+
{
|
382 |
+
"data": {
|
383 |
+
"text/plain": [
|
384 |
+
"['model/model.pkl']"
|
385 |
+
]
|
386 |
+
},
|
387 |
+
"execution_count": 102,
|
388 |
+
"metadata": {},
|
389 |
+
"output_type": "execute_result"
|
390 |
+
}
|
391 |
+
],
|
392 |
+
"source": [
|
393 |
+
"# save the model as pkl\n",
|
394 |
+
"import joblib\n",
|
395 |
+
"joblib.dump(model, 'model/model.pkl')"
|
396 |
+
]
|
397 |
+
}
|
398 |
+
],
|
399 |
+
"metadata": {
|
400 |
+
"kernelspec": {
|
401 |
+
"display_name": "Python 3",
|
402 |
+
"language": "python",
|
403 |
+
"name": "python3"
|
404 |
+
},
|
405 |
+
"language_info": {
|
406 |
+
"codemirror_mode": {
|
407 |
+
"name": "ipython",
|
408 |
+
"version": 3
|
409 |
+
},
|
410 |
+
"file_extension": ".py",
|
411 |
+
"mimetype": "text/x-python",
|
412 |
+
"name": "python",
|
413 |
+
"nbconvert_exporter": "python",
|
414 |
+
"pygments_lexer": "ipython3",
|
415 |
+
"version": "3.11.4"
|
416 |
+
}
|
417 |
+
},
|
418 |
+
"nbformat": 4,
|
419 |
+
"nbformat_minor": 2
|
420 |
+
}
|
git.sh
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
#
# Stage every change, commit it with a numbered message, and push.
#
# Usage: ./git.sh ["Your custom message"]
#
# The commit message is "<message> #<n>", where <n> is the current number of
# commits on HEAD plus one. Without an argument the message defaults to
# "auto commit #<n>".

# Get the current commit count. `git rev-list` fails in a repository that has
# no commits yet; fall back to 0 so the arithmetic below still works.
commit_count=$(git rev-list --count HEAD 2>/dev/null || echo 0)

# Increment the commit count
next_commit_count=$((commit_count + 1))

# Add all changes
git add . || exit 1

# Check if a custom message is provided
if [ -z "${1:-}" ]; then
    commit_message="auto commit #$next_commit_count"
else
    commit_message="$1 #$next_commit_count"
fi

# Commit with the message. When nothing is staged there is nothing to commit,
# but earlier local commits may still need pushing, so carry on to the push.
# A commit that fails for any other reason stops the script before the push.
if git diff --cached --quiet; then
    echo "Nothing to commit; pushing existing commits only." >&2
else
    git commit -m "$commit_message" || exit 1
fi

# Push the changes
git push
|
lib/conversion_ref.xlsx
ADDED
Binary file (9.29 kB). View file
|
|
lib/scaling_ref.xlsx.xlsx
ADDED
Binary file (8.69 kB). View file
|
|
model/model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d619f70804761291b6ba4e20a26ed8781ec5efe2baf74f8c56acaa5a6c2707db
|
3 |
+
size 348121
|
pages/batch-processing.py
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from backend.bot import *
|
3 |
+
from backend.model import *
|
4 |
+
import pandas as pd
|
5 |
+
import time
|
6 |
+
import google.generativeai as genai
|
7 |
+
|
8 |
+
# Page title
|
9 |
+
st.set_page_config(page_title='Batch Processing', layout='wide')
|
10 |
+
|
11 |
+
st.title("Attrition Prediction Engine")
|
12 |
+
st.write("Welcome to the Attrition Prediction Engine! This tool is designed to help you batch process employee data and predict attrition.")
|
13 |
+
|
14 |
+
with st.sidebar:
|
15 |
+
with st.expander("🧪 Experimental Features", expanded=False):
|
16 |
+
st.caption("API token can be obtained at https://aistudio.google.com/.")
|
17 |
+
gemini_api = st.text_input("Gemini Token", "", type='password')
|
18 |
+
# Validate the token by sending one real request, but only once per token.
# Streamlit re-executes this script on every widget interaction, so an
# unconditional request would be repeated (latency and API quota) on each rerun.
# A successful validation is remembered in the session as (token, model).
# Failed validations are not cached, so they are retried on the next rerun.
try:
    genai.configure(api_key=gemini_api)
    validated = st.session_state.get("_gemini_validated")
    if validated is not None and validated[0] == gemini_api:
        # Same token as the last successful check: reuse the model object.
        ai_model = validated[1]
    else:
        ai_model = genai.GenerativeModel("gemini-1.5-flash")
        # The response is discarded; the call only has to succeed.
        ai_model.generate_content("Explain how AI works")
        st.session_state["_gemini_validated"] = (gemini_api, ai_model)
    st.success("API key is valid. Experimental feature access granted.")
except Exception:
    # Deliberately broad: any failure (bad key, network, quota) is reported as
    # "no access" rather than crashing the page.
    # NOTE(review): the exact exception types raised by google.generativeai
    # for an invalid key are not visible here; narrow this once confirmed.
    st.error("API key is invalid. You don't have access to experimental features.")
|
25 |
+
|
26 |
+
|
27 |
+
with st.expander("⚠️ Disclaimer", expanded=False):
|
28 |
+
st.write("This web app is intended for prediction purposes only. The results are based on the input data provided and \
|
29 |
+
the performance of the machine learning model. The accuracy of the predictions may vary depending on data quality \
|
30 |
+
and model reliability.")
|
31 |
+
|
32 |
+
st.caption("MIT License © 2025 Khor Kean Teng, Ng Jing Wen, Lim Sze Chie, Tan Yee Thong, Yee See Marn")
|
33 |
+
|
34 |
+
|
35 |
+
# Display assistant response in chat message container
|
36 |
+
with st.chat_message("assistant", avatar="https://cdn4.iconfinder.com/data/icons/heroes-villains-vol-2-colored/100/Terminator-512.png"):
|
37 |
+
# response = st.write_stream(response_generator())
|
38 |
+
response = st.write("Hello admin! I am Az-147. How can I assist you today?")
|
39 |
+
st.caption("If you use predefined data, the file upload step will be hidden.")
|
40 |
+
toggle = st.toggle('Use Predefined Data', True)
|
41 |
+
data= get_data("data/sample_data.csv")
|
42 |
+
|
43 |
+
# Offer the uploader only when the predefined sample is switched off. Until a
# file has actually been chosen, `data` keeps whatever was assigned before
# this block, so the rest of the page still has something to work with.
if not toggle:
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is not None:
        data = pd.read_csv(uploaded_file)
|
47 |
+
|
48 |
+
submit = st.button("Execute", type='primary')
|
49 |
+
|
50 |
+
model = load_model("model/model.pkl")
|
51 |
+
|
52 |
+
def count_attrition(predictions):
    """Return the total of all values in *predictions*.

    With 0/1 class labels this total is the number of rows predicted as
    attrition. An empty input yields 0.
    """
    total = 0
    for label in predictions:
        total += label
    return total
|
54 |
+
|
55 |
+
# Runs only on the rerun triggered by the "Execute" button. The three status
# panels are rendered in order: preview, prediction, optional AI commentary.
if submit:
    with st.status("Data Preview", expanded=True):
        time.sleep(.5)
        st.write(f"You've uploaded a data file of {data.shape[0]} rows and {data.shape[1]} columns. Here's a preview of the data:")
        st.write(data.head())

    with st.status("Predicting Attrition...", expanded=True):
        time.sleep(2)
        # The model is fitted on the feature columns only, without the
        # 'Attrition' target. An uploaded file that still carries that column
        # must not pass it to predict(); errors='ignore' makes the drop a
        # no-op when the column is absent, as in the bundled sample.
        features = data.drop(columns='Attrition', errors='ignore')
        prediction = model.predict(features)
        # Attach (or overwrite) the predicted label on the displayed frame.
        data['Attrition'] = prediction
        attrition_count = count_attrition(prediction)
        output = f"Prediction completed! There are {attrition_count} cases of attrition. Here's a preview of the data with the predicted attrition status:"
        st.write(output)
        st.write(data.head())

    with st.status("AI Opinion", expanded=True):
        try:
            response = ai_model.generate_content(f"Give some opinions in about 100 word based on the prediction results where there are {attrition_count} cases of attrition.")
            st.write(response.text)
        except Exception:
            # Best-effort feature: a missing or invalid token, or any API
            # failure, ends up here and is reported as lack of access.
            st.write("You don't have access to this feature. Please authenticate to use this feature.")
|
76 |
+
|
pages/documentation.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
st.set_page_config(page_title='Documentation', layout='wide')
|
5 |
+
|
6 |
+
with st.sidebar:
|
7 |
+
with st.expander("⚠️ Disclaimer", expanded=True):
|
8 |
+
st.write("This web app is intended for prediction purposes only. The results are based on the input data provided and \
|
9 |
+
the performance of the machine learning model. The accuracy of the predictions may vary depending on data quality \
|
10 |
+
and model reliability.")
|
11 |
+
|
12 |
+
st.caption("MIT License © 2025 Khor Kean Teng, Ng Jing Wen, Lim Sze Chie, Tan Yee Thong, Yee See Marn")
|
13 |
+
|
14 |
+
st.title("📄 Documentation")
|
15 |
+
st.markdown("""
|
16 |
+
To learn more about the project, please refer to the sections below.
|
17 |
+
""")
|
18 |
+
st.subheader("About Employee Turnover")
|
19 |
+
st.write(
|
20 |
+
"Employee turnover, also known as employee attrition, refers to the number of workers leaving an \
|
21 |
+
organization over a specified time. It disrupts operations, increases recruitment costs, and impacts \
|
22 |
+
productivity, competitiveness, and profitability.")
|
23 |
+
|
24 |
+
st.subheader("Malaysian Employee Turnover Statistics")
|
25 |
+
st.write("- Nearly 49% of Malaysian organizations face employee turnover issues (Al-Suraihi et al., 2021).")
|
26 |
+
st.write("- Voluntary turnover rates rose from 6.5% (early 2019) to 8.7% (2020) (Bibi Nabi & Zahir, 2024).")
|
27 |
+
st.write("- Manufacturing sector turnover: 24% in 2019 (Kin et al., 2022).")
|
28 |
+
st.write("- FMCG sector voluntary turnover: 8.4% in 2020.")
|
29 |
+
|
30 |
+
st.subheader("Case Study Highlight")
|
31 |
+
# Case-study blurb. The second line continues the same string literal, so it
# must not start with "#": that character would be shown to the user rather
# than treated as a comment.
st.write("Xerox’s Call Centre reduced turnover by 20% using predictive analytics to identify patterns \
linked to attrition and improve employee engagement. (Solutyics, 2023)")
|
33 |
+
|
34 |
+
st.subheader("Data Source")
|
35 |
+
st.write(
|
36 |
+
"The dataset used in this project is from the IBM HR Analytics Employee Attrition & Performance dataset \
|
37 |
+
available on Kaggle. It is a synthetic dataset with 1,470 observations and 35 features, covering \
|
38 |
+
employee background, employment details, and satisfaction metrics. Below attached the sample data for reference.")
|
39 |
+
|
40 |
+
# Build the download payload: load the bundled sample and serialise it back
# to CSV (without the index column) as UTF-8 bytes.
data = pd.read_csv("data/sample_data.csv").to_csv(index=False).encode("utf-8")

# Button that hands the serialised sample to the browser as a CSV file.
download_options = {
    "label": "Download Sample Data",
    "data": data,
    "file_name": "sample_data.csv",
    "mime": "text/csv",
}
st.download_button(**download_options)
|
reference.md
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# How to use Git:
|
2 |
+
|
3 |
+
## Basic:
|
4 |
+
```bash
|
5 |
+
git commit -m "message"
|
6 |
+
git push
|
7 |
+
git pull
|
8 |
+
git add .
|
9 |
+
```
|
10 |
+
|
11 |
+
## Branching:
|
12 |
+
```bash
|
13 |
+
git branch # see the branches
|
14 |
+
git status # see anything to commit
|
15 |
+
git checkout <branchname> # switch branch
|
16 |
+
|
17 |
+
Put New Branch On Remote
|
18 |
+
git push --set-upstream origin <branchname>
|
19 |
+
|
20 |
+
Delete Branch On Remote
|
21 |
+
git push origin --delete <branchname>
|
22 |
+
|
23 |
+
Delete Branch On Local
|
24 |
+
git branch -d <branchname>
|
25 |
+
|
26 |
+
Create Branch On Local
|
27 |
+
git checkout -b <branchname>
|
28 |
+
|
29 |
+
Merge branch
|
30 |
+
git merge <branchname> # run this from the branch you want to merge into; <branchname> is the branch being merged in
|
31 |
+
```
|
32 |
+
|
33 |
+
## Creating file
|
34 |
+
```bash
|
35 |
+
cat > <filename>
|
36 |
+
vim <filename>
|
37 |
+
touch <filename>
|
38 |
+
```
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pickle
|
2 |
+
pandas
|
3 |
+
joblib
|
4 |
+
google-generativeai
|
5 |
+
xgboost
|
run.sh
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
# only works if Python 3.12 is installed and this is run from a bash shell
|
2 |
+
py -3.12 -m streamlit run app.py
|