keanteng committed on
Commit
53a9f04
·
1 Parent(s): da87b23
.gitignore ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Jupyter Notebook checkpoints
7
+ .ipynb_checkpoints
8
+
9
+ # Environment variables
10
+ .env
11
+
12
+ # Streamlit specific files
13
+ .streamlit/
14
+
15
+
16
+
17
+ # Model files
18
+
19
+ *.h5
20
+
21
+ # Logs
22
+ *.log
23
+
24
+ # Virtual environment
25
+ venv/
26
+ env/
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 keanteng
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
Binary files a/README.md and b/README.md differ
 
abc.txt ADDED
File without changes
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from backend.model import load_model
4
+ import time
5
+ import google.generativeai as genai
6
+
7
# Streamlit front end for the employee turnover model.
#
# The sidebar collects employee details, the main area shows usage guidelines
# and, once "Compute" is pressed, the model's prediction plus an optional
# Gemini-generated commentary.
#
# NOTE(review): the nesting of the `with` blocks was reconstructed from a diff
# view that had lost its indentation -- confirm that each widget lands in the
# intended container (sidebar vs. main area).

# Page title (must be the first Streamlit command of the script)
st.set_page_config(page_title='Employee Turnover Predictor', layout='wide')

# ~~~~ Constants ~~~~
# (min, max) bounds shared by the input widgets and the min-max scaling below,
# so the two can never drift apart.
MONTHLY_INCOME_RANGE = (1000, 20000)
DAILY_RATE_RANGE = (100, 1500)
HOURLY_RATE_RANGE = (5, 100)

MODEL_PATH = 'model/model.pkl'
GEMINI_MODEL_NAME = "gemini-1.5-flash"

# Integer codes fed to the model for each marital status.
MARITAL_STATUS_CODES = {"Divorced": 1, "Married": 2, "Single": 3}

NO_AI_ACCESS_MESSAGE = "You don't have access to this feature. Please authenticate to use this feature."


def _min_max_scale(value, bounds):
    """Map *value* linearly so that bounds (low, high) land on (0, 1)."""
    low, high = bounds
    return (value - low) / (high - low)


@st.cache_resource
def _get_model(path):
    """Load the model once and reuse it across Streamlit reruns."""
    return load_model(path)


# ~~~~ Title ~~~~
st.title("🧿 Employee Turnover Prediction")
st.markdown("""An example app powered by Streamlit to predict the likelihood of employee turnover using machine learning model""")

# ~~~~ Add Guidelines to the App ~~~~
with st.sidebar:
    toggle = st.toggle("Show Guidelines", True)
    st.write("**Input Details**")

if toggle:
    with st.expander("💡 Guidelines", expanded=True):
        st.write("Follow the steps below to predict the likelihood of employee turnover:")
        st.write("1. Fill in the employee details, job-related information, salary information and satisfactory information in the form.")
        st.write("2. Click on the 'Predict' button to get the prediction results.")
        st.write("3. The prediction results will show whether the employee is likely to leave the company or not.")
        st.write("4. The 'Project Information' section provides details about employee turnover, statistics, case study, and data source.")
        st.write("5. The prediction results will be displayed below the 'Predict' button.")

# ~~~~ Sidebar inputs ~~~~
# Stays None unless a Gemini token has been entered and validated below.
ai_model = None

with st.sidebar:
    # ~~~~ Employee Details Input ~~~~
    with st.expander("👤 Employee Details", expanded=False):
        age = st.number_input("Age", min_value=18, max_value=65, value=25)
        marital_status = st.selectbox("Marital Status", ["Single", "Married", "Divorced"])
        total_working_years = st.number_input("Total Working Years", min_value=0, max_value=50, value=5)

    # ~~~~ Employee Job Related Information ~~~~
    with st.expander("🏢 Job Related Information", expanded=False):
        job_involvement = st.slider("Job Involvement", min_value=1, max_value=4, value=3)
        overtime = st.radio("Overtime", ["Yes", "No"])
        years_at_company = st.number_input("Years at Company", min_value=0, max_value=50, value=3)
        years_in_current_role = st.number_input("Years in Current Role", min_value=0, max_value=50, value=2)

    # ~~~~ Employee Salary Information ~~~~
    with st.expander("💰 Salary Information (RM)", expanded=False):
        monthly_income = st.number_input(
            "Monthly Income",
            min_value=MONTHLY_INCOME_RANGE[0],
            max_value=MONTHLY_INCOME_RANGE[1],
            value=5000,
        )
        daily_rate = st.number_input(
            "Daily Rate",
            min_value=DAILY_RATE_RANGE[0],
            max_value=DAILY_RATE_RANGE[1],
            value=500,
        )
        hourly_rate = st.number_input(
            "Hourly Rate",
            min_value=HOURLY_RATE_RANGE[0],
            max_value=HOURLY_RATE_RANGE[1],
            value=20,
        )
        percent_salary_hike = st.number_input("Percent Salary Hike", min_value=0, max_value=50, value=12)

    # ~~~~ Employee Satisfaction Information ~~~~
    with st.expander("😊 Satisfaction Information", expanded=False):
        job_satisfaction = st.slider("Job Satisfaction", min_value=1, max_value=4, value=3)
        environment_satisfaction = st.slider("Environment Satisfaction", min_value=1, max_value=4, value=3)

    submit = st.button("Compute", type="primary")
    st.divider()

    # ~~~~ Optional Gemini integration ~~~~
    with st.expander("🧪 Experimental Features", expanded=False):
        st.caption("API token can be obtained at https://aistudio.google.com/.")
        gemini_api = st.text_input("Gemini Token", "", type='password')
        if not gemini_api:
            # Nothing to validate yet: avoid a guaranteed-to-fail API call and
            # a misleading "invalid key" error on first page load.
            st.info("Enter a Gemini token to enable experimental features.")
        else:
            try:
                genai.configure(api_key=gemini_api)
                candidate_model = genai.GenerativeModel(GEMINI_MODEL_NAME)
                # The script reruns on every widget interaction, so the live
                # probe request is sent only once per distinct token.
                if st.session_state.get("validated_gemini_token") != gemini_api:
                    candidate_model.generate_content("Explain how AI works")
                    st.session_state["validated_gemini_token"] = gemini_api
                ai_model = candidate_model
                st.success("API key is valid. Experimental feature access granted.")
            except Exception:
                # Deliberately broad: any failure of the probe request is
                # treated as "no access" rather than crashing the app.
                st.error("API key is invalid. You don't have access to experimental features.")

# ~~~~ Display the project information ~~~~
with st.sidebar:
    st.caption("MIT License © 2025 Khor Kean Teng, Ng Jing Wen, Lim Sze Chie, Tan Yee Thong, Yee See Marn")

model = _get_model(MODEL_PATH)

# ~~~~ Data transformation ~~~~
monthly_income = _min_max_scale(monthly_income, MONTHLY_INCOME_RANGE)
daily_rate = _min_max_scale(daily_rate, DAILY_RATE_RANGE)
# NOTE(review): HourlyRate appears unscaled (30-100) in data/sample_data.csv,
# whereas it is min-max scaled here -- confirm which form the model was
# trained on.
hourly_rate = _min_max_scale(hourly_rate, HOURLY_RATE_RANGE)
marital_status = MARITAL_STATUS_CODES.get(marital_status, 3)
overtime = 1 if overtime == "Yes" else 0

# Features the form does not ask for are pinned to fixed values.
business_travel = 1
department = 1
distance_from_home = 2
education = 3
education_field = 1
gender = 1
job_role = 1
monthly_rate = 1
num_companies_worked = 2
performance_rating = 3
relationship_satisfaction = 3
stock_option_level = 0
training_times_last_year = 2
work_life_balance = 3
years_since_last_promotion = 0
years_with_curr_manager = 0

# Single-row frame; the column order is kept exactly as in the original script.
input_data = pd.DataFrame({
    "Age": [age],
    "BusinessTravel": [business_travel],
    "DailyRate": [daily_rate],
    "Department": [department],
    "DistanceFromHome": [distance_from_home],
    "Education": [education],
    "EducationField": [education_field],
    "EnvironmentSatisfaction": [environment_satisfaction],
    "Gender": [gender],
    "HourlyRate": [hourly_rate],
    "JobInvolvement": [job_involvement],
    "JobRole": [job_role],
    "JobSatisfaction": [job_satisfaction],
    "MaritalStatus": [marital_status],
    "MonthlyIncome": [monthly_income],
    "MonthlyRate": [monthly_rate],
    "NumCompaniesWorked": [num_companies_worked],
    "OverTime": [overtime],
    "PercentSalaryHike": [percent_salary_hike],
    "PerformanceRating": [performance_rating],
    "RelationshipSatisfaction": [relationship_satisfaction],
    "StockOptionLevel": [stock_option_level],
    "TotalWorkingYears": [total_working_years],
    "TrainingTimesLastYear": [training_times_last_year],
    "WorkLifeBalance": [work_life_balance],
    "YearsAtCompany": [years_at_company],
    "YearsInCurrentRole": [years_in_current_role],
    "YearsSinceLastPromotion": [years_since_last_promotion],
    "YearsWithCurrManager": [years_with_curr_manager],
})

# ~~~~ Predict Button ~~~~
if submit:
    with st.status("Predicting...", expanded=True) as status:
        # Run the model inside the status box so the spinner reflects real
        # work instead of an artificial delay.
        prediction = model.predict(input_data)
        will_stay = prediction[0] == 0
        if will_stay:
            message = "The employee is not likely to leave the company."
        else:
            message = "The employee is likely to leave the company."
        status.update(label="Prediction Results", state="complete", expanded=True)

        # Green for retention, red for predicted turnover.
        if will_stay:
            st.success(message)
        else:
            st.error(message)

    with st.status("AI Opinion", expanded=True):
        if ai_model is None:
            st.write(NO_AI_ACCESS_MESSAGE)
        else:
            try:
                response = ai_model.generate_content(
                    f"Give some opinions in about 100 words based on this employee turnover prediction result: {message}"
                )
                st.write(response.text)
            except Exception:
                # Best-effort feature: a failed API call must not break the
                # prediction display above.
                st.write("The AI opinion could not be generated. Please try again later.")
backend/__init__.py ADDED
File without changes
backend/bot.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def get_data(file_path):
    """Read the CSV at *file_path* and return its contents as a DataFrame.

    The argument is handed to ``pandas.read_csv`` unchanged.
    """
    return pd.read_csv(file_path)
backend/model.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import joblib
2
+ import sys
3
+
4
def load_model(file_path):
    """Deserialize the joblib-saved object stored at *file_path* and return it.

    The file is opened in binary mode and passed to ``joblib.load``; a
    confirmation line is written to stderr once loading has succeeded.
    """
    # NOTE: joblib files are pickle-based; only load files from a trusted source.
    with open(file_path, 'rb') as model_file:
        loaded_model = joblib.load(model_file)
    sys.stderr.write(f"Model loaded from {file_path}\n")
    return loaded_model
data/processed_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/sample_data.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Age,BusinessTravel,DailyRate,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,HourlyRate,JobInvolvement,JobRole,JobSatisfaction,MaritalStatus,MonthlyIncome,MonthlyRate,NumCompaniesWorked,OverTime,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
2
+ 18,2,0.884896126713833,2,6,2,1,2,2,79.0638746727023,3.67096695622934,4,1,1,0.0,0.245421584941066,0,0,22.7837960009114,4,4,1,6,0,1,0,2,0,0
3
+ 35,3,0.528299633583058,1,10,2,3,2,1,44.462381645364,4.83823641694839,1,3,1,0.354165796688055,0.521718940362603,0,0,10.7537434714825,3,4,1,8,0,3,9,8,0,4
4
+ 46,3,0.225415889690312,2,1,2,2,2,1,31.8216855199669,2.21343541755574,9,2,1,0.261410600966966,0.824330446815363,4,0,13.092579722152,3,3,2,8,6,1,7,5,4,4
5
+ 26,2,0.300147564337888,1,1,2,3,3,2,82.7363207740769,2.99713469482815,9,4,3,0.156042789492239,0.0953940534594883,1,0,11.0152615045712,3,3,1,11,0,1,7,1,1,4
6
+ 40,3,0.387151473602023,3,5,3,2,1,1,85.7357395806325,1.94312035578636,8,1,1,0.161743663345642,0.0,0,0,13.7655367802936,3,3,1,12,2,3,0,7,0,4
7
+ 18,2,0.292969687020957,1,14,3,1,3,1,51.0496193264179,3.13436603524644,3,2,3,0.0,0.946398296355741,2,0,14.0517770562183,3,3,1,0,3,1,0,1,0,0
8
+ 39,3,0.247270982549977,3,1,3,5,2,1,30.0,2.79237143648608,7,2,3,0.390047682966701,0.0,1,0,23.4496029983759,4,1,1,15,0,1,16,5,5,0
9
+ 42,2,0.787214143098539,1,13,2,1,1,1,95.4470929054933,2.42684909943778,3,1,2,0.0,0.187015556131792,4,0,3.47247565458359,3,1,1,7,2,1,14,2,0,4
10
+ 39,2,0.360353272174383,3,24,4,4,2,1,84.7859793942981,3.3814830915685,5,1,1,0.576933175065172,0.844341727191312,2,0,15.193892585657,3,3,1,6,2,1,20,3,1,0
11
+ 41,2,0.318283622419431,2,30,3,2,1,1,98.2748549218772,2.81126043226691,3,4,1,0.290980421971081,0.725236083577489,1,0,6.64013154341647,3,4,1,6,1,2,11,8,4,6
12
+ 27,1,0.673707321188431,1,16,2,5,4,1,70.1303647171047,2.91339726786792,8,2,1,0.51164053240056,0.0,5,0,16.2257498300301,3,4,2,6,1,1,8,8,1,9
13
+ 44,3,0.0,2,10,3,4,2,1,61.8812825977229,3.43351682699186,2,4,1,0.0,0.383693356002619,0,0,13.308798149351,3,1,1,15,3,2,2,0,3,0
14
+ 20,1,0.577365171755751,1,1,3,2,1,2,63.1887682855909,2.30938877125004,8,3,1,0.0,0.0,1,0,10.9347322984555,3,1,1,3,4,1,0,0,0,0
15
+ 32,3,0.477765221569139,2,20,3,3,3,2,50.7958051233783,3.71673211590062,9,4,3,0.213189245431854,0.0121357517148391,1,0,19.058285037905,3,4,1,1,2,2,6,2,5,1
16
+ 30,3,0.275420197043712,2,4,1,3,1,1,36.0995676794035,1.92814957752323,6,4,1,0.274737404991838,0.316514367598619,4,0,15.4375055694529,3,1,1,2,2,2,1,1,0,8
17
+ 40,1,0.108375677644769,1,16,1,3,3,1,100.0,2.82942953155278,7,2,1,0.673287394667207,0.328312344263964,5,0,12.9250698257048,3,2,3,15,2,3,9,0,0,2
18
+ 32,1,0.420267107662114,3,17,4,5,3,1,48.5248053925291,4.00327292025545,9,1,2,0.512551434407615,0.046466061825807,0,0,8.93587059743023,3,2,1,1,2,2,3,5,10,10
19
+ 60,2,0.987539186490136,1,1,1,1,3,1,57.1100774153506,3.04215376140908,3,1,1,0.56241860397887,0.784536722141436,4,0,18.5114978949955,3,1,1,21,1,1,5,1,2,4
20
+ 31,2,0.397234116968643,3,11,3,2,1,1,56.690794804316,3.73321798999671,7,1,3,0.60864556197485,0.601344112666162,0,0,17.4563649716418,3,1,1,7,4,3,0,0,0,2
21
+ 34,2,0.631252327121418,2,1,2,3,2,1,61.2714432624009,4.06239428666384,3,3,2,0.0894032560941022,0.272017374963247,1,0,25.2964724360263,3,2,1,19,2,3,12,11,0,6
22
+ 32,3,0.583080060423501,2,6,3,3,4,1,38.3009778493344,2.58678373810886,7,4,3,0.141481564463505,0.266929456859619,2,0,17.1257055277748,3,1,1,19,0,2,4,9,6,7
23
+ 60,2,0.60886256413886,2,8,3,3,2,1,100.0,3.44946119445524,5,1,1,0.0,0.361824377958698,4,1,20.8668249436963,3,1,3,15,1,1,4,1,0,0
24
+ 43,3,1.0,1,5,2,3,2,1,69.8285383801696,0.144217082745642,3,1,1,0.0159625917405128,0.501361456095993,5,0,11.5691420209477,3,1,1,8,2,1,12,0,0,3
25
+ 32,3,0.283429576600065,1,15,3,4,4,2,52.9136230986694,0.465063230277119,1,2,2,0.0,0.0,0,0,18.5161622315807,3,4,2,0,1,3,7,0,0,3
26
+ 41,1,1.0,1,2,4,4,3,2,31.1532111448524,2.92135005637192,9,2,2,0.227615827551701,0.539244222417047,4,1,16.3499812754728,3,3,1,0,2,2,0,0,0,0
27
+ 52,1,0.101237935874057,1,13,1,4,2,1,30.0,1.31146351394951,1,1,1,0.504183987510003,1.0,3,0,14.8607847204962,3,4,1,33,1,3,40,14,15,0
28
+ 23,3,0.102684704723007,2,1,3,4,1,1,30.0,2.96416504673855,2,3,2,0.556808230844202,0.632677675411816,0,0,22.6700902921069,4,3,1,8,5,3,6,7,5,7
29
+ 25,3,0.491411456966071,3,1,2,3,3,2,41.9430711663668,0.24382800536667,7,1,3,0.292643444071101,0.520450903013562,4,0,16.3645194480684,3,2,1,13,4,3,0,1,0,0
30
+ 41,2,1.0,1,28,3,1,1,1,99.5163725714313,2.46998261191354,8,1,3,0.308352455339601,0.791677085623735,0,0,19.1087816462321,4,1,1,17,1,2,2,4,0,4
31
+ 47,3,1.0,1,8,3,1,4,1,100.0,1.93007107949463,4,2,1,1.0,0.808357336353662,0,0,8.05348716425844,3,3,1,29,4,2,14,7,12,10
32
+ 31,3,0.538726517040128,2,11,2,5,1,1,63.8730474293125,3.40768674158798,1,1,2,0.0304966971320197,0.114538667573307,3,0,25.9631396946108,3,3,1,11,1,4,0,1,0,2
33
+ 48,2,0.0,3,1,2,3,1,2,64.5341473958719,2.2905912521993,6,4,2,0.219366025848428,0.772960667494781,6,0,17.7292168228217,3,2,1,8,2,1,11,3,8,3
34
+ 34,3,0.139834906667606,1,18,2,1,2,1,54.0020408294524,1.27054952647828,5,3,1,0.506594986970618,0.737462667728527,7,0,13.7890171740499,3,1,1,14,0,2,5,0,0,5
35
+ 40,2,0.459288876683091,1,1,3,2,1,1,31.5010433871168,1.26774037991838,5,4,2,0.0478860782391292,0.665789974132911,0,0,7.74524304113271,3,1,1,19,4,3,18,15,15,5
36
+ 40,1,0.733826685803661,1,17,2,4,1,2,75.7008577598748,3.04269222471344,3,1,1,0.934884523021577,0.394679557552056,6,0,14.3137991644419,3,4,2,12,3,2,12,7,12,6
37
+ 51,3,1.0,1,2,4,3,3,1,53.7435032675141,2.9794360959042,4,3,1,0.927690541233592,0.585487118852776,1,0,12.6149477519526,3,3,1,22,4,3,6,4,5,2
38
+ 41,1,0.299939404080079,1,5,2,1,3,1,38.2363792591174,3.35728228528459,3,2,1,0.390666807436103,0.982111229858157,0,0,5.82668934597605,3,2,1,19,4,2,14,8,2,11
39
+ 40,3,0.347830860312821,2,25,3,1,2,1,100.0,3.44844464252123,4,1,2,0.410669433949105,0.285220975922328,0,1,18.8378149985021,4,4,1,10,0,1,7,0,1,0
40
+ 25,2,0.122405696923293,2,26,1,2,4,1,100.0,2.24115423983947,1,1,2,0.311746154220245,0.323694753892343,0,0,16.7614404831178,3,1,2,5,1,1,0,0,0,0
41
+ 30,1,0.0,3,16,4,6,1,1,53.5548830320373,3.36337156776314,9,1,3,0.447219301876704,0.691174250033538,8,0,13.2182391923076,3,2,1,10,4,1,0,6,1,0
42
+ 22,2,0.245186418295391,1,16,4,1,3,1,71.2252921762933,1.86915070056365,3,3,3,0.310870109100547,0.263397961192169,3,0,12.508787466875,3,4,1,0,3,1,6,3,3,5
43
+ 39,3,1.0,3,1,2,2,1,1,30.0,2.50267653614494,1,1,2,0.124353700607106,0.138761602977062,5,0,20.1699344610357,3,1,1,9,1,4,0,4,0,3
44
+ 55,1,0.457494732134314,2,8,2,2,2,1,39.9519944162056,3.2186290108445,3,3,3,0.0,0.809749024984892,8,0,8.41488603710065,3,3,1,20,4,2,0,2,0,0
45
+ 27,3,0.0683477075858986,1,1,1,2,1,2,87.1954841532308,2.55611347216872,5,4,1,0.470759327649574,1.0,0,0,15.1087293762466,3,2,1,4,2,3,14,4,0,2
46
+ 26,2,0.73948976814775,2,5,3,4,2,2,33.2284847405531,2.4796973838769,6,2,1,0.230683283081594,0.504880583632711,0,0,17.0764735976111,3,3,1,14,1,2,0,0,0,4
47
+ 29,3,0.0354388603265388,3,28,1,1,2,1,72.4159982559238,4.64794277009462,8,3,2,0.148891114835579,0.313020820976981,6,1,12.0672870246404,3,4,1,17,4,3,0,3,2,2
48
+ 19,3,0.991076195418594,3,21,2,4,2,2,75.2957551794003,3.44356466556024,9,4,3,0.0,0.753080589338407,1,0,10.9077198221586,3,2,1,6,3,2,0,0,0,0
49
+ 45,3,0.698863171850152,3,6,1,1,1,1,78.4980587048639,3.53464492500628,9,4,1,0.609884984478493,0.210101592914315,6,0,12.4065335784653,3,3,1,19,2,1,13,11,1,9
50
+ 29,2,0.666588067523572,2,6,1,2,4,1,54.6325888225537,3.8593961787976,5,3,3,0.109258449018128,0.721956790831691,0,0,17.1969208088674,3,1,1,11,0,2,6,0,1,2
51
+ 29,2,0.296397784083832,1,1,3,4,4,1,66.8239432425046,2.27399341963712,5,4,2,0.0586137895938363,0.177570657414424,0,0,12.9746107406301,3,3,1,2,1,1,4,15,6,9
52
+ 30,2,0.305896614627132,2,17,1,2,4,1,30.0,1.90490455758757,6,4,1,0.0651803014460173,0.0280282360137817,0,0,14.673909416404,3,4,1,16,3,1,7,0,5,0
53
+ 48,1,1.0,2,1,4,1,1,1,57.6621380496683,2.41613139942679,9,1,1,1.0,0.45071568192453,5,1,19.9419474090085,3,2,1,29,2,1,16,2,0,9
54
+ 24,3,0.549572887781486,3,11,2,4,1,1,56.6395204758942,1.17522926861091,7,4,1,0.166056841559839,0.618516627740679,7,0,16.1571036955847,3,4,1,7,4,1,0,0,0,3
55
+ 29,1,0.64423923596827,2,3,1,2,2,1,65.187684331889,2.97869476126021,4,1,1,0.0,0.931236025796003,5,0,11.8244446312614,3,4,3,0,3,3,4,0,0,0
56
+ 35,1,0.0497980808132505,2,1,3,2,2,1,81.0442037386244,3.40818216001324,7,1,3,0.342044656675025,1.0,0,0,13.4796891124763,3,1,1,10,2,2,1,2,5,0
57
+ 20,3,0.217032971741176,3,17,1,1,2,1,84.2319305900174,2.04535070025609,9,1,1,0.092220413281171,0.624169845063327,3,0,10.4384078905024,3,1,1,6,0,3,0,2,0,0
58
+ 45,2,0.628001563945607,1,1,1,3,1,1,56.631294242497,1.39765248312659,3,4,1,0.0468385069932922,0.175953189712635,5,0,8.94177165598518,3,1,3,9,1,3,0,5,0,0
59
+ 18,3,0.809748808057304,1,15,1,5,2,1,100.0,1.87820726361762,4,3,3,0.188433671375755,0.477661776734141,0,0,14.7236868239689,3,1,1,14,2,2,5,3,0,3
60
+ 57,3,0.50242134536307,2,26,2,2,1,2,73.2182912787534,1.31611544273862,3,3,1,0.687797758716077,1.0,4,0,16.8875858430876,3,3,1,17,3,1,2,0,0,0
61
+ 45,3,0.289683870986498,3,1,1,3,1,2,90.2975851774595,2.66088514427624,8,4,2,0.0,0.121820636964127,1,0,12.6582388222199,3,2,1,6,0,2,6,3,0,8
62
+ 34,3,0.848964499186754,1,1,4,1,4,1,100.0,4.01727491210708,5,3,2,0.386490117426162,0.657500820389643,3,0,13.508891426895,3,1,1,18,2,1,5,2,3,2
63
+ 29,1,0.350201779529346,2,3,1,2,4,1,86.126543361023,3.58486432511929,6,3,1,0.109813581837882,0.69446148475871,0,0,15.3044579048155,3,1,1,5,2,3,2,0,0,0
64
+ 31,1,0.138291373005619,2,3,3,5,1,1,91.2425090523438,2.44173960411085,4,2,1,0.406516546539312,0.977431858004463,3,0,17.5252083440194,3,2,1,7,6,3,10,11,9,1
65
+ 27,2,0.184926211218149,3,5,1,2,3,1,69.2413466445254,2.13140231345731,4,3,3,0.401596815933898,0.991180483799566,9,0,9.8701671297013,3,1,1,2,2,3,0,2,0,1
66
+ 41,1,0.301660900545997,2,5,1,4,1,1,69.1251020961862,3.303702721918,1,3,2,0.170758897572894,0.926567099081853,1,1,15.3486479229393,3,1,1,13,0,2,13,5,5,6
67
+ 53,2,0.127929389514159,2,1,1,4,1,1,30.0,3.64056139246181,7,3,3,0.0350563706070154,0.932185707564198,6,0,19.9410388749096,3,3,1,5,2,1,8,6,4,5
68
+ 32,2,0.537218139779456,2,12,1,2,2,2,30.2121676237593,1.02389538327548,5,3,2,0.0003468097307804,1.0,3,0,16.3527029513081,3,3,3,5,0,3,1,3,0,5
69
+ 51,3,0.420620019752982,1,5,4,1,1,1,41.1665536426107,4.16841998425722,6,1,3,0.718600021277349,1.0,9,0,12.2907775208906,3,3,1,39,3,2,35,17,7,4
70
+ 36,1,0.000539911682434,3,7,1,3,3,1,43.1831637554264,0.856880800663716,6,3,1,0.0,0.280222813691602,0,0,13.9889177591685,3,4,1,2,2,2,5,0,1,0
71
+ 55,3,0.612000043966685,1,5,2,2,3,1,79.1880042545104,2.32890009430343,1,1,1,0.277643535334797,0.057370855046233,8,0,24.5947889701218,4,4,1,17,2,3,0,5,0,0
72
+ 60,2,0.264359784514337,2,1,4,1,3,1,44.2619392221251,3.49485914568419,4,1,3,1.0,1.0,9,0,9.73156887226829,3,4,1,39,0,2,30,16,13,7
73
+ 18,1,0.1864210027337,2,1,3,1,2,1,36.3641373587089,1.53364965745007,6,3,2,0.313299665299516,0.724344242987018,0,1,11.3117576847644,3,3,1,0,2,3,6,7,0,5
74
+ 35,1,0.149976915447758,2,4,3,1,4,1,30.0,3.45596879546415,3,4,2,0.0,0.109758123967461,1,0,23.5388294679342,3,1,1,9,4,3,6,4,0,6
75
+ 24,3,0.0,1,16,1,6,1,1,88.7380368066718,1.68120875556832,1,2,2,0.329572045338643,0.463364701591192,2,0,13.0980864479692,3,1,1,0,2,4,0,0,0,0
76
+ 36,2,0.785280352606463,2,1,1,1,4,1,67.9733720965838,2.03641662262983,2,1,2,0.161104494928546,0.286160743077737,5,0,18.6238434791828,3,4,1,18,4,3,9,13,5,7
77
+ 56,1,0.678837697167519,2,7,4,4,1,1,100.0,3.9674499156043,2,3,1,0.956517844734314,1.0,0,0,23.7956385496542,4,1,1,35,1,2,16,7,2,12
78
+ 37,2,0.0448115878220008,1,1,4,3,1,2,69.2190755146291,1.96632893104638,6,3,3,0.0,0.601621016986509,2,1,16.2632826546757,3,2,1,7,2,4,1,6,2,0
79
+ 50,2,0.68223104952878,3,11,2,3,2,2,78.2108011094033,0.929492133143435,8,3,1,0.193048805427952,0.815493553880714,4,0,18.678728566695,3,4,2,0,1,1,2,1,2,7
80
+ 38,2,0.188203692922371,2,1,1,3,1,1,46.5605404327037,3.66448881961734,8,1,2,0.366177878589499,0.217528004179393,9,0,13.9243538955,3,4,1,7,3,4,14,7,9,8
81
+ 20,1,0.0,1,2,4,1,3,1,64.5251665412344,1.76556005965621,3,3,1,0.311859436922276,0.261422565519306,2,0,17.776323495146,3,1,1,0,0,2,0,0,9,1
82
+ 34,2,0.873721197014631,1,1,2,2,3,1,52.2047593206339,2.570714165024,5,4,2,0.0,0.911560641788863,5,0,20.1237936782554,3,1,2,7,1,1,4,0,3,6
83
+ 30,3,0.668962504357545,2,1,1,1,3,1,53.2983550932715,2.65479448412058,4,1,1,0.044336263162694,0.310761644954177,2,1,10.3410029768022,3,3,1,0,0,3,0,1,0,0
84
+ 25,2,0.111599733309324,2,27,5,6,2,1,94.3932821309932,1.01142455202132,6,1,2,0.0588144620147767,0.470976435692039,2,0,12.4208996400604,3,1,1,2,1,4,4,2,0,1
85
+ 36,2,0.410798507963005,2,16,2,3,2,1,69.0075924589165,1.09835353624233,5,3,2,0.0411839918915098,0.858893237497417,1,0,11.601284079293,3,1,1,10,0,3,7,3,1,0
86
+ 33,2,0.242771746453078,1,27,5,4,1,1,84.9500319582508,2.2432390572045,4,2,1,0.0,0.618578424455441,6,0,11.8760849400806,3,2,1,0,0,4,0,3,0,1
87
+ 32,3,0.946139773872123,3,1,1,2,2,1,100.0,-0.336825713563831,3,2,3,0.78903958519151,0.0441426774173903,6,1,11.5019593445149,3,1,1,22,2,2,25,6,6,11
88
+ 38,2,0.522810206298461,3,10,4,2,1,2,74.3198915239166,2.53817696550786,5,1,1,0.526351935083306,0.534526160221444,0,0,15.5567377220441,3,1,1,25,2,2,0,0,0,3
89
+ 18,2,0.371064218713859,1,1,2,6,1,2,71.8537357448658,1.96133220572076,1,2,2,0.13784008421834,0.906992789835979,2,0,9.22344448485286,3,2,1,6,2,4,0,2,0,0
90
+ 25,1,0.0657451757033434,2,2,1,2,2,1,55.8465917211613,2.19250413755175,6,1,3,0.0,0.212225378389616,0,1,21.8426378390144,3,1,1,1,0,3,0,0,0,1
91
+ 42,2,0.661593340762081,2,4,2,1,1,1,69.5572052280453,3.6703326924946,1,4,1,0.180865366477182,0.567356920305377,0,0,17.3035019856311,3,3,1,14,3,2,20,2,0,6
92
+ 58,3,0.344378940901919,2,17,1,3,4,2,82.0612589389906,1.65624537577875,7,3,3,0.349772749037473,0.43878636124656,0,0,10.5182540387197,3,3,1,7,4,2,13,8,0,8
93
+ 26,2,0.294054755831915,1,1,2,6,2,2,100.0,3.13016282768429,5,1,2,0.131897323187452,0.741690258031523,0,0,13.0864158725073,3,3,1,10,1,3,2,4,0,8
94
+ 53,3,0.327562997022995,1,1,2,2,1,2,70.3339039309627,1.48484842567305,5,3,2,0.39913440519942,0.808863366009971,8,1,10.2361495536742,3,4,1,21,2,3,0,1,0,7
95
+ 51,3,0.486897160786256,3,5,1,5,2,1,30.1517064036679,3.60970410790475,7,4,2,0.0,1.0,4,0,13.3261020569129,3,3,1,8,3,3,5,10,9,5
96
+ 24,2,0.0,2,17,2,5,1,1,89.0549816298444,4.07250236820328,9,2,2,0.390311119543518,0.773158909831957,7,0,15.8333971478341,3,1,1,9,0,2,0,1,0,5
97
+ 37,2,1.0,1,19,1,4,2,1,37.5283621503371,3.17505887239783,4,2,1,0.777472128406974,1.0,6,1,14.6097655989193,3,3,1,18,2,4,4,5,3,9
98
+ 27,2,0.638254653640545,3,3,2,3,2,1,61.4959253207427,3.5292024815491,6,2,2,0.0,0.128339818239367,0,1,13.6956921640298,3,2,1,0,1,3,3,1,1,0
99
+ 37,1,0.970191770062497,2,17,4,4,1,1,37.7331286954416,3.36149064009386,8,3,3,0.164442325636724,0.338528130628634,3,0,24.8483016116964,4,1,1,7,0,3,11,15,8,8
100
+ 31,3,0.794106942470392,2,7,3,3,4,1,47.1348275942282,2.44470116605974,6,3,3,0.0775067148711385,0.383947622952954,2,0,13.940247858847,3,1,1,1,2,3,2,0,0,0
101
+ 32,3,0.0872029008877171,1,18,3,4,1,1,100.0,2.95470712013349,5,1,1,0.359983918742085,0.0,0,0,11.5807832892493,3,2,1,9,2,4,10,8,5,3
draft.ipynb ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "## Data Deployment Draft"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "# import libraries\n",
17
+ "import pandas as pd"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 2,
23
+ "metadata": {},
24
+ "outputs": [
25
+ {
26
+ "data": {
27
+ "text/html": [
28
+ "<div>\n",
29
+ "<style scoped>\n",
30
+ " .dataframe tbody tr th:only-of-type {\n",
31
+ " vertical-align: middle;\n",
32
+ " }\n",
33
+ "\n",
34
+ " .dataframe tbody tr th {\n",
35
+ " vertical-align: top;\n",
36
+ " }\n",
37
+ "\n",
38
+ " .dataframe thead th {\n",
39
+ " text-align: right;\n",
40
+ " }\n",
41
+ "</style>\n",
42
+ "<table border=\"1\" class=\"dataframe\">\n",
43
+ " <thead>\n",
44
+ " <tr style=\"text-align: right;\">\n",
45
+ " <th></th>\n",
46
+ " <th>Age</th>\n",
47
+ " <th>Attrition</th>\n",
48
+ " <th>BusinessTravel</th>\n",
49
+ " <th>DailyRate</th>\n",
50
+ " <th>Department</th>\n",
51
+ " <th>DistanceFromHome</th>\n",
52
+ " <th>Education</th>\n",
53
+ " <th>EducationField</th>\n",
54
+ " <th>EnvironmentSatisfaction</th>\n",
55
+ " <th>Gender</th>\n",
56
+ " <th>...</th>\n",
57
+ " <th>PerformanceRating</th>\n",
58
+ " <th>RelationshipSatisfaction</th>\n",
59
+ " <th>StockOptionLevel</th>\n",
60
+ " <th>TotalWorkingYears</th>\n",
61
+ " <th>TrainingTimesLastYear</th>\n",
62
+ " <th>WorkLifeBalance</th>\n",
63
+ " <th>YearsAtCompany</th>\n",
64
+ " <th>YearsInCurrentRole</th>\n",
65
+ " <th>YearsSinceLastPromotion</th>\n",
66
+ " <th>YearsWithCurrManager</th>\n",
67
+ " </tr>\n",
68
+ " </thead>\n",
69
+ " <tbody>\n",
70
+ " <tr>\n",
71
+ " <th>0</th>\n",
72
+ " <td>39</td>\n",
73
+ " <td>0</td>\n",
74
+ " <td>3</td>\n",
75
+ " <td>0.318958</td>\n",
76
+ " <td>2</td>\n",
77
+ " <td>3</td>\n",
78
+ " <td>1</td>\n",
79
+ " <td>5</td>\n",
80
+ " <td>2</td>\n",
81
+ " <td>1</td>\n",
82
+ " <td>...</td>\n",
83
+ " <td>3</td>\n",
84
+ " <td>2</td>\n",
85
+ " <td>1</td>\n",
86
+ " <td>7</td>\n",
87
+ " <td>3</td>\n",
88
+ " <td>2</td>\n",
89
+ " <td>16</td>\n",
90
+ " <td>11</td>\n",
91
+ " <td>0</td>\n",
92
+ " <td>9</td>\n",
93
+ " </tr>\n",
94
+ " <tr>\n",
95
+ " <th>1</th>\n",
96
+ " <td>29</td>\n",
97
+ " <td>0</td>\n",
98
+ " <td>2</td>\n",
99
+ " <td>0.845930</td>\n",
100
+ " <td>1</td>\n",
101
+ " <td>21</td>\n",
102
+ " <td>3</td>\n",
103
+ " <td>3</td>\n",
104
+ " <td>2</td>\n",
105
+ " <td>1</td>\n",
106
+ " <td>...</td>\n",
107
+ " <td>3</td>\n",
108
+ " <td>3</td>\n",
109
+ " <td>1</td>\n",
110
+ " <td>8</td>\n",
111
+ " <td>3</td>\n",
112
+ " <td>2</td>\n",
113
+ " <td>2</td>\n",
114
+ " <td>5</td>\n",
115
+ " <td>3</td>\n",
116
+ " <td>5</td>\n",
117
+ " </tr>\n",
118
+ " <tr>\n",
119
+ " <th>2</th>\n",
120
+ " <td>40</td>\n",
121
+ " <td>0</td>\n",
122
+ " <td>2</td>\n",
123
+ " <td>0.153782</td>\n",
124
+ " <td>2</td>\n",
125
+ " <td>1</td>\n",
126
+ " <td>1</td>\n",
127
+ " <td>5</td>\n",
128
+ " <td>2</td>\n",
129
+ " <td>1</td>\n",
130
+ " <td>...</td>\n",
131
+ " <td>3</td>\n",
132
+ " <td>1</td>\n",
133
+ " <td>1</td>\n",
134
+ " <td>24</td>\n",
135
+ " <td>2</td>\n",
136
+ " <td>2</td>\n",
137
+ " <td>0</td>\n",
138
+ " <td>0</td>\n",
139
+ " <td>0</td>\n",
140
+ " <td>0</td>\n",
141
+ " </tr>\n",
142
+ " <tr>\n",
143
+ " <th>3</th>\n",
144
+ " <td>24</td>\n",
145
+ " <td>0</td>\n",
146
+ " <td>2</td>\n",
147
+ " <td>0.785534</td>\n",
148
+ " <td>1</td>\n",
149
+ " <td>6</td>\n",
150
+ " <td>3</td>\n",
151
+ " <td>1</td>\n",
152
+ " <td>4</td>\n",
153
+ " <td>2</td>\n",
154
+ " <td>...</td>\n",
155
+ " <td>3</td>\n",
156
+ " <td>3</td>\n",
157
+ " <td>1</td>\n",
158
+ " <td>4</td>\n",
159
+ " <td>0</td>\n",
160
+ " <td>2</td>\n",
161
+ " <td>7</td>\n",
162
+ " <td>8</td>\n",
163
+ " <td>0</td>\n",
164
+ " <td>6</td>\n",
165
+ " </tr>\n",
166
+ " <tr>\n",
167
+ " <th>4</th>\n",
168
+ " <td>44</td>\n",
169
+ " <td>0</td>\n",
170
+ " <td>2</td>\n",
171
+ " <td>1.000000</td>\n",
172
+ " <td>1</td>\n",
173
+ " <td>5</td>\n",
174
+ " <td>5</td>\n",
175
+ " <td>5</td>\n",
176
+ " <td>1</td>\n",
177
+ " <td>1</td>\n",
178
+ " <td>...</td>\n",
179
+ " <td>3</td>\n",
180
+ " <td>4</td>\n",
181
+ " <td>1</td>\n",
182
+ " <td>0</td>\n",
183
+ " <td>2</td>\n",
184
+ " <td>3</td>\n",
185
+ " <td>0</td>\n",
186
+ " <td>5</td>\n",
187
+ " <td>1</td>\n",
188
+ " <td>2</td>\n",
189
+ " </tr>\n",
190
+ " </tbody>\n",
191
+ "</table>\n",
192
+ "<p>5 rows × 30 columns</p>\n",
193
+ "</div>"
194
+ ],
195
+ "text/plain": [
196
+ " Age Attrition BusinessTravel DailyRate Department DistanceFromHome \\\n",
197
+ "0 39 0 3 0.318958 2 3 \n",
198
+ "1 29 0 2 0.845930 1 21 \n",
199
+ "2 40 0 2 0.153782 2 1 \n",
200
+ "3 24 0 2 0.785534 1 6 \n",
201
+ "4 44 0 2 1.000000 1 5 \n",
202
+ "\n",
203
+ " Education EducationField EnvironmentSatisfaction Gender ... \\\n",
204
+ "0 1 5 2 1 ... \n",
205
+ "1 3 3 2 1 ... \n",
206
+ "2 1 5 2 1 ... \n",
207
+ "3 3 1 4 2 ... \n",
208
+ "4 5 5 1 1 ... \n",
209
+ "\n",
210
+ " PerformanceRating RelationshipSatisfaction StockOptionLevel \\\n",
211
+ "0 3 2 1 \n",
212
+ "1 3 3 1 \n",
213
+ "2 3 1 1 \n",
214
+ "3 3 3 1 \n",
215
+ "4 3 4 1 \n",
216
+ "\n",
217
+ " TotalWorkingYears TrainingTimesLastYear WorkLifeBalance YearsAtCompany \\\n",
218
+ "0 7 3 2 16 \n",
219
+ "1 8 3 2 2 \n",
220
+ "2 24 2 2 0 \n",
221
+ "3 4 0 2 7 \n",
222
+ "4 0 2 3 0 \n",
223
+ "\n",
224
+ " YearsInCurrentRole YearsSinceLastPromotion YearsWithCurrManager \n",
225
+ "0 11 0 9 \n",
226
+ "1 5 3 5 \n",
227
+ "2 0 0 0 \n",
228
+ "3 8 0 6 \n",
229
+ "4 5 1 2 \n",
230
+ "\n",
231
+ "[5 rows x 30 columns]"
232
+ ]
233
+ },
234
+ "execution_count": 2,
235
+ "metadata": {},
236
+ "output_type": "execute_result"
237
+ }
238
+ ],
239
+ "source": [
240
+ "# load the data\n",
241
+ "data = pd.read_csv('data/processed_data.csv')\n",
242
+ "\n",
243
+ "# preview the data\n",
244
+ "data.head()"
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": 3,
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": [
253
+ "# sampling data\n",
254
+ "# sample 10% of the data and save as sample_data.csv\n",
255
+ "sample_data = data.sample(frac=0.1, random_state=1)\n",
256
+ "\n",
257
+ "# remove the Attrition column\n",
258
+ "sample_data = sample_data.drop(columns='Attrition')\n",
259
+ "sample_data.to_csv('data/sample_data.csv', index=False)"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": 100,
265
+ "metadata": {},
266
+ "outputs": [
267
+ {
268
+ "data": {
269
+ "text/html": [
270
+ "<style>#sk-container-id-25 {color: black;}#sk-container-id-25 pre{padding: 0;}#sk-container-id-25 div.sk-toggleable {background-color: white;}#sk-container-id-25 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-25 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-25 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-25 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-25 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-25 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-25 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-25 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-25 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-25 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-25 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-25 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-25 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-25 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-25 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-25 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-25 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-25 div.sk-item {position: relative;z-index: 1;}#sk-container-id-25 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-25 div.sk-item::before, #sk-container-id-25 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-25 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-25 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-25 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-25 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-25 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-25 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-25 div.sk-label-container {text-align: center;}#sk-container-id-25 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-25 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-25\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
271
+ " colsample_bylevel=None, colsample_bynode=None,\n",
272
+ " colsample_bytree=None, early_stopping_rounds=None,\n",
273
+ " enable_categorical=False, eval_metric=None, feature_types=None,\n",
274
+ " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
275
+ " interaction_constraints=None, learning_rate=None, max_bin=None,\n",
276
+ " max_cat_threshold=None, max_cat_to_onehot=None,\n",
277
+ " max_delta_step=None, max_depth=4, max_leaves=None,\n",
278
+ " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
279
+ " n_estimators=250, n_jobs=None, num_parallel_tree=None,\n",
280
+ " predictor=None, random_state=None, ...)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-25\" type=\"checkbox\" checked><label for=\"sk-estimator-id-25\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">XGBClassifier</label><div class=\"sk-toggleable__content\"><pre>XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
281
+ " colsample_bylevel=None, colsample_bynode=None,\n",
282
+ " colsample_bytree=None, early_stopping_rounds=None,\n",
283
+ " enable_categorical=False, eval_metric=None, feature_types=None,\n",
284
+ " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
285
+ " interaction_constraints=None, learning_rate=None, max_bin=None,\n",
286
+ " max_cat_threshold=None, max_cat_to_onehot=None,\n",
287
+ " max_delta_step=None, max_depth=4, max_leaves=None,\n",
288
+ " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
289
+ " n_estimators=250, n_jobs=None, num_parallel_tree=None,\n",
290
+ " predictor=None, random_state=None, ...)</pre></div></div></div></div></div>"
291
+ ],
292
+ "text/plain": [
293
+ "XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
294
+ " colsample_bylevel=None, colsample_bynode=None,\n",
295
+ " colsample_bytree=None, early_stopping_rounds=None,\n",
296
+ " enable_categorical=False, eval_metric=None, feature_types=None,\n",
297
+ " gamma=None, gpu_id=None, grow_policy=None, importance_type=None,\n",
298
+ " interaction_constraints=None, learning_rate=None, max_bin=None,\n",
299
+ " max_cat_threshold=None, max_cat_to_onehot=None,\n",
300
+ " max_delta_step=None, max_depth=4, max_leaves=None,\n",
301
+ " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
302
+ " n_estimators=250, n_jobs=None, num_parallel_tree=None,\n",
303
+ " predictor=None, random_state=None, ...)"
304
+ ]
305
+ },
306
+ "execution_count": 100,
307
+ "metadata": {},
308
+ "output_type": "execute_result"
309
+ }
310
+ ],
311
+ "source": [
312
+ "# train a xgboost model\n",
313
+ "from xgboost import XGBClassifier\n",
314
+ "\n",
315
+ "# the target is the Attrition\n",
316
+ "y = data['Attrition']\n",
317
+ "x = data.drop(['Attrition'], axis=1)\n",
318
+ "\n",
319
+ "# train the model\n",
320
+ "model = XGBClassifier()\n",
321
+ "\n",
322
+ "# parameter tuning\n",
323
+ "# from sklearn.model_selection import GridSearchCV\n",
324
+ "\n",
325
+ "# # more in depth search\n",
326
+ "# param_grid = {\n",
327
+ "# 'n_estimators': [100, 200, 300],\n",
328
+ "# 'max_depth': [2, 3, 4],\n",
329
+ "# 'learning_rate': [0.1, 0.01, 0.001]\n",
330
+ "# }\n",
331
+ "\n",
332
+ "# grid_search = GridSearchCV(model, param_grid, cv=10, n_jobs=-1)\n",
333
+ "# grid_search.fit(x_train, y_train)\n",
334
+ "\n",
335
+ "# # best parameters\n",
336
+ "# print(grid_search.best_params_)\n",
337
+ "\n",
338
+ "# train the model with the best parameters\n",
339
+ "model = XGBClassifier(n_estimators=250, max_depth=4)\n",
340
+ "\n",
341
+ "# fit the model\n",
342
+ "model.fit(x, y)"
343
+ ]
344
+ },
345
+ {
346
+ "cell_type": "code",
347
+ "execution_count": 101,
348
+ "metadata": {},
349
+ "outputs": [
350
+ {
351
+ "name": "stdout",
352
+ "output_type": "stream",
353
+ "text": [
354
+ "Accuracy: 1.0\n",
355
+ "Confusion matrix: \n",
356
+ "[[507 0]\n",
357
+ " [ 0 493]]\n"
358
+ ]
359
+ }
360
+ ],
361
+ "source": [
362
+ "# test the model\n",
363
+ "y_pred = model.predict(x)\n",
364
+ "\n",
365
+ "# evaluate the model\n",
366
+ "from sklearn.metrics import accuracy_score\n",
367
+ "accuracy = accuracy_score(y, y_pred)\n",
368
+ "print(f'Accuracy: {accuracy}')\n",
369
+ "\n",
370
+ "# confusion matrix\n",
371
+ "from sklearn.metrics import confusion_matrix\n",
372
+ "conf_matrix = confusion_matrix(y, y_pred)\n",
373
+ "print(f'Confusion matrix: \\n{conf_matrix}')"
374
+ ]
375
+ },
376
+ {
377
+ "cell_type": "code",
378
+ "execution_count": 102,
379
+ "metadata": {},
380
+ "outputs": [
381
+ {
382
+ "data": {
383
+ "text/plain": [
384
+ "['model/model.pkl']"
385
+ ]
386
+ },
387
+ "execution_count": 102,
388
+ "metadata": {},
389
+ "output_type": "execute_result"
390
+ }
391
+ ],
392
+ "source": [
393
+ "# save the model as pkl\n",
394
+ "import joblib\n",
395
+ "joblib.dump(model, 'model/model.pkl')"
396
+ ]
397
+ }
398
+ ],
399
+ "metadata": {
400
+ "kernelspec": {
401
+ "display_name": "Python 3",
402
+ "language": "python",
403
+ "name": "python3"
404
+ },
405
+ "language_info": {
406
+ "codemirror_mode": {
407
+ "name": "ipython",
408
+ "version": 3
409
+ },
410
+ "file_extension": ".py",
411
+ "mimetype": "text/x-python",
412
+ "name": "python",
413
+ "nbconvert_exporter": "python",
414
+ "pygments_lexer": "ipython3",
415
+ "version": "3.11.4"
416
+ }
417
+ },
418
+ "nbformat": 4,
419
+ "nbformat_minor": 2
420
+ }
git.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Get the current commit count
4
+ commit_count=$(git rev-list --count HEAD)
5
+
6
+ # Increment the commit count
7
+ next_commit_count=$((commit_count + 1))
8
+
9
+ # Add all changes
10
+ git add .
11
+
12
+ # Check if a custom message is provided
13
+ if [ -z "$1" ]; then
14
+ commit_message="auto commit #$next_commit_count"
15
+ else
16
+ commit_message="$1 #$next_commit_count"
17
+ fi
18
+
19
+ # Commit with the message
20
+ git commit -m "$commit_message"
21
+
22
+ # Push the changes
23
+ git push
24
+
25
+ # to run: ./git.sh "Your custom message"
lib/conversion_ref.xlsx ADDED
Binary file (9.29 kB). View file
 
lib/scaling_ref.xlsx.xlsx ADDED
Binary file (8.69 kB). View file
 
model/model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d619f70804761291b6ba4e20a26ed8781ec5efe2baf74f8c56acaa5a6c2707db
3
+ size 348121
pages/batch-processing.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from backend.bot import *
3
+ from backend.model import *
4
+ import pandas as pd
5
+ import time
6
+ import google.generativeai as genai
7
+
8
+ # Page title
9
+ st.set_page_config(page_title='Batch Processing', layout='wide')
10
+
11
+ st.title("Attrition Prediction Engine")
12
+ st.write("Welcome to the Attrition Prediction Engine! This tool is designed to help you batch process employee data and predict attrition.")
13
+
14
+ with st.sidebar:
15
+ with st.expander("🧪 Experimental Features", expanded=False):
16
+ st.caption("API token can be obtained at https://aistudio.google.com/.")
17
+ gemini_api = st.text_input("Gemini Token", "", type='password')
18
+ try:
19
+ genai.configure(api_key=gemini_api)
20
+ ai_model = genai.GenerativeModel("gemini-1.5-flash")
21
+ test = ai_model.generate_content("Explain how AI works")
22
+ st.success("API key is valid. Experimental feature access granted.")
23
+ except Exception as e:
24
+ st.error("API key is invalid. You don't have access to experimental features.")
25
+
26
+
27
+ with st.expander("⚠️ Disclaimer", expanded=False):
28
+ st.write("This web app is intended for prediction purposes only. The results are based on the input data provided and \
29
+ the performance of the machine learning model. The accuracy of the predictions may vary depending on data quality \
30
+ and model reliability.")
31
+
32
+ st.caption("MIT License © 2025 Khor Kean Teng, Ng Jing Wen, Lim Sze Chie, Tan Yee Thong, Yee See Marn")
33
+
34
+
35
+ # Display assistant response in chat message container
36
+ with st.chat_message("assistant", avatar="https://cdn4.iconfinder.com/data/icons/heroes-villains-vol-2-colored/100/Terminator-512.png"):
37
+ # response = st.write_stream(response_generator())
38
+ response = st.write("Hello admin! I am Az-147. How can I assist you today?")
39
+ st.caption("If you use predefined data, the file upload step will be hidden.")
40
+ toggle = st.toggle('Use Predefined Data', True)
41
+ data= get_data("data/sample_data.csv")
42
+
43
+ if toggle == False:
44
+ uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
45
+ if uploaded_file is not None:
46
+ data = pd.read_csv(uploaded_file)
47
+
48
+ submit = st.button("Execute", type='primary')
49
+
50
+ model = load_model("model/model.pkl")
51
+
52
+ def count_attrition(predictions):
53
+ return sum(predictions)
54
+
55
+ if submit:
56
+ with st.status("Data Preview", expanded=True):
57
+ time.sleep(.5)
58
+ st.write(f"You've uploaded a data file of {data.shape[0]} rows and {data.shape[1]} columns. Here's a preview of the data:")
59
+ st.write(data.head())
60
+
61
+ with st.status("Predicting Attrition...", expanded=True):
62
+ time.sleep(2)
63
+ prediction = model.predict(data)
64
+ data['Attrition'] = prediction
65
+ attrition_count = count_attrition(prediction)
66
+ output = f"Prediction completed! There are {attrition_count} cases of attrition. Here's a preview of the data with the predicted attrition status:"
67
+ st.write(output)
68
+ st.write(data.head())
69
+
70
+ with st.status("AI Opinion", expanded=True):
71
+ try:
72
+ response = ai_model.generate_content(f"Give some opinions in about 100 word based on the prediction results where there are {attrition_count} cases of attrition.")
73
+ st.write(response.text)
74
+ except Exception as e:
75
+ st.write("You don't have access to this feature. Please authenticate to use this feature.")
76
+
pages/documentation.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+
4
+ st.set_page_config(page_title='Documentation', layout='wide')
5
+
6
+ with st.sidebar:
7
+ with st.expander("⚠️ Disclaimer", expanded=True):
8
+ st.write("This web app is intended for prediction purposes only. The results are based on the input data provided and \
9
+ the performance of the machine learning model. The accuracy of the predictions may vary depending on data quality \
10
+ and model reliability.")
11
+
12
+ st.caption("MIT License © 2025 Khor Kean Teng, Ng Jing Wen, Lim Sze Chie, Tan Yee Thong, Yee See Marn")
13
+
14
+ st.title("📄 Documentation")
15
+ st.markdown("""
16
+ To learn more about the project, please refer to the sections below.
17
+ """)
18
+ st.subheader("About Employee Turnover")
19
+ st.write(
20
+ "Employee turnover, also known as employee attrition, refers to the number of workers leaving an \
21
+ organization over a specified time. It disrupts operations, increases recruitment costs, and impacts \
22
+ productivity, competitiveness, and profitability.")
23
+
24
+ st.subheader("Malaysian Employee Turnover Statistics")
25
+ st.write("- Nearly 49% of Malaysian organizations face employee turnover issues (Al-Suraihi et al., 2021).")
26
+ st.write("- Voluntary turnover rates rose from 6.5% (early 2019) to 8.7% (2020) (Bibi Nabi & Zahir, 2024).")
27
+ st.write("- Manufacturing sector turnover: 24% in 2019 (Kin et al., 2022).")
28
+ st.write("- FMCG sector voluntary turnover: 8.4% in 2020.")
29
+
30
+ st.subheader("Case Study Highlight")
31
+ st.write("Xerox’s Call Centre reduced turnover by 20% using predictive analytics to identify patterns \
32
+ linked to attrition and improve employee engagement. (Solutyics, 2023)")
33
+
34
+ st.subheader("Data Source")
35
+ st.write(
36
+ "The dataset used in this project is from the IBM HR Analytics Employee Attrition & Performance dataset \
37
+ available on Kaggle. It is a synthetic dataset with 1,470 observations and 35 features, covering \
38
+ employee background, employment details, and satisfaction metrics. Below attached the sample data for reference.")
39
+
40
+ # set up the download data
41
+ data = pd.read_csv("data/sample_data.csv")
42
+ data = data.to_csv(index=False).encode("utf-8")
43
+
44
+ st.download_button(
45
+ label="Download Sample Data",
46
+ data=data,
47
+ file_name="sample_data.csv",
48
+ mime="text/csv"
49
+ )
reference.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # How to use Git:
2
+
3
+ ## Basic:
4
+ ```bash
5
+ git commit -m "message"
6
+ git push
7
+ git pull
8
+ git add .
9
+ ```
10
+
11
+ ## Branching:
12
+ ```bash
13
+ git branch # see the branches
14
+ git status # see anything to commit
15
+ git checkout <branchname> # switch branch
16
+
17
+ Put New Branch On Remote
18
+ git push --set-upstream origin <branchname>
19
+
20
+ Delete Branch On Remote
21
+ git push origin --delete <branchname>
22
+
23
+ Delete Branch On Local
24
+ git branch -d <branchname>
25
+
26
+ Create Branch On Local
27
+ git checkout -b <branchname>
28
+
29
+ Merge branch
30
+ git merge <branchname> # run this from the target branch; it merges <branchname> into the current branch
31
+ ```
32
+
33
+ ## Creating file
34
+ ```bash
35
+ cat > <filename>
36
+ vim <filename>
37
+ touch <filename>
38
+ ```
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pickle
2
+ pandas
3
+ joblib
4
+ google-generativeai
5
+ xgboost
run.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # only works when Python 3.12 is installed and reachable through the `py` launcher; run from a bash shell
2
+ py -3.12 -m streamlit run app.py