Jeet Paul committed on
Commit
67d85c4
·
1 Parent(s): 530e03c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import re
5
+ import pickle
6
+ import pdfminer
7
+ from pdfminer.high_level import extract_text
8
+ from tensorflow.keras.models import Sequential
9
+ from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, LSTM, Dense, GlobalMaxPooling1D
10
+ from tensorflow.keras.preprocessing.text import Tokenizer
11
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
12
+ from tensorflow.keras.utils import to_categorical
13
+ from sklearn.preprocessing import LabelEncoder
14
+
15
+
16
def cleanResume(resumeText):
    """Strip URLs, tags, punctuation and non-ASCII characters from resume text.

    The substitutions run in a fixed order: URLs, the literal tokens
    ``RT``/``cc``, hashtags, mentions, ASCII punctuation, non-ASCII
    characters, and finally every whitespace run is collapsed to one space.

    Args:
        resumeText: Raw text of a resume.

    Returns:
        The cleaned text as a single string.
    """
    # All patterns are raw strings so the regex escapes are not parsed as
    # (invalid) Python string escapes, which newer interpreters warn about.
    resumeText = re.sub(r'http\S+\s*', ' ', resumeText)
    # NOTE: this also matches inside words (e.g. "account" -> "a ount").
    # Kept as-is so uploaded text is cleaned exactly like the dataset text.
    resumeText = re.sub(r'RT|cc', ' ', resumeText)
    resumeText = re.sub(r'#\S+', '', resumeText)
    resumeText = re.sub(r'@\S+', ' ', resumeText)
    resumeText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', resumeText)
    resumeText = re.sub(r'[^\x00-\x7f]', r' ', resumeText)
    resumeText = re.sub(r'\s+', ' ', resumeText)
    return resumeText
26
+
27
def pdf_to_text(file):
    """Return the text that pdfminer's ``extract_text`` pulls out of ``file``."""
    return extract_text(file)
31
+
32
def predict_category(resumes_data, selected_category):
    """Rank resumes by the model's predicted probability for one category.

    Args:
        resumes_data: List of dicts, each with a ``'ResumeText'`` and a
            ``'FileName'`` entry.
        selected_category: Name of the category column to sort by; it must be
            one of ``label.classes_``.

    Returns:
        A list of ``{'Rank': int, 'FileName': str}`` dicts, best match first
        (ranks start at 1). An empty input yields an empty list.
    """
    # Nothing to rank: return before the model is built and before the
    # 'ResumeText' column lookup, which would fail on an empty frame.
    if not resumes_data:
        return []

    # Build the network and load its trained weights
    model = load_deeprank_model()

    # Process the resumes data
    resumes_df = pd.DataFrame(resumes_data)
    resumes_text = resumes_df['ResumeText'].values

    # Tokenize the text and convert to sequences
    tokenized_text = tokenizer.texts_to_sequences(resumes_text)

    # Pad sequences to have the same length
    max_sequence_length = 500  # Assuming maximum sequence length of 500 words
    padded_text = pad_sequences(tokenized_text, maxlen=max_sequence_length)

    # Make predictions
    predicted_probs = model.predict(padded_text)

    # One probability column per job category
    for i, category in enumerate(label.classes_):
        resumes_df[category] = predicted_probs[:, i]

    resumes_df_sorted = resumes_df.sort_values(by=selected_category, ascending=False)

    # Ranks follow the sorted order, starting at 1
    return [
        {'Rank': rank, 'FileName': file_name}
        for rank, file_name in enumerate(resumes_df_sorted['FileName'], start=1)
    ]
64
+
65
def load_deeprank_model(max_sequence_length=500):
    """Rebuild the DeepRank network and load its trained weights.

    The architecture is reconstructed layer by layer and the weights are then
    read from ``deeprank_model.h5``. ``vocab_size`` and ``num_classes`` are
    module-level values set by the script's start-up code.

    Args:
        max_sequence_length: Length of the padded input sequences. The
            default of 500 matches the padding length used in
            ``predict_category``. Previously this name was read as a global
            that is never defined at module level, so the call raised
            ``NameError``.

    Returns:
        The compiled Keras ``Sequential`` model with weights loaded.
    """
    model = Sequential()
    # Add layers to the model (example architecture, adjust as needed)
    model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=max_sequence_length))
    model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(LSTM(64))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.load_weights('deeprank_model.h5')  # Replace 'deeprank_model.h5' with your saved model file
    return model
77
+
78
def main():
    """Render the Streamlit page: upload PDFs, pick a category, show ranks."""
    st.title("Resume Ranking App")
    st.text("Upload resumes and select a category to rank them.")

    resumes_data = []
    selected_category = ""

    # Several PDFs may be uploaded at once
    uploads = st.file_uploader("Upload resumes", type=["pdf"], accept_multiple_files=True)
    if uploads:
        resumes_data = [
            {'ResumeText': cleanResume(pdf_to_text(pdf)), 'FileName': pdf.name}
            for pdf in uploads
        ]
        # NOTE(review): the selector is assumed to appear only once files are
        # uploaded; confirm against the intended nesting.
        selected_category = st.selectbox("Select a category to rank by", label.classes_)

    if st.button("Rank Resumes"):
        if resumes_data and selected_category:
            ranking = predict_category(resumes_data, selected_category)
            st.write(pd.DataFrame(ranking))
        else:
            st.warning("Please upload resumes and select a category to continue.")
99
+
100
if __name__ == '__main__':
    # Read the dataset, clean each resume and fit the category label encoder
    df = pd.read_csv('UpdatedResumeDataSet.csv')
    df['cleaned'] = df['Resume'].apply(cleanResume)
    label = LabelEncoder()
    df['Category'] = label.fit_transform(df['Category'])

    # Fit the tokenizer on the cleaned text, then derive the class count and
    # vocabulary size (word index size plus one)
    text = df['cleaned'].values
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(text)
    num_classes = len(label.classes_)
    vocab_size = len(tokenizer.word_index) + 1

    main()