Spaces:

rushidarge
/

Gallagher_App

Sleeping

App Files Files Community

rushidarge committed on Oct 29, 2023

Commit

199b89f

1 Parent(s): 256a345

Upload 8 files

Browse files

Files changed (8) hide show

app.py +128 -0
output/bert_acc_src.pickle +3 -0
output/count_vector_step_1.pkl +3 -0
output/count_vector_step_2.pkl +3 -0
output/fewer_class_dictionary.pkl +3 -0
output/lr_basemodel_step_2.pickle +3 -0
output/lr_step_1.pickle +3 -0
requirements.txt +14 -0

app.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import streamlit as st
+import pandas as pd
+import pickle
+import joblib
+import re
+import pandas as pd
+import numpy as np
+import re
+import string
+from string import digits
+from sklearn import metrics
+import pickle
+import time
+from sentence_transformers import SentenceTransformer
+# Page heading rendered at the top of the Streamlit app.
+st.title(body="Text Classification and Excel Processing App")
+# Upload widget limited to .xlsx workbooks; the rest of the script only runs
+# its processing branch once this is no longer None.
+uploaded_file = st.file_uploader(label="Upload an Excel file", type=["xlsx"])
+# Contraction expansions, applied in this order to lower-cased text. The
+# specific forms ("won't", "can't") must run before the generic "n't" rule,
+# otherwise they would be rewritten to "wo not" / "ca not".
+_CONTRACTIONS = (
+    ("won't", "will not"),
+    ("can't", "can not"),
+    ("n't", " not"),
+    ("'re", " are"),
+    ("'s", " is"),
+    ("'d", " would"),
+    ("'ll", " will"),
+    ("'t", " not"),
+    ("'ve", " have"),
+    ("'m", " am"),
+)
+# One translation table that deletes every ASCII punctuation mark and digit.
+# It also covers the characters of the former '[-_.:;\[\]\|,]' clean-up pass
+# (all of them are in string.punctuation), so that pass is no longer needed.
+_PUNCT_AND_DIGITS = str.maketrans('', '', string.punctuation + digits)
+def _clean_claim_text(text):
+    """Normalise one claim description string.
+
+    Lower-cases the text, expands the contractions listed in _CONTRACTIONS,
+    removes ASCII punctuation and digits, strips leading/trailing whitespace
+    and collapses runs of spaces into a single space.
+    """
+    text = text.lower()
+    for contraction, expansion in _CONTRACTIONS:
+        text = text.replace(contraction, expansion)
+    text = text.translate(_PUNCT_AND_DIGITS)
+    return re.sub(" +", " ", text.strip())
+def pre_processing(data_frame):
+    """Clean the 'Claim Description' column of ``data_frame`` in place.
+
+    Args:
+        data_frame: pandas DataFrame that has a 'Claim Description' column.
+
+    Returns:
+        The same DataFrame object, with the column replaced by its cleaned
+        text (see _clean_claim_text).
+
+    Raises:
+        KeyError: if the 'Claim Description' column is missing.
+    """
+    # Blank spreadsheet cells arrive as NaN and numeric cells as numbers;
+    # calling .lower() on those used to raise AttributeError. Missing values
+    # become empty strings and everything else is coerced to str first.
+    descriptions = data_frame['Claim Description'].fillna('').astype(str)
+    data_frame['Claim Description'] = descriptions.map(_clean_claim_text)
+    return data_frame
+step_1_model_path = "output/lr_step_1.pickle"
+step_2_model_path = "output/lr_basemodel_step_2.pickle"
+@st.cache_resource
+def _load_artifacts():
+    """Load every model artifact the app needs and return them as a tuple.
+
+    Streamlit re-executes this script on each user interaction; the
+    cache_resource decorator makes the artifacts load once per server process
+    instead of on every rerun.
+
+    NOTE: pickle/joblib execute arbitrary code while loading. These files ship
+    with the app; never point these paths at user-supplied data.
+
+    Returns:
+        (step_1_model, step_2_model, count_vector_step_1, count_vector_step_2,
+        fewer_class_dict, acc_src_model, sentence_model)
+    """
+    # Context managers close the file handles that pickle.load(open(...)) leaked.
+    with open(step_1_model_path, 'rb') as step_1_file:
+        lr_step_1 = pickle.load(step_1_file)
+    with open(step_2_model_path, 'rb') as step_2_file:
+        lr_step_2 = pickle.load(step_2_file)
+    return (
+        lr_step_1,
+        lr_step_2,
+        joblib.load("output/count_vector_step_1.pkl"),
+        joblib.load("output/count_vector_step_2.pkl"),
+        joblib.load("output/fewer_class_dictionary.pkl"),
+        joblib.load("output/bert_acc_src.pickle"),
+        SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2'),
+    )
+# Same module-level names as before, so the rest of the script is unaffected.
+(step_1_model, step_2_model, count_vector_step_1, count_vector_step_2,
+ fewer_class_dict, acc_src_model, model) = _load_artifacts()
+def predict(model_1,model_2,final_dict,query):
+    """Return the predicted label for one pre-processed claim description.
+
+    Args:
+        model_1: first-stage classifier; fed count_vector_step_1 features.
+        model_2: second-stage classifier; fed count_vector_step_2 features and
+            consulted only when the first stage answers 'med'.
+        final_dict: mapping of exact query text to an override result; a hit
+            takes precedence over both models.
+        query: the description text to classify.
+
+    Returns:
+        Element 0 of the override entry when ``query`` is in ``final_dict``,
+        otherwise element 0 of the model prediction.
+    """
+    # An override always replaced the model output, so check it first and
+    # skip vectorising/inference when it applies.
+    # NOTE(review): the entry is indexed with [0] exactly as before; if the
+    # dictionary stores plain strings this yields only their first character
+    # - confirm the value type stored in fewer_class_dictionary.pkl.
+    if query in final_dict:
+        return final_dict[query][0]
+    y_pred = model_1.predict(count_vector_step_1.transform([query]))
+    if y_pred[0] == 'med':
+        # Stage 1 only narrowed it down to 'med'; stage 2 gives the final label.
+        y_pred = model_2.predict(count_vector_step_2.transform([query]))
+    return y_pred[0]
+# Process the workbook once the user has uploaded one: clean the text, add the
+# two prediction columns and offer the result as a download.
+if uploaded_file is not None:
+    # Imported here so this block is self-contained; only the in-memory
+    # workbook below needs it.
+    from io import BytesIO
+    # Read the uploaded Excel file
+    excel_data = pd.read_excel(uploaded_file)
+    if 'Claim Description' not in excel_data.columns:
+        # Tell the user what is wrong instead of surfacing a raw KeyError.
+        st.error("The uploaded file must contain a 'Claim Description' column.")
+        st.stop()
+    print('Preprocessing Started')
+    # pre_processing returns the DataFrame it was given, so excel_data itself
+    # carries the cleaned text from here on.
+    test_data = pre_processing(excel_data)
+    x_test = test_data['Claim Description']
+    print('Prediction Started')
+    excel_data['predicted_coverage_code'] = [
+        predict(step_1_model, step_2_model, fewer_class_dict, query)
+        for query in x_test
+    ]
+    # The accident-source classifier consumes sentence embeddings of the text.
+    X_bert_enc = model.encode(x_test.values, show_progress_bar=True)
+    excel_data['predicted_accident_src'] = acc_src_model.predict(X_bert_enc)
+    # Build the workbook in memory: a fixed file name on the server's disk
+    # would be shared by every session, and the former markdown
+    # "data:processed_data.xlsx" link did not deliver a file.
+    output_filename = "processed_data.xlsx"
+    workbook = BytesIO()
+    excel_data.to_excel(workbook, index=False)
+    st.download_button(
+        label="Download Processed Data",
+        data=workbook.getvalue(),
+        file_name=output_filename,
+        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    )
+    st.write("Done")

output/bert_acc_src.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5dfe6bea7e8b9bee7801f0653dd191b2b030f512ef4b05624e2112011282ca60
+size 969252

output/count_vector_step_1.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db058f56e2185939cb35485acc242922e440365b06845dea9558dda5238585e1
+size 1111318

output/count_vector_step_2.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0eea35f3601237ec71bf083d1bb9a548878f7ebc48649b784c87cd244c445712
+size 136198

output/fewer_class_dictionary.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bf4ede12a0d37cef25165d6b32de7a60057129d98c346395ee5ee8cf2220490
+size 1959

output/lr_basemodel_step_2.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:85c831ca28039a004d57ca37e8b1a94a9b68863361ae9bfa997958e3b87922c7
+size 2152799

output/lr_step_1.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f9e7eceb902734e3f2050789c4565b3f91be4a2d9477b444b2c58c988e9eb269
+size 8070547

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+joblib==1.1.0
+numpy==1.21.5
+pandas==1.4.4
+regex==2022.7.9
+scikit-image==0.19.2
+scikit-learn==1.0.2
+scikit-learn-intelex==2021.20221004.171935
+scipy==1.9.1
+Scrapy==2.6.2
+sentence-transformers==2.2.2
+streamlit==1.28.0
+tokenizers==0.14.1
+tqdm==4.64.1
+transformers==4.34.1