Shredder committed on
Commit
2c330bd
·
1 Parent(s): 4b31fb8

Upload score_fincat.py

Browse files
Files changed (1) hide show
  1. score_fincat.py +33 -0
score_fincat.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import nltk
3
+ from fincat_utils import extract_context_words
4
+ from fincat_utils import bert_embedding_extract
5
+ import pickle
6
# Load the pickled classifier that score_fincat() calls .predict() on.
# A context manager guarantees the file handle is closed after loading.
# NOTE: unpickling can execute arbitrary code; only ship a trusted model file.
with open("lr_clf_FiNCAT.pickle", "rb") as model_file:
    lr_clf = pickle.load(model_file)

# Fetch the NLTK 'punkt' resource at import time.
# NOTE(review): presumably required by the fincat_utils helpers; confirm.
nltk.download('punkt')
8
+
9
def score_fincat(txt):
    """Label every whitespace-separated token of *txt*.

    Tokens that contain no digit are passed through with the blank label
    ``' '``. Tokens that contain a digit have at most one trailing
    punctuation character stripped, are located inside the space-padded
    text, and are then classified by the module-level ``lr_clf`` using the
    features returned by ``bert_embedding_extract``.

    Args:
        txt: The text to score.

    Returns:
        A list of ``(token, label)`` tuples in input order. The label is
        ``' '`` for tokens without digits and ``' In-claim'`` or
        ``'Out-of-Claim'`` for classified numeric tokens. If the first
        element of the extracted features equals the string ``'None'``,
        the padded text is appended with a blank label and the list built
        so far is returned immediately.
    """
    trailing_punctuation = ('.', ',', ';', ':', '-', '!', '?', ')', '"', "'")
    highlight = []
    # Keep the one-space padding so the paragraph and offsets handed to the
    # helper functions stay the same as before for already-working inputs.
    txt = " " + txt + " "
    cursor = 0
    for token in txt.split():
        # Search forward from the end of the previous token. This yields the
        # offset of *this* occurrence (not the first one in the text) and
        # works whether the token is bordered by spaces, tabs or newlines.
        start = txt.find(token, cursor)
        cursor = start + len(token)

        if not any(char.isdigit() for char in token):
            highlight.append((token, ' '))
            continue

        # Drop a single trailing punctuation mark so "42," is scored as "42".
        word = token[:-1] if token[-1] in trailing_punctuation else token
        end = start + len(word)
        x = {'paragraph': txt, 'offset_start': start, 'offset_end': end}
        context_text = extract_context_words(x)
        features = bert_embedding_extract(context_text, word)
        if features[0] == 'None':
            # NOTE(review): this appends the whole padded text after the
            # tokens already collected and stops; behaviour kept unchanged,
            # confirm that it is the intended fallback.
            highlight.append((txt, ' '))
            return highlight
        prediction = lr_clf.predict(features.reshape(1, 768))
        highlight.append((word, ' In-claim' if prediction == 1 else 'Out-of-Claim'))
    return highlight