Al John Lexter Lozano committed on
Commit
9a34627
·
1 Parent(s): ead2dcb

add DL model, fixed examples, add visual output

Browse files
Files changed (2) hide show
  1. app.py +85 -14
  2. demo_mixed.txt +5 -0
app.py CHANGED
@@ -1,20 +1,31 @@
 
1
  from fastapi import File
2
  import gradio as gr
3
  from gib_detect_module import detect
4
  import csv
 
 
 
 
 
 
 
 
 
5
 
6
  def greet(name):
7
  return "Hello " + name + "!!"
8
 
9
  def detect_gibberish(line,f):
10
-
11
  if line:
12
  if detect(line):
13
- return "Valid!!!!", None
14
  else:
15
- return "Bollocks Giberrish",None
16
  elif f:
17
- return None, annotate_csv(f)
 
 
18
 
19
 
20
  def annotate_csv(f):
@@ -25,22 +36,82 @@ def annotate_csv(f):
25
  cwriter = csv.writer(csvout, delimiter=',',
26
  quotechar='"', quoting=csv.QUOTE_MINIMAL)
27
  for row in creader:
28
- print(row)
29
  row.append(str(detect(row[0])))
30
- cwriter.writerow(row)
31
-
32
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  return "out.csv"
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  inputFile=gr.inputs.File(file_count="single", type="file", label="File to Annotate", optional=True)
 
 
 
 
 
 
 
36
  outputFile=gr.outputs.File( label="Annotated CSV")
 
 
37
 
38
  examples=[
39
- ["quetzalcoatl","demo_blank.csv"],
40
- ["Shinkansen","demo_blank.csv"],
41
- ["aasdf","demo_blank.csv"],
42
- ["Covfefe","demo_blank.csv"]
 
 
 
 
 
 
43
  ]
44
- iface = gr.Interface(fn=[detect_gibberish], inputs=["text",inputFile], outputs=["text",outputFile],examples=examples, allow_flagging='never')
 
 
 
45
 
46
- iface.launch()
 
 
1
+ from cProfile import label
2
  from fastapi import File
3
  import gradio as gr
4
  from gib_detect_module import detect
5
  import csv
6
+ import torch
7
+ import tensorflow as tf
8
+
9
+
10
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
11
+ DLmodel = AutoModelForSequenceClassification.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
12
+ tokenizer = AutoTokenizer.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
13
+
14
+
15
 
16
  def greet(name):
17
  return "Hello " + name + "!!"
18
 
19
  def detect_gibberish(line,f):
 
20
  if line:
21
  if detect(line):
22
+ return "Valid!!!!", None,None
23
  else:
24
+ return "Bollocks Giberrish",None,None
25
  elif f:
26
+ return None, annotate_csv(f), None
27
+
28
+
29
 
30
 
31
  def annotate_csv(f):
 
36
  cwriter = csv.writer(csvout, delimiter=',',
37
  quotechar='"', quoting=csv.QUOTE_MINIMAL)
38
  for row in creader:
 
39
  row.append(str(detect(row[0])))
40
+ cwriter.writerow(row)
41
+ return "out.csv"
42
+
43
+
44
def annotate_csv_deep(f):
    """Annotate each CSV row with the deep-learning model's top prediction.

    Reads the uploaded file ``f.name`` as CSV, classifies the first column of
    every row, appends the predicted label and its probability (formatted as
    a whole-number percentage) to the row, and writes the result to
    ``out.csv`` in the current working directory.

    Args:
        f: Uploaded file object exposing the path of the CSV in ``f.name``.

    Returns:
        The output path, always ``"out.csv"``.
    """
    labels = DLmodel.config.id2label
    # newline='' on both handles is what the csv module expects, so quoted
    # fields containing line breaks round-trip correctly.
    with open(f.name, newline='') as csvfile, \
            open('out.csv', 'w', newline='') as csvout:
        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
        cwriter = csv.writer(csvout, delimiter=',',
                             quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for row in creader:
            if not row:
                # Blank line in the input: nothing to classify. Keep the row
                # so output lines stay aligned with input lines.
                cwriter.writerow(row)
                continue

            # Classify only the first column, like annotate_csv does. Passing
            # the whole row would be treated as a batch of separate texts.
            inputs = tokenizer(row[0], return_tensors="pt", truncation=True)
            outputs = DLmodel(**inputs)
            probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
            best = max(range(len(probs)), key=probs.__getitem__)

            row.append(labels[best])
            row.append("{:.0%}".format(probs[best]))
            cwriter.writerow(row)
    return "out.csv"
61
 
62
+
63
def detect_gibberish_deep(line, f):
    """Classify text, or annotate a CSV file, with the deep-learning model.

    Text takes precedence: when ``line`` is non-empty the file is ignored.

    Args:
        line: Text entered by the user; may be empty or ``None``.
        f: Uploaded file object or ``None``; forwarded to
            ``annotate_csv_deep``.

    Returns:
        A 3-tuple ``(readable_scores, csv_path, label_scores)``:

        * text input: a multi-line ``"label : NN%"`` summary, ``None``, and a
          dict mapping each label to its probability;
        * file input: ``None``, the annotated CSV path, ``None``;
        * neither: ``(None, None, None)``.
    """
    if line:
        # truncation=True keeps over-long text within the model's maximum
        # sequence length instead of failing inside the forward pass.
        inputs = tokenizer(line, return_tensors="pt", truncation=True)
        labels = DLmodel.config.id2label
        outputs = DLmodel(**inputs)
        probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
        # Look labels up by class id rather than relying on the iteration
        # order of the id2label mapping.
        output = {labels[i]: p for i, p in enumerate(probs)}
        readable_output = "".join(
            "{} : {:.0%}\n".format(name, p) for name, p in output.items()
        )
        return readable_output, None, output
    if f:
        return None, annotate_csv_deep(f), None
    # Neither text nor file was supplied. Return an explicit empty triple
    # so the caller always receives one value per declared output.
    return None, None, None
76
+
77
+
78
+
79
def detect_gibberish_abstract(model, line, f):
    """Dispatch to the detector selected by ``model``.

    ``"Deep Learning Model"`` selects ``detect_gibberish_deep``; any other
    value selects ``detect_gibberish``. The chosen detector's result for
    ``(line, f)`` is returned unchanged.
    """
    use_deep = model == "Deep Learning Model"
    handler = detect_gibberish_deep if use_deep else detect_gibberish
    return handler(line, f)
84
+
85
+
86
# --- Input components -------------------------------------------------------
inputLine = gr.inputs.Textbox(
    lines=1,
    placeholder="Input text here, if both text and file have values, only the text input will be processed.",
    default="",
    label="Text",
    optional=False,
)
inputFile = gr.inputs.File(
    file_count="single",
    type="file",
    label="File to Annotate",
    optional=True,
)

# The first entry is the value detect_gibberish_abstract checks for; any
# other choice is routed to the Markov-chain detector.
choices = ["Deep Learning Model", "Markov Chain"]
inputModel = gr.inputs.Dropdown(choices)

# --- Output components ------------------------------------------------------
# NOTE(review): outputLine is defined but the interface below passes the
# "text" shortcut instead; confirm whether it was meant to be used.
outputLine = gr.outputs.Textbox(type="auto", label=None)
outputFile = gr.outputs.File(label="Annotated CSV")
label = gr.outputs.Label(num_top_classes=4)

# --- Examples: [model, text, file] ------------------------------------------
# Text examples first (every model with every sample word), then file-only
# examples (every model with every demo file).
examples = [
    [model_name, sample, "demo_blank.csv"]
    for model_name in choices
    for sample in ("quetzalcoatl", "aasdf", "Covfefe")
] + [
    [model_name, "", demo_file]
    for model_name in choices
    for demo_file in ("demo_bad.txt", "demo_mixed.txt")
]

iface = gr.Interface(
    fn=[detect_gibberish_abstract],
    inputs=[inputModel, inputLine, inputFile],
    outputs=["text", outputFile, label],
    examples=examples,
    allow_flagging='never',
)
iface.launch()
demo_mixed.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ "The quick brown fox."
2
+ "nmnjcviburili,<>"
3
+ "This is a legitimate line"
4
+ "ertrjiloifdfyyoiu"
5
+ "1+1 =2"