shoukaku committed
Commit 8eb0c1a · 1 Parent(s): 501420e

add get input from url

app.py CHANGED

@@ -1,6 +1,8 @@
 from typing import Callable
 import gradio as gr
 
+from src.scraper.generic_scraper import GenericScraper
+
 if gr.NO_RELOAD:
     import numpy as np
     from src.model import BaseTransferLearningModel
@@ -119,8 +121,9 @@ class WebUI:
         self.is_ready = False
         self.model = self.models[0][1]()
         self.is_ready = True
+        self.scraper = GenericScraper()
 
-    def _change_model(self, idx: int) -> None:
+    def _change_model(self, idx: int) -> str:
         if gr.NO_RELOAD:
             try:
                 print(self.models[idx])
@@ -132,6 +135,7 @@ class WebUI:
             except Exception as e:
                 print(e)
                 gr.Error(e)
+        return self.models[idx][0]
 
     def _predict(self, text: str) -> str:
         print(text)
@@ -140,10 +144,25 @@ class WebUI:
         output = self.model.predict(text, self.device).detach().cpu().numpy()[0]
         return f'Fake: {output[0]:.10f}, Real: {output[1]:.10f}'
 
+    def _scrape(self, url: str) -> str:
+        try:
+            return self.scraper.scrape(url)
+        except Exception as e:
+            return str(e)
+
     def get_ui(self) -> None:
         with gr.Blocks() as ui:
             with gr.Row():
                 with gr.Column():
+                    t_url = gr.Textbox(label='URL')
+                    with gr.Row():
+                        btn_scrape_reset = gr.ClearButton(
+                            value='Reset',
+                            components=[
+                                t_url,
+                            ],
+                        )
+                        btn_scrape = gr.Button(value='Get From URL', variant='primary')
                     t_inp = gr.Textbox(label='Input')
                     with gr.Row():
                         btn_reset = gr.ClearButton(
@@ -157,13 +176,14 @@ class WebUI:
                     ddl_model = gr.Dropdown(
                         label='Model',
                         choices=[model[0] for model in self.models],
-                        value=self.models[0][0],
+                        value=self._change_model(0),
                         type='index',
                         interactive=True,
                         filterable=True,
                     )
                     t_out = gr.Textbox(label='Output')
                     ddl_model.change(fn=self._change_model, inputs=ddl_model)
+                    btn_scrape.click(fn=self._scrape, inputs=t_url, outputs=t_inp)
                     btn_submit.click(fn=self._predict, inputs=t_inp, outputs=t_out)
         return ui
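The wiring added here follows Gradio's standard event pattern: component.click(fn=..., inputs=..., outputs=...) routes one component's value through a function into another component. A minimal standalone sketch of the same URL-to-Input-to-Output flow, with the scraper and model replaced by stubs (fake_scrape, fake_predict, and the Submit button label are hypothetical illustrations, not part of the commit):

import gradio as gr

def fake_scrape(url: str) -> str:
    # Stand-in for GenericScraper.scrape(): returns the page's main text.
    return f'Article text fetched from {url}'

def fake_predict(text: str) -> str:
    # Stand-in for the model: returns the formatted class probabilities.
    return f'Fake: {0.5:.10f}, Real: {0.5:.10f}'

with gr.Blocks() as demo:
    t_url = gr.Textbox(label='URL')
    btn_scrape = gr.Button(value='Get From URL', variant='primary')
    t_inp = gr.Textbox(label='Input')
    btn_submit = gr.Button(value='Submit', variant='primary')
    t_out = gr.Textbox(label='Output')

    # Scraping fills the Input box; prediction then reads from it.
    btn_scrape.click(fn=fake_scrape, inputs=t_url, outputs=t_inp)
    btn_submit.click(fn=fake_predict, inputs=t_inp, outputs=t_out)

if __name__ == '__main__':
    demo.launch()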
requirements.txt CHANGED

@@ -1,3 +1,4 @@
+beautifulsoup4==4.12.3
 numpy==1.26.4
 torch==2.2.1
 transformers==4.39.3
src/__init__.py CHANGED

@@ -1 +1,2 @@
 from .distilbert_tf import DistilBertTransferLearningModel
+from .scraper import GenericScraper
src/scraper/__init__.py ADDED

@@ -0,0 +1 @@
+from .generic_scraper import GenericScraper
src/scraper/generic_scraper.py ADDED

@@ -0,0 +1,37 @@
+from typing import Optional
+import requests
+from bs4 import BeautifulSoup, ResultSet
+
+
+class GenericScraper:
+
+    def __init__(self) -> None:
+        pass
+
+    def scrape(self, url: str) -> str:
+        response: requests.Response = requests.get(url)
+
+        if response.status_code != 200:
+            raise Exception(
+                f'Failed to fetch url: {url} with status code {response.status_code}'
+            )
+
+        soup: BeautifulSoup = BeautifulSoup(response.content, 'html.parser')
+
+        sections: ResultSet[BeautifulSoup] = soup.find_all(
+            ['div', 'section', 'article']
+        )
+        max_p_len = 0
+        best_section: Optional[BeautifulSoup] = None
+
+        for section in sections:
+            ps = section.find_all('p', recursive=False)
+            p_len = len('\n'.join([p.get_text() for p in ps]))
+            if p_len > max_p_len:
+                max_p_len = p_len
+                best_section = section
+
+        if best_section is None:
+            raise Exception('No sections found')
+
+        return best_section.get_text()
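GenericScraper implements a simple main-content heuristic: among all div, section, and article containers, it picks the one whose direct p children carry the most text, then returns that container's full text. A usage sketch under that reading (the URL is a placeholder for illustration, not taken from the commit):

from src.scraper.generic_scraper import GenericScraper

scraper = GenericScraper()
try:
    # Placeholder URL for illustration only.
    article_text = scraper.scrape('https://example.com/some-article')
    print(article_text[:200])
except Exception as e:
    # scrape() raises on non-200 responses and when no candidate
    # container yields any direct-paragraph text.
    print(f'Scrape failed: {e}')

Counting only direct children via find_all('p', recursive=False) is a deliberate trade-off: it stops the outermost div, which contains every paragraph transitively, from always winning, at the cost of missing article bodies whose paragraphs sit one nesting level deeper.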