add get input from url
Files changed:
- app.py +22 -2
- requirements.txt +1 -0
- src/__init__.py +1 -0
- src/scraper/__init__.py +1 -0
- src/scraper/generic_scraper.py +37 -0
app.py
CHANGED
```diff
@@ -1,6 +1,8 @@
 from typing import Callable
 import gradio as gr
 
+from src.scraper.generic_scraper import GenericScraper
+
 if gr.NO_RELOAD:
     import numpy as np
     from src.model import BaseTransferLearningModel
@@ -119,8 +121,9 @@ class WebUI:
         self.is_ready = False
         self.model = self.models[0][1]()
         self.is_ready = True
+        self.scraper = GenericScraper()
 
-    def _change_model(self, idx: int) -> None:
+    def _change_model(self, idx: int) -> str:
         if gr.NO_RELOAD:
             try:
                 print(self.models[idx])
@@ -132,6 +135,7 @@ class WebUI:
         except Exception as e:
             print(e)
             gr.Error(e)
+        return self.models[idx][0]
 
     def _predict(self, text: str) -> str:
         print(text)
@@ -140,10 +144,25 @@ class WebUI:
         output = self.model.predict(text, self.device).detach().cpu().numpy()[0]
         return f'Fake: {output[0]:.10f}, Real: {output[1]:.10f}'
 
+    def _scrape(self, url: str) -> str:
+        try:
+            return self.scraper.scrape(url)
+        except Exception as e:
+            return str(e)
+
     def get_ui(self) -> None:
         with gr.Blocks() as ui:
             with gr.Row():
                 with gr.Column():
+                    t_url = gr.Textbox(label='URL')
+                    with gr.Row():
+                        btn_scrape_reset = gr.ClearButton(
+                            value='Reset',
+                            components=[
+                                t_url,
+                            ],
+                        )
+                        btn_scrape = gr.Button(value='Get From URL', variant='primary')
                     t_inp = gr.Textbox(label='Input')
                     with gr.Row():
                         btn_reset = gr.ClearButton(
@@ -157,13 +176,14 @@ class WebUI:
                     ddl_model = gr.Dropdown(
                         label='Model',
                         choices=[model[0] for model in self.models],
-                        value=self.models[0][0],
+                        value=self._change_model(0),
                         type='index',
                         interactive=True,
                         filterable=True,
                     )
             t_out = gr.Textbox(label='Output')
             ddl_model.change(fn=self._change_model, inputs=ddl_model)
+            btn_scrape.click(fn=self._scrape, inputs=t_url, outputs=t_inp)
             btn_submit.click(fn=self._predict, inputs=t_inp, outputs=t_out)
         return ui
```
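Two details in the app.py change are worth noting. First, the dropdown now uses `value=self._change_model(0)`: since `_change_model` was changed to return the selected model's display name, calling it once at build time seeds the dropdown with that name (and, per the try block, selects model 0), so the initial selection and the active model stay in sync. Second, the scrape button simply feeds the existing pipeline: `btn_scrape.click` writes the scraped text into `t_inp`, and the untouched `btn_submit` → `_predict` path takes it from there. A minimal standalone sketch of that event chaining (the stub functions and the `Submit` button label below are illustrative placeholders, not the Space's real scraper or model):

```python
import gradio as gr

def stub_scrape(url: str) -> str:
    # Placeholder for GenericScraper.scrape(url).
    return f'Article text fetched from {url}'

def stub_predict(text: str) -> str:
    # Placeholder for the model's predict(); mirrors the output format.
    return f'Fake: {0.5:.10f}, Real: {0.5:.10f}'

with gr.Blocks() as demo:
    t_url = gr.Textbox(label='URL')
    btn_scrape = gr.Button(value='Get From URL', variant='primary')
    t_inp = gr.Textbox(label='Input')
    btn_submit = gr.Button(value='Submit', variant='primary')
    t_out = gr.Textbox(label='Output')
    # The scrape handler fills the input box; predict reads from it.
    btn_scrape.click(fn=stub_scrape, inputs=t_url, outputs=t_inp)
    btn_submit.click(fn=stub_predict, inputs=t_inp, outputs=t_out)

demo.launch()
```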
requirements.txt
CHANGED
```diff
@@ -1,3 +1,4 @@
+beautifulsoup4==4.12.3
 numpy==1.26.4
 torch==2.2.1
 transformers==4.39.3
```
src/__init__.py
CHANGED
```diff
@@ -1 +1,2 @@
 from .distilbert_tf import DistilBertTransferLearningModel
+from .scraper import GenericScraper
```
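With this re-export in place, the scraper is reachable from three equivalent import paths; a quick sketch (assuming `src` is importable from the app's working directory, as app.py's own imports imply):

```python
from src import GenericScraper                           # via the new re-export in src/__init__.py
from src.scraper import GenericScraper                   # via src/scraper/__init__.py
from src.scraper.generic_scraper import GenericScraper   # the form app.py uses
```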
src/scraper/__init__.py
ADDED
```diff
@@ -0,0 +1 @@
+from .generic_scraper import GenericScraper
```
src/scraper/generic_scraper.py
ADDED
```diff
@@ -0,0 +1,37 @@
+from typing import Optional
+import requests
+from bs4 import BeautifulSoup, ResultSet
+
+
+class GenericScraper:
+
+    def __init__(self) -> None:
+        pass
+
+    def scrape(self, url: str) -> str:
+        response: requests.Response = requests.get(url)
+
+        if response.status_code != 200:
+            raise Exception(
+                f'Failed to fetch url: {url} with status code {response.status_code}'
+            )
+
+        soup: BeautifulSoup = BeautifulSoup(response.content, 'html.parser')
+
+        sections: ResultSet[BeautifulSoup] = soup.find_all(
+            ['div', 'section', 'article']
+        )
+        max_p_len = 0
+        best_section: Optional[BeautifulSoup] = None
+
+        for section in sections:
+            ps = section.find_all('p', recursive=False)
+            p_len = len('\n'.join([p.get_text() for p in ps]))
+            if p_len > max_p_len:
+                max_p_len = p_len
+                best_section = section
+
+        if best_section is None:
+            raise Exception('No sections found')
+
+        return best_section.get_text()
```
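GenericScraper's heuristic is a crude main-content detector: fetch the page, collect every `div`, `section`, and `article`, and keep the one whose direct `<p>` children (`recursive=False`, so nested wrappers don't double-count text) add up to the most characters. `scrape` raises on non-200 responses and when no candidate container has any direct paragraph text, which is why app.py's `_scrape` wraps the call in a try/except and displays the error string in the input box. A small self-contained check of the selection logic (the HTML below is made up for illustration):

```python
from bs4 import BeautifulSoup

html = '''
<div id="nav"><p>Home</p></div>
<article id="story">
  <p>First long paragraph of the actual article body text.</p>
  <p>Second, even longer paragraph carrying most of the story.</p>
</article>
'''
soup = BeautifulSoup(html, 'html.parser')

best, best_len = None, 0
for section in soup.find_all(['div', 'section', 'article']):
    # Direct children only, mirroring find_all('p', recursive=False).
    ps = section.find_all('p', recursive=False)
    total = len('\n'.join(p.get_text() for p in ps))
    if total > best_len:
        best, best_len = section, total

print(best['id'])  # -> story
```

Against a live page, the same selection runs via `GenericScraper().scrape(url)`, which returns the winning container's full text.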