add get input from url
Files changed:
- app.py +22 -2
- requirements.txt +1 -0
- src/__init__.py +1 -0
- src/scraper/__init__.py +1 -0
- src/scraper/generic_scraper.py +37 -0
app.py
CHANGED
```diff
@@ -1,6 +1,8 @@
 from typing import Callable
 import gradio as gr
 
+from src.scraper.generic_scraper import GenericScraper
+
 if gr.NO_RELOAD:
     import numpy as np
     from src.model import BaseTransferLearningModel
@@ -119,8 +121,9 @@ class WebUI:
         self.is_ready = False
         self.model = self.models[0][1]()
         self.is_ready = True
+        self.scraper = GenericScraper()
 
-    def _change_model(self, idx: int) -> None:
+    def _change_model(self, idx: int) -> str:
         if gr.NO_RELOAD:
             try:
                 print(self.models[idx])
@@ -132,6 +135,7 @@ class WebUI:
         except Exception as e:
             print(e)
             gr.Error(e)
+        return self.models[idx][0]
 
     def _predict(self, text: str) -> str:
         print(text)
@@ -140,10 +144,25 @@ class WebUI:
         output = self.model.predict(text, self.device).detach().cpu().numpy()[0]
         return f'Fake: {output[0]:.10f}, Real: {output[1]:.10f}'
 
+    def _scrape(self, url: str) -> str:
+        try:
+            return self.scraper.scrape(url)
+        except Exception as e:
+            return str(e)
+
     def get_ui(self) -> None:
         with gr.Blocks() as ui:
             with gr.Row():
                 with gr.Column():
+                    t_url = gr.Textbox(label='URL')
+                    with gr.Row():
+                        btn_scrape_reset = gr.ClearButton(
+                            value='Reset',
+                            components=[
+                                t_url,
+                            ],
+                        )
+                        btn_scrape = gr.Button(value='Get From URL', variant='primary')
                     t_inp = gr.Textbox(label='Input')
                     with gr.Row():
                         btn_reset = gr.ClearButton(
@@ -157,13 +176,14 @@ class WebUI:
                     ddl_model = gr.Dropdown(
                         label='Model',
                         choices=[model[0] for model in self.models],
-                        value=self.models[0][0],
+                        value=self._change_model(0),
                         type='index',
                         interactive=True,
                         filterable=True,
                     )
             t_out = gr.Textbox(label='Output')
             ddl_model.change(fn=self._change_model, inputs=ddl_model)
+            btn_scrape.click(fn=self._scrape, inputs=t_url, outputs=t_inp)
             btn_submit.click(fn=self._predict, inputs=t_inp, outputs=t_out)
         return ui
```
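Two details in the app.py change are worth noting. First, the dropdown now uses `value=self._change_model(0)`: since `_change_model` was changed to return the selected model's display name, calling it once at build time seeds the dropdown with that name (and, per the try block, selects model 0), so the initial selection and the active model stay in sync. Second, the scrape button simply feeds the existing pipeline: `btn_scrape.click` writes the scraped text into `t_inp`, and the untouched `btn_submit` → `_predict` path takes it from there. A minimal standalone sketch of that event chaining (the stub functions and the `Submit` button label below are illustrative placeholders, not the Space's real scraper or model):

```python
import gradio as gr

def stub_scrape(url: str) -> str:
    # Placeholder for GenericScraper.scrape(url).
    return f'Article text fetched from {url}'

def stub_predict(text: str) -> str:
    # Placeholder for the model's predict(); mirrors the output format.
    return f'Fake: {0.5:.10f}, Real: {0.5:.10f}'

with gr.Blocks() as demo:
    t_url = gr.Textbox(label='URL')
    btn_scrape = gr.Button(value='Get From URL', variant='primary')
    t_inp = gr.Textbox(label='Input')
    btn_submit = gr.Button(value='Submit', variant='primary')
    t_out = gr.Textbox(label='Output')
    # The scrape handler fills the input box; predict reads from it.
    btn_scrape.click(fn=stub_scrape, inputs=t_url, outputs=t_inp)
    btn_submit.click(fn=stub_predict, inputs=t_inp, outputs=t_out)

demo.launch()
```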
requirements.txt
CHANGED
```diff
@@ -1,3 +1,4 @@
+beautifulsoup4==4.12.3
 numpy==1.26.4
 torch==2.2.1
 transformers==4.39.3
```
src/__init__.py
CHANGED
```diff
@@ -1 +1,2 @@
 from .distilbert_tf import DistilBertTransferLearningModel
+from .scraper import GenericScraper
```
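With this re-export in place, the scraper is reachable from three equivalent import paths; a quick sketch (assuming `src` is importable from the app's working directory, as app.py's own imports imply):

```python
from src import GenericScraper                           # via the new re-export in src/__init__.py
from src.scraper import GenericScraper                   # via src/scraper/__init__.py
from src.scraper.generic_scraper import GenericScraper   # the form app.py uses
```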
src/scraper/__init__.py
ADDED
```diff
@@ -0,0 +1 @@
+from .generic_scraper import GenericScraper
```
src/scraper/generic_scraper.py
ADDED
```diff
@@ -0,0 +1,37 @@
+from typing import Optional
+import requests
+from bs4 import BeautifulSoup, ResultSet
+
+
+class GenericScraper:
+
+    def __init__(self) -> None:
+        pass
+
+    def scrape(self, url: str) -> str:
+        response: requests.Response = requests.get(url)
+
+        if response.status_code != 200:
+            raise Exception(
+                f'Failed to fetch url: {url} with status code {response.status_code}'
+            )
+
+        soup: BeautifulSoup = BeautifulSoup(response.content, 'html.parser')
+
+        sections: ResultSet[BeautifulSoup] = soup.find_all(
+            ['div', 'section', 'article']
+        )
+        max_p_len = 0
+        best_section: Optional[BeautifulSoup] = None
+
+        for section in sections:
+            ps = section.find_all('p', recursive=False)
+            p_len = len('\n'.join([p.get_text() for p in ps]))
+            if p_len > max_p_len:
+                max_p_len = p_len
+                best_section = section
+
+        if best_section is None:
+            raise Exception('No sections found')
+
+        return best_section.get_text()
```
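GenericScraper's heuristic is a crude main-content detector: fetch the page, collect every `div`, `section`, and `article`, and keep the one whose direct `<p>` children (`recursive=False`, so nested wrappers don't double-count text) add up to the most characters. `scrape` raises on non-200 responses and when no candidate container has any direct paragraph text, which is why app.py's `_scrape` wraps the call in a try/except and displays the error string in the input box. A small self-contained check of the selection logic (the HTML below is made up for illustration):

```python
from bs4 import BeautifulSoup

html = '''
<div id="nav"><p>Home</p></div>
<article id="story">
  <p>First long paragraph of the actual article body text.</p>
  <p>Second, even longer paragraph carrying most of the story.</p>
</article>
'''
soup = BeautifulSoup(html, 'html.parser')

best, best_len = None, 0
for section in soup.find_all(['div', 'section', 'article']):
    # Direct children only, mirroring find_all('p', recursive=False).
    ps = section.find_all('p', recursive=False)
    total = len('\n'.join(p.get_text() for p in ps))
    if total > best_len:
        best, best_len = section, total

print(best['id'])  # -> story
```

Against a live page, the same selection runs via `GenericScraper().scrape(url)`, which returns the winning container's full text.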