Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -36,7 +36,7 @@ from secrets import SystemRandom
|
|
36 |
from random import randint, sample
|
37 |
|
38 |
from enum import Enum
|
39 |
-
from re import sub
|
40 |
|
41 |
from functools import partial
|
42 |
|
@@ -197,18 +197,26 @@ async def __ocr(im, file_id):
|
|
197 |
return out
|
198 |
|
199 |
def convert_links_to_text(text):
|
200 |
-
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
|
|
|
|
|
|
|
|
208 |
|
209 |
-
|
210 |
-
return sub(link_pattern, r"\1", text)
|
211 |
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
async def __convert2md(inp):
|
214 |
# Use gfm-raw_html to strip styling data from source file
|
@@ -539,7 +547,7 @@ async def generate_questions(request):
|
|
539 |
|
540 |
# __raw_outputs = [await p for p in ptasks]
|
541 |
__raw_outputs = await asyncio.gather(*ptasks)
|
542 |
-
|
543 |
for pgph_i, o in enumerate(__raw_outputs):
|
544 |
# print(o)
|
545 |
# print(pgph_i)
|
|
|
36 |
from random import randint, sample
|
37 |
|
38 |
from enum import Enum
|
39 |
+
from re import sub, findall, escape
|
40 |
|
41 |
from functools import partial
|
42 |
|
|
|
197 |
return out
|
198 |
|
199 |
def convert_links_to_text(text):
    """Replace inline Markdown links with their link text.

    Every ``[name](http://...)`` or ``[name](https://...)`` occurrence in
    *text* is collapsed to just ``name``. Whitespace between the
    parentheses and the URL is tolerated. Links using any other URL scheme,
    and text containing no links, are returned unchanged.

    Args:
        text: Markdown source to process.

    Returns:
        A copy of *text* with http(s) links reduced to their names.
    """
    # Anything that isn't a square closing bracket
    name_regex = r"[^]]+"
    # http:// or https:// followed by anything but a closing paren
    url_regex = r"http[s]?://[^)]+"

    # Raw string: "\[", "\(" and "\s" are regex escapes, not string escapes.
    markup_regex = r"\[({0})]\(\s*({1})\s*\)".format(name_regex, url_regex)

    # Substitute on the pattern itself instead of rebuilding each link as
    # "[name](url)" and calling str.replace: the rebuilt string drops the
    # optional whitespace the pattern accepts, so such links were matched
    # but never replaced.
    return sub(markup_regex, r"\1", text)
|
|
|
214 |
|
215 |
+
def remove_wikipedia_footnote_ptrs(text):
    """Strip backslash-escaped numeric footnote markers from *text*.

    A marker is a literal backslash, ``[``, one or more digits, a literal
    backslash and ``]`` (for example ``\\[12\\]``). Unescaped markers such
    as ``[12]`` are left in place.

    Args:
        text: Markdown source to process.

    Returns:
        A copy of *text* with every escaped footnote marker deleted.
    """
    escaped_marker = r'\\\[\d+\\]'
    return sub(escaped_marker, '', text)
|
220 |
|
221 |
async def __convert2md(inp):
|
222 |
# Use gfm-raw_html to strip styling data from source file
|
|
|
547 |
|
548 |
# __raw_outputs = [await p for p in ptasks]
|
549 |
__raw_outputs = await asyncio.gather(*ptasks)
|
550 |
+
print(__raw_outputs)
|
551 |
for pgph_i, o in enumerate(__raw_outputs):
|
552 |
# print(o)
|
553 |
# print(pgph_i)
|