Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -299,55 +299,46 @@ def save_full_transcript(query, text):
|
|
299 |
"""Save full transcript of Arxiv results as a file."""
|
300 |
create_file(query, text, "md")
|
301 |
|
302 |
-
# ------------------------------
|
303 |
-
# NEW: Helper to parse references
|
304 |
-
# ------------------------------
|
305 |
def parse_arxiv_refs(ref_text: str):
|
306 |
"""
|
307 |
-
Parse
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
"""
|
314 |
-
|
|
|
|
|
|
|
315 |
results = []
|
316 |
-
for
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
#
|
321 |
-
|
322 |
-
if
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
else:
|
343 |
-
year = None
|
344 |
-
|
345 |
-
results.append({
|
346 |
-
'title': raw_title,
|
347 |
-
'summary': summary,
|
348 |
-
'year': year
|
349 |
-
})
|
350 |
-
return results
|
351 |
|
352 |
|
353 |
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
|
|
|
299 |
"""Save full transcript of Arxiv results as a file."""
|
300 |
create_file(query, text, "md")
|
301 |
|
|
|
|
|
|
|
302 |
def parse_arxiv_refs(ref_text: str):
    """Parse paper references formatted as repeated blocks of:

        **DATE | TITLE | ⬇️**
        AUTHORS
        SUMMARY

    Args:
        ref_text: Raw markdown-ish text containing zero or more paper blocks.

    Returns:
        A list of dicts with keys 'title', 'summary', 'authors', 'year'
        (int or None) and 'date', limited to the first 20 papers. Headers
        with fewer than two '|'-separated fields are skipped.
    """
    # Compile the header pattern once; it is used both to split the body
    # text and to collect the matching header strings.
    header_re = re.compile(r'\*\*.*?\|\s*.*?\|\s*.*?\*\*')
    headers = header_re.findall(ref_text)
    # papers[0] is any text before the first header; each later element is
    # the content that follows the corresponding header.
    papers = header_re.split(ref_text)

    results = []
    # Pair each header with its trailing content, capped at 20 papers.
    for header, content in zip(headers[:20], papers[1:]):
        # Header fields: DATE | TITLE | (download marker, ignored).
        header_parts = [p.strip() for p in header.strip('*').split('|')]
        if len(header_parts) >= 2:
            date_str = header_parts[0].strip()
            title = header_parts[1].strip()

            # First line of the content is the author list; the remainder
            # (if any) is the summary.
            content_parts = content.strip().split('\n', 1)
            authors = content_parts[0].strip('*') if content_parts else ""
            summary = content_parts[1].strip() if len(content_parts) > 1 else ""

            # Pull a four-digit year (20xx) out of the date field, if any.
            year_match = re.search(r'20\d{2}', date_str)
            year = int(year_match.group(0)) if year_match else None

            results.append({
                'title': title,
                'summary': summary,
                'authors': authors,
                'year': year,
                'date': date_str
            })
    return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
|
343 |
|
344 |
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False,
|