Spaces:
Sleeping
Sleeping
File size: 3,946 Bytes
43c34cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
"""
Process and classify ICLR submissions using OpenReview API.
This script processes ICLR submissions, classifies them into subdirectories
based on decisions, extracts paper content into JSON format, and checks the
validity of the processed papers.
It includes three main functions:
- categorize_ICLR_submissions_into_subdirectories: Classifies papers into
directories based on decisions.
- process_submission: Processes each submission by extracting text and saving
it as a JSON file.
- check_processed_paper: Verifies if all processed papers are valid JSON files.
"""
import os
import sys
import traceback
from collections import Counter
from tqdm import tqdm
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from agentreview.arguments import parse_args
from agentreview.utility.utils import print_colored
# Translates the raw decision string found in a review note into the name of
# the subdirectory that collects submissions with that outcome. Decision
# wording differs between ICLR editions, so several raw strings can share
# one target directory.
decision_map = {
    # --- ICLR 2023 ---
    "Reject": "Reject",
    "Accept: poster": "Accept-poster",
    "Accept: notable-top-25%": "Accept-notable-top-25",
    "Accept: notable-top-5%": "Accept-notable-top-5",

    # --- ICLR 2022 ---
    "Accept (Poster)": "Accept-poster",
    "Accept (Oral)": "Accept-oral",
    "Accept (Spotlight)": "Accept-spotlight",

    # --- ICLR 2021 ---
    "Significant concerns (Do not publish)": "Significant-concerns",
    "Concerns raised (can publish with adjustment)": "Concerns-raised",

    # --- ICLR 2020 ---
    # "Talk" is assumed to mean an oral presentation.
    "Accept (Talk)": "Accept-oral",

    # --- ICLR 2018 ---
    # Workshop-track invitations are filed together with rejections.
    "Invite to Workshop Track": "Reject",
}
def _extract_decision(lines):
    """Return the decision string found in the lines of a review note.

    Scans the lines in order and looks for the first line that contains a
    quoted ``"recommendation"`` or ``"decision"`` key (``"recommendation"``
    wins when both appear on the same line). The value is taken as the text
    between the next pair of double quotes after the key.

    Args:
        lines: The text lines of one note file.

    Returns:
        The decision string, or None when no line yields a value.

    Raises:
        AssertionError: If a matching line does not contain exactly four
            double-quote characters (i.e. is not a plain ``"key": "value"``
            pair).
    """
    fields = ("recommendation", "decision")
    for line in tqdm(lines):
        field = next((name for name in fields if f'"{name}"' in line), None)
        if field is None:
            continue

        # Raised explicitly rather than via `assert` so that the check still
        # runs when Python is started with -O.
        if line.count('"') != 4:
            raise AssertionError(f"Unexpected format in {field} line.")

        print(line)
        try:
            return line.split(f'"{field}"')[1].split('"')[1]
        except IndexError:
            # Happens when the quoted key is itself the *value* of the line
            # (nothing quoted follows it). Report it and keep scanning; both
            # keys are treated the same way.
            traceback.print_exc()
            print_colored(line, 'red')
    return None


def categorize_ICLR_submissions_into_subdirectories():
    """Classifies ICLR submissions into subdirectories based on review decisions.

    Iterates over the files in ``data/<conference>/notes``, where
    ``<conference>`` is read from the module-level ``args.conference``. For
    each note it looks up the decision (recommendation or final decision),
    maps it through ``decision_map`` and moves the note, together with the
    matching ``<paper_id>.pdf`` from ``data/<conference>/paper``, into a
    subdirectory named after the mapped decision.

    Notes are skipped, with a message printed in red, when no decision is
    found or when the decision has no entry in ``decision_map``. If the PDF
    of a note is missing, the note is still moved and the missing paper is
    reported.

    Raises:
        AssertionError: If the line containing the decision does not have the
            expected format.
        ValueError: If a note file name does not start with an integer paper
            id followed by ``.json``. This is checked before anything is
            moved.
    """
    note_dir = f"data/{args.conference}/notes"
    paper_dir = f"data/{args.conference}/paper"

    for note in os.listdir(note_dir):
        print(note)
        note_path = os.path.join(note_dir, note)

        # Skip directories (including the decision folders created by an
        # earlier run) and irrelevant files.
        if os.path.isdir(note_path) or ".DS_Store" in note:
            continue

        # The context manager closes the handle right away; the loop may
        # visit a large number of files.
        with open(note_path, "r", encoding="utf-8") as note_file:
            lines = note_file.readlines()

        decision = _extract_decision(lines)
        if decision is None:
            # Possibly withdrawn papers
            print_colored(f"Could not find decision for {note}", "red")
            continue

        category = decision_map.get(decision)
        if category is None:
            # An unmapped decision would otherwise abort the whole run with a
            # KeyError at this same note on every rerun.
            print_colored(f"Unknown decision '{decision}' for {note}", "red")
            continue

        # Resolve the paper before touching the file system so that a
        # malformed note name cannot leave the note moved without its paper.
        paper_id = int(note.split(".json")[0])
        paper_name = f"{paper_id}.pdf"
        paper_path = os.path.join(paper_dir, paper_name)

        os.makedirs(os.path.join(note_dir, category), exist_ok=True)
        os.makedirs(os.path.join(paper_dir, category), exist_ok=True)
        os.rename(note_path, os.path.join(note_dir, category, note))

        if os.path.exists(paper_path):
            os.rename(paper_path, os.path.join(paper_dir, category, paper_name))
        else:
            print_colored(f"Could not find paper {paper_path} for {note}", "red")
if __name__ == "__main__":
    # `args` has to be a module-level name: the function called below reads
    # `args.conference` from the global scope instead of taking a parameter.
    args = parse_args()

    # Sort every review note and its paper into a per-decision subdirectory
    categorize_ICLR_submissions_into_subdirectories()