Spaces:

USTC975
/

AgentReview

Sleeping

File size: 3,946 Bytes

43c34cc

"""
Process and classify ICLR submissions using OpenReview API.

This script processes ICLR submissions, classifies them into subdirectories
based on decisions, extracts paper content into JSON format, and checks the
validity of the processed papers.

It includes three main functions:
- categorize_ICLR_submissions_into_subdirectories: Classifies papers into
  directories based on decisions.
- process_submission: Processes each submission by extracting text and saving
  it as a JSON file.
- check_processed_paper: Verifies if all processed papers are valid JSON files.
"""

import os
import sys
import traceback
from collections import Counter

from tqdm import tqdm

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from agentreview.arguments import parse_args
from agentreview.utility.utils import print_colored

# Lookup table from the raw decision text recorded in a review note to the name
# of the subdirectory that the note (and its paper PDF) is filed under.
# Different ICLR editions word the same outcome differently, so several keys
# may share one target directory.
decision_map = {
    # Wording used by ICLR 2023
    "Reject": "Reject",
    "Accept: poster": "Accept-poster",
    "Accept: notable-top-25%": "Accept-notable-top-25",
    "Accept: notable-top-5%": "Accept-notable-top-5",

    # Wording used by ICLR 2022
    "Accept (Poster)": "Accept-poster",
    "Accept (Oral)": "Accept-oral",
    "Accept (Spotlight)": "Accept-spotlight",

    # Wording used by ICLR 2021
    "Significant concerns (Do not publish)": "Significant-concerns",
    "Concerns raised (can publish with adjustment)": "Concerns-raised",

    # Wording used by ICLR 2020.
    # "Talk" is assumed to signify an oral presentation.
    "Accept (Talk)": "Accept-oral",

    # Wording used by ICLR 2018; workshop invitations are filed with rejections
    "Invite to Workshop Track": "Reject",
}


def _find_decision(lines):
    """Return the decision string recorded in a note's lines, or None.

    Scans the lines in order and stops at the first one that contains a
    ``"recommendation"`` or ``"decision"`` key followed by a quoted value.
    A line that contains the key text but no quoted value after it is printed
    in red and skipped, for both keys alike.

    Args:
        lines: The lines of one review-note file.

    Returns:
        The quoted value that follows the key, or None when no line yields one.

    Raises:
        AssertionError: If a line containing one of the keys does not hold
            exactly four double quotes (a quoted key plus a quoted value).
    """
    for line in tqdm(lines):
        # "recommendation" takes precedence over "decision" on the same line.
        for field in ("recommendation", "decision"):
            key = f'"{field}"'
            if key not in line:
                continue

            # Raised explicitly (not via `assert`) so the format check still
            # runs when Python is started with -O.
            if line.count('"') != 4:
                raise AssertionError(f"Unexpected format in {field} line.")
            print(line)

            # Text after the first occurrence of the key; a well-formed line
            # leaves `<sep>"<value>"<rest>` here, i.e. three pieces.
            pieces = line.split(key)[1].split('"')
            if len(pieces) >= 3:
                return pieces[1]

            # The key text matched but no quoted value follows it (e.g. the
            # key text is itself some other field's value): report and move on.
            print_colored(line, 'red')
            break

    return None


def categorize_ICLR_submissions_into_subdirectories(conference=None):
    """Classifies ICLR submissions into subdirectories based on review decisions.

    This function iterates through the review notes in
    ``data/<conference>/notes`` and identifies the decision (recommendation or
    final decision) for each submission. It then moves the note and its
    corresponding paper PDF from ``data/<conference>/paper`` into a
    subdirectory named after the mapped decision. Notes without a decision
    are reported and left in place.

    Args:
        conference: Name of the conference directory under ``data/``. When
            omitted, the module-level ``args.conference`` is used.

    Raises:
        AssertionError: If the line containing the decision does not have the
                        expected format.
        KeyError: If the decision found in a note is not a key of
                  ``decision_map``.
        ValueError: If a note's file name does not start with an integer
                    paper id.
        OSError: If a note or its paper PDF cannot be read or moved.
    """
    if conference is None:
        conference = args.conference

    note_dir = f"data/{conference}/notes"
    paper_dir = f"data/{conference}/paper"

    for note in os.listdir(note_dir):
        print(note)
        note_path = os.path.join(note_dir, note)

        # Skip directories or irrelevant files
        if os.path.isdir(note_path) or ".DS_Store" in note:
            continue

        # Notes are JSON text; read as UTF-8 and close the handle before the
        # file is renamed below.
        with open(note_path, "r", encoding="utf-8") as note_file:
            lines = note_file.readlines()

        decision = _find_decision(lines)
        if decision is None:
            # Possibly withdrawn papers
            print_colored(f"Could not find decision for {note}", "red")
            continue

        if decision not in decision_map:
            raise KeyError(
                f"Unrecognized decision {decision!r} in {note}; "
                "add it to decision_map."
            )
        subdirectory = decision_map[decision]

        # Resolve the paper file name before touching the filesystem so that a
        # malformed note name cannot leave a half-moved submission behind.
        paper_id = int(note.split(".json")[0])
        paper_name = f"{paper_id}.pdf"

        os.makedirs(os.path.join(note_dir, subdirectory), exist_ok=True)
        os.makedirs(os.path.join(paper_dir, subdirectory), exist_ok=True)

        # Move the paper first: if the PDF is missing this raises while the
        # note is still in place, so a later rerun will pick the note up again.
        os.rename(
            os.path.join(paper_dir, paper_name),
            os.path.join(paper_dir, subdirectory, paper_name),
        )
        os.rename(note_path, os.path.join(note_dir, subdirectory, note))


if __name__ == "__main__":
    # Bind the parsed command-line arguments to the module-level name `args`,
    # which categorize_ICLR_submissions_into_subdirectories reads to obtain
    # the conference name.
    args = parse_args()

    # Sort each review note and its paper PDF into a per-decision subdirectory
    categorize_ICLR_submissions_into_subdirectories()