import sys

import requests
from bs4 import BeautifulSoup

from config import JOJ_COURSE_ID, JOJ_SESSION_ID
from util import Logger


def problem_info(hw_id: str) -> list:
    """
    Retrieve the list of problems to be verified on JOJ by parsing the HTML
    pages it serves.

    Sample hw_id:
    615542bfb3c06a0006f45dd6

    Sample return value:
    [
        # filenames     problem ID                      # of test cases
        (["ex2.m"],     "61553cffb3c06a0006f45da3",                  10),
        (["ex4.m"],     "61553e5fb3c06a0006f45da9",                  10),
        (["ex5.m"],     "61553f8cb3c06a0006f45db2",                  10),
        (["ex6.m"],     "6155414cb3c06a0006f45dc7",                  10),
    ]
    """

    info = []

    logger = Logger()

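    # Authentication rides on the "sid" cookie (JOJ_SESSION_ID); JSESSIONID
    # is a dummy placeholder, presumably just expected to be present.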
    cookies = {
        "JSESSIONID": "dummy",
        "save": "1",
        "sid": JOJ_SESSION_ID,
    }
    session = requests.Session()
    session.cookies.update(cookies)
    course_url = f"https://joj.sjtu.edu.cn/d/{JOJ_COURSE_ID}"
    hw_url = f"{course_url}/homework/{hw_id}"
    hw_html = session.get(hw_url).text
    soup = BeautifulSoup(hw_html, features="lxml")

    # Find the table of problems (the only one with class="data-table")
    table = soup.find("table", class_="data-table")
    problems = [prob.find("a") for prob in table("td", class_="col--problem-name")]
    # we now know the ID for each problem
    # go on to obtain other data
    for prob in problems:
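        # the problem ID is the last path segment of the problem link's href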
        prob_id = prob.get("href").split("/")[-1]
        prob_url = f"{hw_url}/{prob_id}"
        prob_html = session.get(prob_url).text
        prob_soup = BeautifulSoup(prob_html, features="lxml")
        # parse problem page (contains filename)
        # NOTE: this is tested on MATLAB submissions only
        # Format:
        """
        Finish homework 1 ex 1, save the script as <strong>ex1.m</strong>,
        compress it into <strong>tar</strong>, and upload it.
        """
        filenames = [
            prob_soup.find("div", class_="problem-content").find("strong").string
        ]

        submit_url = f"{prob_url}/submit"
        submit_html = session.get(submit_url).text
        submit_soup = BeautifulSoup(submit_html, features="lxml")
        # parse submit page (contains all records)
        # Important: user logged in must have at least one record
        # its result does not matter
        table_of_records = submit_soup.find("table", class_="data-table")
        if table_of_records is None:
            logger.error(f"No JOJ record found for {prob.string}. Abort.")
            sys.exit(1)

        # Some records may be "waiting", in which case no test cases are shown.
        # iterate through the records until one with visible test cases is found
        test_cases = []
        for record in table_of_records("tr"):
            record_id = record.get("data-rid")
            if record_id is None:
                # header rows carry no data-rid attribute; skip them
                continue
            # parse record page (contains all test cases)
            record_url = f"{course_url}/records/{record_id}"
            record_html = session.get(record_url).text
            record_soup = BeautifulSoup(record_html, features="lxml")
            record_table = record_soup.find("table", class_="data-table")
            if record_table is None:
                # record is still "waiting"; try the next one
                continue
            test_cases = record_table.find("tbody").find_all("tr")
            break

        if not test_cases:
            logger.error(f"No test cases found for {prob.string}. Abort.")
            sys.exit(1)

        info.append((filenames, prob_id, len(test_cases)))  # one 3-tuple per problem

    return info


if __name__ == "__main__":
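    # manual smoke test using the sample homework ID from the docstring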
    print(problem_info("615542bfb3c06a0006f45dd6"))