from bs4 import BeautifulSoup
import requests
from util import Logger
from config import JOJ_COURSE_ID, JOJ_SESSION_ID


def problem_info(hw_id) -> list:
    """
    Retrieve list of problems to be verified on JOJ by parsing HTML it provides.

    Sample hw_id:
    615542bfb3c06a0006f45dd6

    Sample problem info (one 3-tuple per problem):
    [
        # filenames   problem ID                   # of test cases
        (["ex2.m"], "61553cffb3c06a0006f45da3", 10),
        (["ex4.m"], "61553e5fb3c06a0006f45da9", 10),
        (["ex5.m"], "61553f8cb3c06a0006f45db2", 10),
        (["ex6.m"], "6155414cb3c06a0006f45dc7", 10),
    ]

    Exits the process with status 1 if a problem has no record or no record
    with visible test cases (the user must have submitted at least once;
    the verdict does not matter).
    """
    info = []  # renamed from `problem_info` to avoid shadowing this function

    logger = Logger()

    # JOJ authenticates through cookies; "sid" carries the login session.
    cookies = {
        "JSESSIONID": "dummy",
        "save": "1",
        "sid": JOJ_SESSION_ID,
    }
    session = requests.Session()
    session.cookies.update(cookies)
    course_url = f"https://joj.sjtu.edu.cn/d/{JOJ_COURSE_ID}"
    hw_url = f"{course_url}/homework/{hw_id}"
    hw_html = session.get(hw_url).text
    soup = BeautifulSoup(hw_html, features="lxml")

    # Find table of problems (the only one to have class="data-table")
    table = soup.find("table", class_="data-table")
    problems = [prob.find("a") for prob in table("td", class_="col--problem-name")]
    # we now know the ID for each problem; go on to obtain other data
    for prob in problems:
        prob_id = prob.get("href").split("/")[-1]
        prob_url = f"{hw_url}/{prob_id}"
        prob_html = session.get(prob_url).text
        prob_soup = BeautifulSoup(prob_html, features="lxml")
        # parse problem page (contains filename inside the first <strong>)
        # NOTE: this is tested on MATLAB submissions only
        # Format:
        #   Finish homework 1 ex 1, save the script as <strong>ex1.m</strong>,
        #   compress it into tar, and upload it.
        filenames = [
            (prob_soup.find("div", class_="problem-content").find("strong").string)
        ]

        submit_url = f"{prob_url}/submit"
        submit_html = session.get(submit_url).text
        submit_soup = BeautifulSoup(submit_html, features="lxml")
        # parse submit page (contains all records)
        # Important: user logged in must have at least one record;
        # its result does not matter
        table_of_records = submit_soup.find("table", class_="data-table")
        if table_of_records is None:
            logger.error(f"No JOJ record found for {prob.string}. Abort.")
            exit(1)

        # Some records may be "waiting", in which case no test cases are shown.
        # Iterate through the records and stop at the first one that shows them.
        test_cases = []
        for record in table_of_records("tr"):
            record_id = record.get("data-rid")
            if record_id is None:
                # header row carries no data-rid; skip it instead of
                # requesting ".../records/None"
                continue
            # parse record page (contains all test cases)
            record_url = f"{course_url}/records/{record_id}"
            record_html = session.get(record_url).text
            record_soup = BeautifulSoup(record_html, features="lxml")
            record_table = record_soup.find("table", class_="data-table")
            if record_table is None:
                continue
            test_cases = record_table.find("tbody").find_all("tr")
            if test_cases:
                # found a usable record; don't fetch (and overwrite with)
                # the remaining ones
                break

        if not test_cases:
            logger.error(f"No test cases found for {prob.string}. Abort.")
            # BUG FIX: the message promised an abort, but the original fell
            # through and appended a bogus 0-test-case tuple
            exit(1)

        info.append((filenames, prob_id, len(test_cases)))  # 3-tuple

    return info


if __name__ == "__main__":
    print(problem_info("615542bfb3c06a0006f45dd6"))