from bs4 import BeautifulSoup
import requests
from util import Logger
from config import JOJ_COURSE_ID, JOJ_SESSION_ID


def problem_info(hw_id) -> list:
    """
    Retrieve list of problems to be verified on JOJ by parsing HTML it provides.

    Sample hw_id:
    615542bfb3c06a0006f45dd6

    Sample problem info (one 3-tuple per problem):
    [
        # filenames   problem ID                   # of test cases
        (["ex2.m"], "61553cffb3c06a0006f45da3", 10),
        (["ex4.m"], "61553e5fb3c06a0006f45da9", 10),
        (["ex5.m"], "61553f8cb3c06a0006f45db2", 10),
        (["ex6.m"], "6155414cb3c06a0006f45dc7", 10),
    ]

    Exits the process with status 1 if a problem has no record or no record
    with visible test cases (the user must have submitted at least once;
    the verdict does not matter).
    """
    info = []  # renamed from `problem_info` to avoid shadowing this function

    logger = Logger()

    # JOJ authenticates through cookies; "sid" carries the login session.
    cookies = {
        "JSESSIONID": "dummy",
        "save": "1",
        "sid": JOJ_SESSION_ID,
    }
    session = requests.Session()
    session.cookies.update(cookies)
    course_url = f"https://joj.sjtu.edu.cn/d/{JOJ_COURSE_ID}"
    hw_url = f"{course_url}/homework/{hw_id}"
    hw_html = session.get(hw_url).text
    soup = BeautifulSoup(hw_html, features="lxml")

    # Find table of problems (the only one to have class="data-table")
    table = soup.find("table", class_="data-table")
    problems = [prob.find("a") for prob in table("td", class_="col--problem-name")]
    # we now know the ID for each problem; go on to obtain other data
    for prob in problems:
        prob_id = prob.get("href").split("/")[-1]
        prob_url = f"{hw_url}/{prob_id}"
        prob_html = session.get(prob_url).text
        prob_soup = BeautifulSoup(prob_html, features="lxml")
        # parse problem page (contains filename inside the first <strong>)
        # NOTE: this is tested on MATLAB submissions only
        # Format:
        #   Finish homework 1 ex 1, save the script as <strong>ex1.m</strong>,
        #   compress it into tar, and upload it.
        filenames = [
            (prob_soup.find("div", class_="problem-content").find("strong").string)
        ]

        submit_url = f"{prob_url}/submit"
        submit_html = session.get(submit_url).text
        submit_soup = BeautifulSoup(submit_html, features="lxml")
        # parse submit page (contains all records)
        # Important: user logged in must have at least one record;
        # its result does not matter
        table_of_records = submit_soup.find("table", class_="data-table")
        if table_of_records is None:
            logger.error(f"No JOJ record found for {prob.string}. Abort.")
            exit(1)

        # Some records may be "waiting", in which case no test cases are shown.
        # Iterate through the records and stop at the first one that shows them.
        test_cases = []
        for record in table_of_records("tr"):
            record_id = record.get("data-rid")
            if record_id is None:
                # header row carries no data-rid; skip it instead of
                # requesting ".../records/None"
                continue
            # parse record page (contains all test cases)
            record_url = f"{course_url}/records/{record_id}"
            record_html = session.get(record_url).text
            record_soup = BeautifulSoup(record_html, features="lxml")
            record_table = record_soup.find("table", class_="data-table")
            if record_table is None:
                continue
            test_cases = record_table.find("tbody").find_all("tr")
            if test_cases:
                # found a usable record; don't fetch (and overwrite with)
                # the remaining ones
                break

        if not test_cases:
            logger.error(f"No test cases found for {prob.string}. Abort.")
            # BUG FIX: the message promised an abort, but the original fell
            # through and appended a bogus 0-test-case tuple
            exit(1)

        info.append((filenames, prob_id, len(test_cases)))  # 3-tuple

    return info


if __name__ == "__main__":
    print(problem_info("615542bfb3c06a0006f45dd6"))