import sys

from bs4 import BeautifulSoup
import requests

from util import Logger
from config import JOJ_COURSE_ID, JOJ_SESSION_ID


def problem_info(hw_id) -> list:
    """
    Retrieve the list of problems to be verified on JOJ by parsing the HTML
    it provides.

    Sample hw_id: 615542bfb3c06a0006f45dd6
    Sample problem info:
    [
        # filenames   problem ID                  # of test cases
        (["ex2.m"], "61553cffb3c06a0006f45da3", 10),
        (["ex4.m"], "61553e5fb3c06a0006f45da9", 10),
        (["ex5.m"], "61553f8cb3c06a0006f45db2", 10),
        (["ex6.m"], "6155414cb3c06a0006f45dc7", 10),
    ]

    Exits the process (status 1) when the logged-in user has no record for a
    problem, or when no record exposes its test cases.
    """
    info = []
    logger = Logger()

    # Authenticate every request with the session id taken from config.
    cookies = {
        "JSESSIONID": "dummy",
        "save": "1",
        "sid": JOJ_SESSION_ID,
    }
    session = requests.Session()
    session.cookies.update(cookies)

    course_url = f"https://joj.sjtu.edu.cn/d/{JOJ_COURSE_ID}"
    hw_url = f"{course_url}/homework/{hw_id}"
    hw_html = session.get(hw_url).text
    soup = BeautifulSoup(hw_html, features="lxml")

    # Find the table of problems (the only one with class="data-table").
    table = soup.find("table", class_="data-table")
    problems = [cell.find("a") for cell in table("td", class_="col--problem-name")]

    # We now know the ID for each problem; go on to obtain the other data.
    for prob in problems:
        prob_id = prob.get("href").split("/")[-1]
        prob_url = f"{hw_url}/{prob_id}"
        prob_html = session.get(prob_url).text
        prob_soup = BeautifulSoup(prob_html, features="lxml")

        # Parse the problem page (contains the filename).
        # NOTE: this is tested on MATLAB submissions only. Expected format:
        #   "Finish homework 1 ex 1, save the script as <strong>ex1.m</strong>,
        #    compress it into tar, and upload it."
        filenames = [
            prob_soup.find("div", class_="problem-content").find("strong").string
        ]

        submit_url = f"{prob_url}/submit"
        submit_html = session.get(submit_url).text
        submit_soup = BeautifulSoup(submit_html, features="lxml")

        # Parse the submit page (contains all records).
        # Important: the logged-in user must have at least one record;
        # its result does not matter.
        table_of_records = submit_soup.find("table", class_="data-table")
        if table_of_records is None:
            logger.error(f"No JOJ record found for {prob.string}. Abort.")
            # sys.exit, not the site-module exit() helper, for script use.
            sys.exit(1)

        # Some records may be "waiting", in which case no test cases are
        # shown; iterate through the records until one exposes them.
        test_cases = []
        for record in table_of_records("tr"):
            record_id = record.get("data-rid")
            if record_id is None:
                # Header row carries no data-rid — skip instead of
                # requesting a bogus ".../records/None" page.
                continue
            # Parse the record page (contains all test cases).
            record_url = f"{course_url}/records/{record_id}"
            record_html = session.get(record_url).text
            record_soup = BeautifulSoup(record_html, features="lxml")
            record_table = record_soup.find("table", class_="data-table")
            if record_table is None:
                continue
            test_cases = record_table.find("tbody").find_all("tr")
            if test_cases:
                # One usable record is enough — stop fetching the rest.
                break

        if not test_cases:
            logger.error(f"No test cases found for {prob.string}. Abort.")
            # The message promises an abort; previously this fell through
            # and appended a bogus 0-test-case tuple. Match the path above.
            sys.exit(1)

        info.append((filenames, prob_id, len(test_cases)))  # 3-tuple

    return info


if __name__ == "__main__":
    print(problem_info("615542bfb3c06a0006f45dd6"))