import sys

import requests
from bs4 import BeautifulSoup

from config import JOJ_COURSE_ID, JOJ_SESSION_ID
from util import Logger
def problem_info(hw_id) -> list:
    """
    Retrieve the list of problems to be verified on JOJ by parsing the HTML
    pages it serves.

    Args:
        hw_id: homework ID string, e.g. "615542bfb3c06a0006f45dd6"

    Returns:
        A list of 3-tuples, one per problem, e.g.

        [
            # filenames   problem ID                  # of test cases
            (["ex2.m"], "61553cffb3c06a0006f45da3", 10),
            (["ex4.m"], "61553e5fb3c06a0006f45da9", 10),
            (["ex5.m"], "61553f8cb3c06a0006f45db2", 10),
            (["ex6.m"], "6155414cb3c06a0006f45dc7", 10),
        ]

    Exits the process with status 1 when a problem has no submission record
    at all, or no finished record with visible test cases.
    """
    problem_info = []
    logger = Logger()
    # JOJ authenticates via the "sid" session cookie; the JSESSIONID value is
    # a placeholder and its content does not matter.
    cookies = {
        "JSESSIONID": "dummy",
        "save": "1",
        "sid": JOJ_SESSION_ID,
    }
    session = requests.Session()
    session.cookies.update(cookies)
    course_url = f"https://joj.sjtu.edu.cn/d/{JOJ_COURSE_ID}"
    hw_url = f"{course_url}/homework/{hw_id}"
    hw_html = session.get(hw_url).text
    soup = BeautifulSoup(hw_html, features="lxml")
    # Find the table of problems (the only one with class="data-table")
    table = soup.find("table", class_="data-table")
    problems = [prob.find("a") for prob in table("td", class_="col--problem-name")]
    # we now know the ID for each problem
    # go on to obtain other data
    for prob in problems:
        prob_id = prob.get("href").split("/")[-1]
        prob_url = f"{hw_url}/{prob_id}"
        prob_html = session.get(prob_url).text
        prob_soup = BeautifulSoup(prob_html, features="lxml")
        # parse problem page (contains filename)
        # NOTE: this is tested on MATLAB submissions only
        # Format:
        """
        Finish homework 1 ex 1, save the script as ex1.m,
        compress it into tar, and upload it.
        """
        filenames = [
            (prob_soup.find("div", class_="problem-content").find("strong").string)
        ]
        submit_url = f"{prob_url}/submit"
        submit_html = session.get(submit_url).text
        submit_soup = BeautifulSoup(submit_html, features="lxml")
        # parse submit page (contains all records)
        # Important: user logged in must have at least one record
        # its result does not matter
        table_of_records = submit_soup.find("table", class_="data-table")
        if table_of_records is None:
            logger.error(f"No JOJ record found for {prob.string}. Abort.")
            sys.exit(1)
        # Some records may be "waiting", in which case no test cases are shown.
        # Iterate through the records until one with test cases is found;
        # the test-case count is the same for every finished record, so the
        # first hit suffices and saves the remaining HTTP round-trips.
        test_cases = []
        for record in table_of_records("tr"):
            record_id = record.get("data-rid")
            # parse record page (contains all test cases)
            record_url = f"{course_url}/records/{record_id}"
            record_html = session.get(record_url).text
            record_soup = BeautifulSoup(record_html, features="lxml")
            record_table = record_soup.find("table", class_="data-table")
            if record_table is None:
                # "waiting" record: no result table rendered yet
                continue
            test_cases = record_table.find("tbody").find_all("tr")
            if test_cases:
                break
        if not test_cases:
            # No finished record exposed any test cases; a zero count would
            # poison downstream verification, so abort as the message says.
            logger.error(f"No test cases found for {prob.string}. Abort.")
            sys.exit(1)
        problem_info.append((filenames, prob_id, len(test_cases)))  # 3-tuple
    return problem_info
if __name__ == "__main__":
    # Manual smoke test against a known homework ID.
    info = problem_info("615542bfb3c06a0006f45dd6")
    print(info)