1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
import requests
import json
class JForm:
    """Retrieve results from one jForm questionnaire.

    In this context, "JForm" refers to this class, and "jForm" to the online service also
    known as SJTU Questionnaires, hosted on https://wj.sjtu.edu.cn/.

    JImbrella relies on JForm to sync its database against jForm's.

    Each JForm object requires a checkpoint file in which to store the id of the latest answer
    sheet it has read. The file is overwritten each time :meth:`get_unread` completes a scan
    of a non-empty questionnaire.
    """

    # Number of answer sheets requested per page.
    PAGE_SIZE = 100
    # Seconds to wait for jForm before giving up; without a timeout `requests` may block forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, name: str, url: str, checkpoint_fp: str):
        """Store the questionnaire settings; performs no I/O.

        Args:
            name: Internal identifier for this questionnaire.
            url: URL of the jForm results API that is queried with HTTP GET.
            checkpoint_fp: Path of the checkpoint file.
        """
        self._name = name  # internal identifier
        self.url = url
        self._checkpoint_fp = checkpoint_fp

    def _get(self, page: int = 1) -> "requests.Response":
        """HTTP GET one page of answer sheets, sorted by id in descending order.

        Args:
            page: 1-based page number to request.

        Returns:
            The raw ``requests.Response``; the caller checks the status code and decodes JSON.

        Raises:
            requests.RequestException: On connection errors or when the timeout elapses.
        """
        query = {
            "params": json.dumps({"current": page, "pageSize": self.PAGE_SIZE}),
            "sort": json.dumps({"id": "desc"}),
        }
        return requests.get(self.url, params=query, timeout=self.REQUEST_TIMEOUT)

    def _read_checkpoint(self) -> int:
        """Return the id stored in the checkpoint file, or 0 if the file does not exist.

        Raises:
            ValueError: If the file exists but does not contain an integer.
        """
        try:
            with open(self._checkpoint_fp) as f:
                return int(f.read())
        except FileNotFoundError:
            return 0

    def _write_checkpoint(self, checkpoint: int) -> None:
        """Create or overwrite the checkpoint file with ``checkpoint``."""
        # Mode "w" both creates a missing file and truncates an existing one.
        with open(self._checkpoint_fp, "w") as f:
            f.write(str(checkpoint))

    @staticmethod
    def _extract_fields(sheet: dict) -> dict:
        """Pick the four required answers out of one answer sheet.

        NOTE(review): assumes the first four entries of ``sheet["answers"]`` are, in order,
        name, id, phone and key -- confirm against the questionnaire layout.
        """
        ans = sheet["answers"]
        return {
            "name": ans[0]["answer"],
            "id": ans[1]["answer"],
            "phone": ans[2]["answer"],
            "key": ans[3]["answer"],
        }

    def get_unread(self) -> list:
        """Get unread answers to required fields as a list of dicts, most recent first.

        Pages are requested sorted by id descending and walked until a sheet whose id is at
        or below the stored checkpoint is met, or until jForm returns an empty page.

        The checkpoint is advanced only after the whole scan succeeded. If any request fails
        (network error or non-200 status) the scan is quietly abandoned: an empty list is
        returned and the checkpoint is left untouched, so the same answers are picked up
        again on the next call instead of being lost.

        Returns:
            A list of dicts with the keys ``"name"``, ``"id"``, ``"phone"`` and ``"key"``,
            in the order jForm returned them (highest sheet id first).
        """
        checkpoint = self._read_checkpoint()
        unread = []
        latest_id = 0
        page = 1
        while True:
            try:
                resp = self._get(page=page)
            except requests.RequestException:
                return []  # quietly abort without touching the checkpoint
            if resp.status_code != 200:
                return []

            sheets = resp.json()["data"]["rows"]
            if not sheets:
                # jForm doesn't respond with a 404 when we exceed the total pages of a
                # questionnaire; it returns 200 along with an empty list of rows.
                break

            if not latest_id:
                # First page retrieved this run: its first answer sheet is the latest one.
                latest_id = sheets[0]["id"]

            found_read = False
            for sheet in sheets:
                if sheet["id"] <= checkpoint:
                    # Is the checkpoint or earlier than the checkpoint.
                    found_read = True
                    break
                unread.append(self._extract_fields(sheet))
            if found_read:
                break
            page += 1

        if latest_id:
            # Next time, stop before this id.
            self._write_checkpoint(latest_id)
        return unread
|