compress.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101

#!/usr/bin/python
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, Border, Side
from argparse import ArgumentParser
from datetime import datetime


def elide(string, length=5):
    if len(string) > length:
        return string[: (length - 1)] + "…"
    return string


def wrap_districts(districts, length=32):
    lines = [""]
    for dist in districts:
        if len(lines[-1]) + len(dist) > length:
            lines.append("")
        lines[-1] += " " + dist
    return lines


def compress(filepath: str) -> str:
    regions = {}  # all high/middle risk regions go here
    # open xlsx file
    xlsx = load_workbook(filepath)
    lst = None
    for sheet in xlsx.worksheets:
        if sheet.title == "列表":
            lst = sheet
            break
    else:
        # sheet not found
        raise Exception("未找到名为“列表”的表格")

    # B3:Dxxxx
    for province, city, district in lst.iter_rows(
        min_row=3, max_row=None, min_col=2, max_col=4, values_only=True
    ):
        if province not in regions:
            regions[province] = {}
        if city not in regions[province]:
            regions[province][city] = set()
        regions[province][city].add(district)
    # extract title
    title = lst.cell(row=1, column=1).value.split("\n")[0]

    # create new file
    wb = Workbook()
    ws = wb.active
    ws.title = "压缩列表"
    ws.set_printer_settings(ws.PAPERSIZE_A4, ws.ORIENTATION_PORTRAIT)
    ws.column_dimensions["A"].width = 10
    ws.column_dimensions["B"].width = 15
    ws.column_dimensions["C"].width = 50
    # truncate microseconds from datetime: YYYY-MM-DDThh:mm:ss
    ws.append([title + "\t生成时间：" + datetime.now().isoformat()[:19]])
    ws.merge_cells(start_row=1, end_row=1, start_column=1, end_column=3)

    prev_province = ""
    start_row = 2
    current_row = 2
    top_border = Border(top=Side(style="thin", color="FF000000"))
    for province, cities in regions.items():
        # if we see another province, merge first column of previous one
        #  print(f"Begin {province}, row {current_row}")
        if not prev_province:
            prev_province = province
        if province != prev_province:
            #  print(f"Merge rows {start_row} to {current_row - 1}")
            ws.merge_cells(
                start_row=start_row,
                end_row=current_row - 1,
                start_column=1,
                end_column=1,
            )
            ws.cell(row=start_row, column=1).alignment = Alignment(vertical="top")
            ws.cell(row=start_row, column=1).border = top_border
            prev_province = province
            start_row = current_row

        for city, districts in cities.items():
            #  print(f"\tBegin {city}, row {current_row}")
            dist_lines = wrap_districts(districts)
            ws.append([elide(province, 5), elide(city, 7), dist_lines[0]])
            for line in dist_lines[1:]:
                ws.append(["", "", line])
            for col in range(1, 4):
                ws.cell(row=current_row, column=col).border = top_border
            current_row += len(dist_lines)

    compressed_filepath = filepath.replace(".xlsx", "（压缩）.xlsx")
    wb.save(compressed_filepath)
    return compressed_filepath


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("filepath")
    args = parser.parse_args()
    compress(args.filepath)