From 3b900d691b44a9ff1fd2c69043e2b624eded71bd Mon Sep 17 00:00:00 2001
From: Frederick Yin <fkfd@fkfd.me>
Date: Thu, 8 Sep 2022 13:50:49 +0800
Subject: Sheet compressor

---
 compress.py | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)
 create mode 100755 compress.py

diff --git a/compress.py b/compress.py
new file mode 100755
index 0000000..9e92790
--- /dev/null
+++ b/compress.py
@@ -0,0 +1,91 @@
+#!/usr/bin/python
+from openpyxl import Workbook, load_workbook
+from openpyxl.styles import Alignment, Border, Side
+from argparse import ArgumentParser
+from datetime import datetime
+
+
+def elide(string, length=5):
+    if len(string) > length:
+        return string[: (length - 1)] + "…"
+    return string
+
+
+def wrap_districts(districts, length=32):
+    lines = [""]
+    for dist in districts:
+        if len(lines[-1]) + len(dist) > length:
+            lines.append("")
+        lines[-1] += " " + dist
+    return lines
+
+
+def compress(filepath):
+    regions = {}  # all high/middle risk regions go here
+    # open xlsx file
+    xlsx = load_workbook(filepath)
+    lst = xlsx.worksheets[0]  # "列表"
+    # B3:Dxxxx
+    for province, city, district in lst.iter_rows(
+        min_row=3, max_row=None, min_col=2, max_col=4, values_only=True
+    ):
+        if province not in regions:
+            regions[province] = {}
+        if city not in regions[province]:
+            regions[province][city] = set()
+        regions[province][city].add(district)
+    # extract title
+    title = lst.cell(row=1, column=1).value.split("\n")[0]
+
+    # create new file
+    wb = Workbook()
+    ws = wb.active
+    ws.title = "压缩列表"
+    ws.set_printer_settings(ws.PAPERSIZE_A4, ws.ORIENTATION_PORTRAIT)
+    ws.column_dimensions["A"].width = 10
+    ws.column_dimensions["B"].width = 15
+    ws.column_dimensions["C"].width = 50
+    # truncate microseconds from datetime: YYYY-MM-DDThh:mm:ss
+    ws.append([title + "\t生成时间：" + datetime.now().isoformat()[:19]])
+    ws.merge_cells(start_row=1, end_row=1, start_column=1, end_column=3)
+
+    prev_province = ""
+    start_row = 2
+    current_row = 2
+    top_border = Border(top=Side(style="thin", color="FF000000"))
+    for province, cities in regions.items():
+        # if we see another province, merge first column of previous one
+        #  print(f"Begin {province}, row {current_row}")
+        if not prev_province:
+            prev_province = province
+        if province != prev_province:
+            #  print(f"Merge rows {start_row} to {current_row - 1}")
+            ws.merge_cells(
+                start_row=start_row,
+                end_row=current_row - 1,
+                start_column=1,
+                end_column=1,
+            )
+            ws.cell(row=start_row, column=1).alignment = Alignment(vertical="top")
+            ws.cell(row=start_row, column=1).border = top_border
+            prev_province = province
+            start_row = current_row
+
+        for city, districts in cities.items():
+            #  print(f"\tBegin {city}, row {current_row}")
+            dist_lines = wrap_districts(districts)
+            ws.append([elide(province, 5), elide(city, 7), dist_lines[0]])
+            for line in dist_lines[1:]:
+                ws.append(["", "", line])
+            for col in range(1, 4):
+                ws.cell(row=current_row, column=col).border = top_border
+            current_row += len(dist_lines)
+
+    wb.save(filepath.replace(".xlsx", "（压缩）.xlsx"))
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("filepath")
+    args = parser.parse_args()
+    compress(args.filepath)
-- 
cgit v1.2.3