#!/usr/bin/env python3
import sys
import json
from datetime import datetime, timezone
from html import escape
from urllib import parse as urlparse

from bs4 import BeautifulSoup

"""
Unified Atom feed generator for fkfd.me and fkfd.me/comics.

Each time an entry is added, the oldest entry is automatically purged when
the list is at maximum capacity before atom.xml is generated.
"""

MAX_ENTRIES = 5

COMICS_DIR = "/home/fkfd/p/fkfd"
BLOG_DIR = "/home/fkfd/p/blog"


def utc_date() -> str:
    # current UTC datetime as YYYY-MM-DDThh:mm:ssZ
    # (datetime.utcnow() is deprecated since Python 3.12, so build an aware
    # datetime and swap its "+00:00" offset for the Atom-style "Z" suffix)
    return (
        datetime.now(timezone.utc)
        .replace(microsecond=0)
        .isoformat()
        .replace("+00:00", "Z")
    )


def add_entry(entry: dict, dryrun=False):
    with open("atom.xml") as f:
        soup = BeautifulSoup(f.read(), "xml")
    entries = soup.find_all("entry")
    if len(entries) >= MAX_ENTRIES:
        # truncate to (MAX_ENTRIES - 1) entries to make room for the new one
        for entry_to_remove in entries[(MAX_ENTRIES - 1) :]:
            entry_to_remove.decompose()

    new_entry = soup.new_tag("entry")
    new_id = soup.new_tag("id")
    new_id.string = entry["url"]
    new_title = soup.new_tag("title")
    new_title.string = entry["title"]
    new_updated = soup.new_tag("updated")
    new_updated.string = entry["date"]
    new_content = soup.new_tag("content", type="html")
    new_content.string = entry["html"]
    new_link = soup.new_tag("link", href=entry["url"], rel="alternate")

    for tag in [new_id, new_title, new_updated, new_content, new_link]:
        new_entry.append(tag)

    soup.find("entry").insert_before(new_entry)
    soup.find("updated").string = utc_date()

    if dryrun:
        print(soup.prettify())
    else:
        f = open("atom.xml", "w")
        f.write(str(soup))
        f.close()
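
# For example, a dry run with an illustrative entry dict (the same shape as
# the dicts built by add_comic() and add_blogpost() below):
#
#   add_entry(
#       {
#           "url": "https://fkfd.me/example/",
#           "title": "Example post",
#           "date": utc_date(),
#           "html": "<p>Hello</p>",
#       },
#       dryrun=True,
#   )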


def add_comic(num, dryrun=False):
    try:
        with open(COMICS_DIR + f"/html/{num}/info.0.json") as f:
            info = json.load(f)
    except FileNotFoundError:
        print(f"info.0.json for comic {num} not found. Skip.")
        return

    add_entry(
        {
            "url": f"https://fkfd.me/comics/{info['num']}",
            "title": "Comic: " + info["title"],
            "date": utc_date(),
            # escape the metadata so quotes or angle brackets in it cannot
            # break the attribute values or the surrounding markup
            "html": '<p><img src="{0}" alt="{1}"></p><p>{2}</p>'.format(
                escape(info["img"]),
                escape(info["transcript"]),
                escape(info["alt"]),
            ),
        },
        dryrun=dryrun,
    )
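
# add_comic() only relies on these info.0.json keys (xkcd-style comic
# metadata; the values here are illustrative):
#
#   {
#       "num": 42,
#       "title": "Example",
#       "img": "https://fkfd.me/comics/42/img.png",
#       "alt": "title text shown below the comic",
#       "transcript": "image description used as alt text"
#   }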


def add_blogpost(path, dryrun=False):
    # example path: "shitpost/flat_egg", "projects/byseekel"
    path = path.strip("/")
    try:
        with open(BLOG_DIR + f"/site/{path}/index.html") as f:
            html = f.read()
    except FileNotFoundError:
        print(f"index.html for blogpost {path} not found. Skip.")
        return

    soup = BeautifulSoup(html, "html.parser")
    main = soup.find(role="main")
    if main is None:
        print(f'No role="main" container in blogpost {path}. Skip.')
        return
    url = f"https://fkfd.me/{path}/"  # trailing slash necessary

    # convert all relative paths in the main container to absolute URLs
    for img in main.find_all("img"):
        if img.has_attr("src"):
            img["src"] = urlparse.urljoin(url, img["src"])

    for a in main.find_all("a"):
        if "headerlink" in a.get("class", []):  # remove "¶" permalink anchors
            a.decompose()
            continue
        if a.has_attr("href"):
            a["href"] = urlparse.urljoin(url, a["href"])

    add_entry(
        {
            "url": url,
            "title": soup.title.text,
            "date": utc_date(),
            "html": main.decode_contents(),  # inner HTML of main container
        },
        dryrun=dryrun,
    )


if __name__ == "__main__":
    args = sys.argv

    if len(args) == 1:
        print(f"Usage: {args[0]} <blogpath|comicnum> [--dry]")
        sys.exit(1)

    dryrun = len(args) >= 3 and args[2] == "--dry"

    if args[1].isdecimal():
        add_comic(args[1], dryrun)
    else:
        add_blogpost(args[1], dryrun)
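
# Example invocations (arguments are illustrative):
#   ./atom.py 42 --dry            # preview the feed with comic 42 prepended
#   ./atom.py shitpost/flat_egg   # add a blogpost entry and rewrite atom.xml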