path: root/atom/atom.py
blob: 3a77d5f87bd4b5e780343d9b80d8330667c21225
#!/usr/bin/env python3
import sys
import json
from urllib import parse as urlparse
from bs4 import BeautifulSoup
from datetime import datetime

"""
Unified Atom feed generator for fkfd.me and fkfd.me/comics.

Each time an entry is added, the oldest entry is automatically purged when
the list is at maximum capacity before atom.xml is generated.
"""

MAX_ENTRIES = 5

COMICS_DIR = "/home/fkfd/p/fkfd"
BLOG_DIR = "/home/fkfd/p/blog"


def utc_date() -> str:
    # UTC datetime: YYYY-MM-DDThh:mm:ssZ
    return datetime.utcnow().replace(microsecond=0).isoformat() + "Z"


def add_entry(entry: dict, dryrun=False):
    with open("atom.xml") as f:
        soup = BeautifulSoup(f.read(), "xml")

    entries = soup.find_all("entry")
    if len(entries) >= MAX_ENTRIES:
        # truncate to (MAX_ENTRIES - 1) entries to make room for the new one
        for entry_to_remove in entries[(MAX_ENTRIES - 1) :]:
            entry_to_remove.decompose()

    new_entry = soup.new_tag("entry")
    new_id = soup.new_tag("id")
    new_id.string = entry["url"]
    new_title = soup.new_tag("title")
    new_title.string = entry["title"]
    new_updated = soup.new_tag("updated")
    new_updated.string = entry["date"]
    new_content = soup.new_tag("content", type="html")
    new_content.string = entry["html"]
    new_link = soup.new_tag("link", href=entry["url"], rel="alternate")

    for tag in [new_id, new_title, new_updated, new_content, new_link]:
        new_entry.append(tag)

    # newest entry goes first; if the feed somehow has no entries left,
    # fall back to appending directly to <feed>
    first_entry = soup.find("entry")
    if first_entry:
        first_entry.insert_before(new_entry)
    else:
        soup.find("feed").append(new_entry)

    # the feed-level <updated> precedes all entries, so find() picks it up
    soup.find("updated").string = utc_date()

    if dryrun:
        print(soup.prettify())
    else:
        with open("atom.xml", "w") as f:
            f.write(str(soup))


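# Comic metadata is read from an info.0.json file (the same layout as xkcd's
# JSON API). Based on the fields used below, a minimal file might look like
# this (illustrative values):
#   {"num": 42, "title": "Example", "img": "https://fkfd.me/comics/img/42.png",
#    "alt": "hover text"}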
def add_comic(num, dryrun=False):
    try:
        with open(COMICS_DIR + f"/html/{num}/info.0.json") as f:
            info = json.load(f)
    except FileNotFoundError:
        print(f"info.0.json for comic {num} not found. Skip.")
        return

    add_entry(
        {
            "url": f"https://fkfd.me/comics/{info['num']}",
            "title": "Comic: " + info["title"],
            "date": utc_date(),
            "html": '<p><img src="{0}"/></p><p>{1}</p>'.format(info["img"], info["alt"]),
        },
        dryrun=dryrun,
    )


def add_blogpost(path, dryrun=False):
    # example path: "shitpost/flat_egg", "projects/byseekel"
    try:
        with open(BLOG_DIR + f"/site/{path}/index.html") as f:
            html = f.read()
    except FileNotFoundError:
        print(f"index.html for blogpost {path} not found. Skip.")
        return

    soup = BeautifulSoup(html, "html.parser")
    main = soup.find(role="main")
    url = f"https://fkfd.me/{path.strip('/')}/"  # trailing slash necessary

    # convert all relative paths to absolute URLs
    for img in main.find_all("img"):
        img["src"] = urlparse.urljoin(url, img["src"])

    for a in main.find_all("a"):
        a["href"] = urlparse.urljoin(url, a["href"])

    add_entry(
        {
            "url": url,
            "title": soup.title.text,
            "date": utc_date(),
            "html": main.decode_contents(),  # inner HTML of main container
        },
        dryrun=dryrun,
    )


if __name__ == "__main__":
    args = sys.argv

    if len(args) == 1:
        print(f"Usage: {args[0]} [<blogpath>|<comicnum>] [--dry]")
        sys.exit(1)

    dryrun = len(args) >= 3 and args[2] == "--dry"

    if args[1].isdecimal():
        add_comic(args[1], dryrun)
    else:
        add_blogpost(args[1], dryrun)