author     Frederick Yin <fkfd@fkfd.me>  2023-11-27 19:16:47 -0500
committer  Frederick Yin <fkfd@fkfd.me>  2023-11-27 19:16:47 -0500
commit     745d6b74b094037404728e44d3e4e2325750dfdf
tree       3f68365d238d155019ff4dd084a469a58a53ede1
parent     44cb51bce28c07c2c3d2c80847637f8e1a611926
Merge atom script
-rw-r--r--  .gitignore      1
-rwxr-xr-x  atom/atom.py  127
-rwxr-xr-x  atom/scp        2
3 files changed, 130 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ site/
 docs/*/img/*/fullsize/
 wip/
 *~
+atom.xml
diff --git a/atom/atom.py b/atom/atom.py
new file mode 100755
index 0000000..3a77d5f
--- /dev/null
+++ b/atom/atom.py
@@ -0,0 +1,127 @@
+#!/usr/bin/python
+import sys
+import json
+from urllib import parse as urlparse
+from bs4 import BeautifulSoup
+from datetime import datetime
+
+"""
+Unified Atom feed generator for fkfd.me and fkfd.me/comics.
+
+Each time an entry is added, the oldest entry is automatically purged when
+the list is at maximum capacity before atom.xml is generated.
+"""
+
+MAX_ENTRIES = 5
+
+COMICS_DIR = "/home/fkfd/p/fkfd"
+BLOG_DIR = "/home/fkfd/p/blog"
+
+
+def utc_date() -> str:
+    # UTC datetime: YYYY-MM-DDThh:mm:ssZ
+    return datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
+
+
+def add_entry(entry: dict, dryrun=False):
+    f = open("atom.xml")
+    xml = f.read()
+    f.close()
+    soup = BeautifulSoup(xml, "xml")
+    entries = soup.find_all("entry")
+    if len(entries) >= MAX_ENTRIES:
+        # truncate to (MAX_ENTRIES - 1) entries to make room
+        for entry_to_remove in entries[(MAX_ENTRIES - 1) :]:
+            entry_to_remove.decompose()
+
+    new_entry = soup.new_tag("entry")
+    new_id = soup.new_tag("id")
+    new_id.string = entry["url"]
+    new_title = soup.new_tag("title")
+    new_title.string = entry["title"]
+    new_updated = soup.new_tag("updated")
+    new_updated.string = entry["date"]
+    new_content = soup.new_tag("content", type="html")
+    new_content.string = entry["html"]
+    new_link = soup.new_tag("link", href=entry["url"], rel="alternate")
+
+    for tag in [new_id, new_title, new_updated, new_content, new_link]:
+        new_entry.append(tag)
+
+    soup.find("entry").insert_before(new_entry)
+    soup.find("updated").string = utc_date()
+
+    if dryrun:
+        print(soup.prettify())
+    else:
+        f = open("atom.xml", "w")
+        f.write(str(soup))
+        f.close()
+
+
+def add_comic(num, dryrun=False):
+    try:
+        f = open(COMICS_DIR + f"/html/{num}/info.0.json")
+    except FileNotFoundError:
+        print(f"info.0.json for comic {num} not found. Skip.")
+        return
+
+    info = json.load(f)
+    f.close()
+
+    add_entry(
+        {
+            "url": f"https://fkfd.me/comics/{info['num']}",
+            "title": "Comic: " + info["title"],
+            "date": utc_date(),
+            "html": '<p><img src="{0}"/></p><p>{1}</p>'.format(info["img"], info["alt"]),
+        },
+        dryrun=dryrun,
+    )
+
+
+def add_blogpost(path, dryrun=False):
+    # example path: "shitpost/flat_egg", "projects/byseekel"
+    try:
+        f = open(BLOG_DIR + f"/site/{path}/index.html")
+    except FileNotFoundError:
+        print(f"index.html for blogpost {path} not found. Skip.")
+        return
+
+    html = f.read()
+    f.close()
+    soup = BeautifulSoup(html, "html.parser")
+    main = soup.find(role="main")
+    url = f"https://fkfd.me/{path.strip('/')}/"  # trailing slash necessary
+
+    # convert all relative paths to absolute URLs
+    for img in main.find_all("img"):
+        img["src"] = urlparse.urljoin(url, img["src"])
+
+    for a in main.find_all("a"):
+        a["href"] = urlparse.urljoin(url, a["href"])
+
+    add_entry(
+        {
+            "url": url,
+            "title": soup.title.text,
+            "date": utc_date(),
+            "html": main.decode_contents(),  # inner HTML of main container
+        },
+        dryrun=dryrun,
+    )
+
+
+if __name__ == "__main__":
+    args = sys.argv
+
+    if len(args) == 1:
+        print(f"Usage: {args[0]} [<blogpath>|<comicnum>] [--dry]")
+        exit(1)
+
+    dryrun = len(args) >= 3 and args[2] == "--dry"
+
+    if args[1].isdecimal():
+        add_comic(args[1], dryrun)
+    else:
+        add_blogpost(args[1], dryrun)
diff --git a/atom/scp b/atom/scp
new file mode 100755
index 0000000..8e1d84f
--- /dev/null
+++ b/atom/scp
@@ -0,0 +1,2 @@
+#!/usr/bin/bash
+scp atom.xml www@fkfd.me:www/feed/