summaryrefslogtreecommitdiff
path: root/atom/atom.py
diff options
context:
space:
mode:
Diffstat (limited to 'atom/atom.py')
-rwxr-xr-xatom/atom.py136
1 files changed, 136 insertions, 0 deletions
diff --git a/atom/atom.py b/atom/atom.py
new file mode 100755
index 0000000..b240378
--- /dev/null
+++ b/atom/atom.py
@@ -0,0 +1,136 @@
+#!/usr/bin/python
+import sys
+import json
+from urllib import parse as urlparse
+from bs4 import BeautifulSoup
+from datetime import datetime
+
+"""
+Unified Atom feed generator for fkfd.me and fkfd.me/comics.
+
+Each time an entry is added, the oldest entry is automatically purged when
+the list is at maximum capacity before atom.xml is generated.
+"""
+
# Upper bound on the number of <entry> elements kept in atom.xml.
MAX_ENTRIES = 5

# Root of the comics checkout; per-comic metadata is read from
# <COMICS_DIR>/html/<num>/info.0.json.
COMICS_DIR = "/home/fkfd/p/fkfd"
# Root of the blog checkout; rendered pages are read from
# <BLOG_DIR>/site/<path>/index.html.
BLOG_DIR = "/home/fkfd/p/blog"
+
+
def utc_date() -> str:
    """Return the current UTC time as ``YYYY-MM-DDThh:mm:ssZ``.

    The value has one-second resolution (microseconds are dropped) and
    always ends in a literal ``Z``.
    """
    # Imported locally: the module header only imports ``datetime`` itself.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12 and returns a naive
    # object; ask for an aware UTC timestamp instead.
    now = datetime.now(timezone.utc).replace(microsecond=0)
    # isoformat() on an aware datetime would end in "+00:00"; strip the
    # tzinfo so the suffix stays the "Z" form this function has always used.
    return now.replace(tzinfo=None).isoformat() + "Z"
+
+
def add_entry(entry: dict, dryrun=False):
    """Insert *entry* as the newest ``<entry>`` of ``atom.xml``.

    ``atom.xml`` is read from (and, unless *dryrun* is set, written back to)
    the current working directory. If the feed already holds ``MAX_ENTRIES``
    or more entries, everything past the first ``MAX_ENTRIES - 1`` is removed
    first so the new entry fits.

    Args:
        entry: mapping with the keys ``"url"``, ``"title"``, ``"date"`` and
            ``"html"``; each value is stored as the text (or attribute) of
            the corresponding Atom element.
        dryrun: when true, print the prettified feed instead of overwriting
            ``atom.xml``.

    Raises:
        ValueError: if the document has neither an ``<entry>`` to insert
            before nor a ``<feed>`` element to append to.
    """
    # Hand BeautifulSoup the raw bytes so the XML declaration decides the
    # decoding rather than the process locale.
    with open("atom.xml", "rb") as f:
        soup = BeautifulSoup(f.read(), "xml")

    entries = soup.find_all("entry")
    if len(entries) >= MAX_ENTRIES:
        # truncate to (MAX_ENTRIES - 1) entries to make room
        for stale in entries[MAX_ENTRIES - 1:]:
            stale.decompose()

    new_entry = soup.new_tag("entry")

    new_id = soup.new_tag("id")
    new_id.string = entry["url"]
    new_title = soup.new_tag("title")
    new_title.string = entry["title"]
    new_updated = soup.new_tag("updated")
    new_updated.string = entry["date"]
    new_content = soup.new_tag("content", type="html")
    new_content.string = entry["html"]
    new_link = soup.new_tag("link", href=entry["url"], rel="alternate")

    for tag in (new_id, new_title, new_updated, new_content, new_link):
        new_entry.append(tag)

    feed = soup.find("feed")
    first_entry = soup.find("entry")
    if first_entry is not None:
        # Newest entry goes on top of the existing ones.
        first_entry.insert_before(new_entry)
    elif feed is not None:
        # Feed without any entry (fresh feed, or everything was purged):
        # there is nothing to insert before, so append to the feed itself.
        feed.append(new_entry)
    else:
        raise ValueError("atom.xml contains no <feed> or <entry> element")

    # Refresh the feed-level timestamp. Look at direct children of <feed>
    # first so an entry's own <updated> is not mistaken for it; fall back to
    # the first <updated> anywhere in the document otherwise.
    feed_updated = None
    if feed is not None:
        feed_updated = feed.find("updated", recursive=False)
    if feed_updated is None:
        feed_updated = soup.find("updated")
    feed_updated.string = utc_date()

    if dryrun:
        print(soup.prettify())
    else:
        # The serialized document declares utf-8 in its XML declaration, so
        # write it out with that encoding explicitly.
        with open("atom.xml", "w", encoding="utf-8") as f:
            f.write(str(soup))
+
+
def add_comic(num, dryrun=False):
    """Add comic number *num* to the feed.

    Reads ``<COMICS_DIR>/html/<num>/info.0.json`` and uses its ``num``,
    ``title``, ``img``, ``transcript`` and ``alt`` fields to build the entry.
    If the JSON file does not exist, a message is printed and nothing is
    added.

    Args:
        num: comic number, used to locate the info file.
        dryrun: forwarded to ``add_entry``; when true the feed is printed
            instead of written.
    """
    # Imported locally: the module header does not import ``html``.
    from html import escape

    try:
        # The context manager closes the file even if the JSON is malformed.
        with open(COMICS_DIR + f"/html/{num}/info.0.json", encoding="utf-8") as f:
            info = json.load(f)
    except FileNotFoundError:
        print(f"info.0.json for comic {num} not found. Skip.")
        return

    # Attribute values must be escaped, otherwise a double quote in the
    # transcript (or an "&" in the image URL) produces broken markup.
    # NOTE(review): assumes "img" and "transcript" hold plain text, not
    # pre-escaped HTML - confirm against the info.0.json generator.
    img_src = escape(str(info["img"]), quote=True)
    img_alt = escape(str(info["transcript"]), quote=True)
    # "alt" is inserted as-is, exactly as before, in case it carries markup.
    body = '<p><img src="{0}" alt="{1}"></p><p>{2}</p>'.format(
        img_src, img_alt, info["alt"]
    )

    add_entry(
        {
            "url": f"https://fkfd.me/comics/{info['num']}",
            "title": "Comic: " + info["title"],
            "date": utc_date(),
            "html": body,
        },
        dryrun=dryrun,
    )
+
+
def add_blogpost(path, dryrun=False):
    """Add the blog post at *path* to the feed.

    Reads ``<BLOG_DIR>/site/<path>/index.html``, takes the element with
    ``role="main"`` as the post body, rewrites relative ``<img src>`` and
    ``<a href>`` values to absolute ``https://fkfd.me/...`` URLs, drops
    ``headerlink`` anchors, and hands the result to ``add_entry``. If the
    page or its main container is missing, a message is printed and nothing
    is added.

    Args:
        path: post path relative to the site root, e.g.
            ``"shitpost/flat_egg"`` or ``"projects/byseekel"``; leading and
            trailing slashes are ignored.
        dryrun: forwarded to ``add_entry``; when true the feed is printed
            instead of written.
    """
    path = path.strip("/")
    try:
        # Read raw bytes so BeautifulSoup can detect the page's encoding.
        with open(BLOG_DIR + f"/site/{path}/index.html", "rb") as f:
            html = f.read()
    except FileNotFoundError:
        print(f"index.html for blogpost {path} not found. Skip.")
        return

    soup = BeautifulSoup(html, "html.parser")
    main = soup.find(role="main")
    if main is None:
        print(f"index.html for blogpost {path} has no role=\"main\" element. Skip.")
        return

    url = f"https://fkfd.me/{path}/"  # trailing slash necessary

    # convert all relative paths to absolute URLs
    for img in main.find_all("img"):
        if img.has_attr("src"):
            img["src"] = urlparse.urljoin(url, img["src"])

    for a in main.find_all("a"):
        # Links without a class attribute are ordinary links and must still
        # get their href rewritten, so default to "no classes" here instead
        # of letting a KeyError skip the rewrite.
        if "headerlink" in a.get("class", []):  # remove the pilcrow anchor
            a.decompose()
            continue
        if a.has_attr("href"):
            a["href"] = urlparse.urljoin(url, a["href"])

    add_entry(
        {
            "url": url,
            "title": soup.title.text,
            "date": utc_date(),
            "html": main.decode_contents(),  # inner HTML of main container
        },
        dryrun=dryrun,
    )
+
+
if __name__ == "__main__":
    # Command line: <script> [<blogpath>|<comicnum>] [--dry]
    args = sys.argv

    if len(args) == 1:
        print(f"Usage: {args[0]} [<blogpath>|<comicnum>] [--dry]")
        # sys.exit rather than the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(1)

    # --dry is only recognized as the second argument.
    dryrun = len(args) >= 3 and args[2] == "--dry"

    # An all-digit argument is a comic number; anything else is a blog path.
    if args[1].isdecimal():
        add_comic(args[1], dryrun)
    else:
        add_blogpost(args[1], dryrun)