From 745d6b74b094037404728e44d3e4e2325750dfdf Mon Sep 17 00:00:00 2001
From: Frederick Yin
Date: Mon, 27 Nov 2023 19:16:47 -0500
Subject: Merge atom script

---
 .gitignore   |   1 +
 atom/atom.py | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 atom/scp     |   2 +
 3 files changed, 130 insertions(+)
 create mode 100755 atom/atom.py
 create mode 100755 atom/scp

diff --git a/.gitignore b/.gitignore
index d7831fc..35038e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ site/
 docs/*/img/*/fullsize/
 wip/
 *~
+atom.xml
diff --git a/atom/atom.py b/atom/atom.py
new file mode 100755
index 0000000..3a77d5f
--- /dev/null
+++ b/atom/atom.py
@@ -0,0 +1,127 @@
+#!/usr/bin/python
+import sys
+import json
+from urllib import parse as urlparse
+from bs4 import BeautifulSoup
+from datetime import datetime
+
+"""
+Unified Atom feed generator for fkfd.me and fkfd.me/comics.
+
+Each time an entry is added, the oldest entry is automatically purged if the
+feed is already at maximum capacity, then atom.xml is regenerated.
+"""
+
+MAX_ENTRIES = 5
+
+COMICS_DIR = "/home/fkfd/p/fkfd"
+BLOG_DIR = "/home/fkfd/p/blog"
+
+
+def utc_date() -> str:
+    # UTC datetime: YYYY-MM-DDThh:mm:ssZ
+    return datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
+
+
+def add_entry(entry: dict, dryrun=False):
+    f = open("atom.xml")
+    xml = f.read()
+    f.close()
+    soup = BeautifulSoup(xml, "xml")
+    entries = soup.find_all("entry")
+    if len(entries) >= MAX_ENTRIES:
+        # truncate to (MAX_ENTRIES - 1) entries to make room
+        for entry_to_remove in entries[(MAX_ENTRIES - 1) :]:
+            entry_to_remove.decompose()
+
+    new_entry = soup.new_tag("entry")
+    new_id = soup.new_tag("id")
+    new_id.string = entry["url"]
+    new_title = soup.new_tag("title")
+    new_title.string = entry["title"]
+    new_updated = soup.new_tag("updated")
+    new_updated.string = entry["date"]
+    new_content = soup.new_tag("content", type="html")
+    new_content.string = entry["html"]
+    new_link = soup.new_tag("link", href=entry["url"], rel="alternate")
+
+    for tag in [new_id, new_title, new_updated, new_content, new_link]:
+        new_entry.append(tag)
+
+    soup.find("entry").insert_before(new_entry)
+    soup.find("updated").string = utc_date()
+
+    if dryrun:
+        print(soup.prettify())
+    else:
+        f = open("atom.xml", "w")
+        f.write(str(soup))
+        f.close()
+
+
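+# Build a feed entry for a comic from its xkcd-style info.0.json metadata
+# ("num", "title", "img", "alt"): the image plus its alt text as a caption.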
+def add_comic(num, dryrun=False):
+    try:
+        f = open(COMICS_DIR + f"/html/{num}/info.0.json")
+    except FileNotFoundError:
+        print(f"info.0.json for comic {num} not found. Skip.")
+        return
+
+    info = json.load(f)
+    f.close()
+
+    add_entry(
+        {
+            "url": f"https://fkfd.me/comics/{info['num']}",
+            "title": "Comic: " + info["title"],
+            "date": utc_date(),
+            "html": (
+                '<img src="{0}" alt="{1}"/>'
+                '<p>{1}</p>'
+            ).format(info["img"], info["alt"]),
+        },
+        dryrun=dryrun,
+    )
+
+
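+# Build a feed entry for a blogpost from its rendered index.html: the main
+# content block is inlined into the feed, with relative URLs made absolute
+# so images and links still resolve in a feed reader.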
+def add_blogpost(path, dryrun=False):
+    # example path: "shitpost/flat_egg", "projects/byseekel"
+    try:
+        f = open(BLOG_DIR + f"/site/{path}/index.html")
+    except FileNotFoundError:
+        print(f"index.html for blogpost {path} not found. Skip.")
+        return
+
+    html = f.read()
+    f.close()
+    soup = BeautifulSoup(html, "html.parser")
+    main = soup.find(role="main")
+    url = f"https://fkfd.me/{path.strip('/')}/"  # trailing slash necessary
+
+    # convert all relative paths to absolute URLs
+    for img in main.find_all("img"):
+        img["src"] = urlparse.urljoin(url, img["src"])
+
+    for a in main.find_all("a"):
+        a["href"] = urlparse.urljoin(url, a["href"])
+
+    add_entry(
+        {
+            "url": url,
+            "title": soup.title.text,
+            "date": utc_date(),
+            "html": main.decode_contents(),  # inner HTML of main container
+        },
+        dryrun=dryrun,
+    )
+
+
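+# CLI entrypoint: a decimal argument is taken as a comic number, anything
+# else as a blogpost path; "--dry" prints the feed instead of writing it.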
+if __name__ == "__main__":
+    args = sys.argv
+
+    if len(args) == 1:
+        print(f"Usage: {args[0]} <comic num>|<blogpost path> [--dry]")
+        exit(1)
+
+    dryrun = len(args) >= 3 and args[2] == "--dry"
+
+    if args[1].isdecimal():
+        add_comic(args[1], dryrun)
+    else:
+        add_blogpost(args[1], dryrun)
diff --git a/atom/scp b/atom/scp
new file mode 100755
index 0000000..8e1d84f
--- /dev/null
+++ b/atom/scp
@@ -0,0 +1,2 @@
+#!/usr/bin/bash
+scp atom.xml www@fkfd.me:www/feed/