#!/usr/bin/python
import sys
import json
from urllib import parse as urlparse
from bs4 import BeautifulSoup
from datetime import datetime

"""
Unified Atom feed generator for fkfd.me and fkfd.me/comics.

Each time an entry is added, the oldest entries are purged if the feed is
already at maximum capacity, then atom.xml is rewritten.
"""

MAX_ENTRIES = 5
COMICS_DIR = "/home/fkfd/p/fkfd"
BLOG_DIR = "/home/fkfd/p/blog"


def utc_date() -> str:
    # UTC datetime: YYYY-MM-DDThh:mm:ssZ
    return datetime.utcnow().replace(microsecond=0).isoformat() + "Z"


def add_entry(entry: dict, dryrun=False):
    f = open("atom.xml")
    xml = f.read()
    f.close()

    soup = BeautifulSoup(xml, "xml")
    entries = soup.find_all("entry")
    if len(entries) >= MAX_ENTRIES:
        # truncate to (MAX_ENTRIES - 1) entries to make room for the new one
        for entry_to_remove in entries[(MAX_ENTRIES - 1) :]:
            entry_to_remove.decompose()

    new_entry = soup.new_tag("entry")
    new_id = soup.new_tag("id")
    new_id.string = entry["url"]
    new_title = soup.new_tag("title")
    new_title.string = entry["title"]
    new_updated = soup.new_tag("updated")
    new_updated.string = entry["date"]
    new_content = soup.new_tag("content", type="html")
    new_content.string = entry["html"]
    new_link = soup.new_tag("link", href=entry["url"], rel="alternate")

    for tag in [new_id, new_title, new_updated, new_content, new_link]:
        new_entry.append(tag)

    # newest entry goes first; assumes the feed already contains at least one <entry>
    soup.find("entry").insert_before(new_entry)
    # refresh the feed-level <updated> timestamp
    soup.find("updated").string = utc_date()

    if dryrun:
        print(soup.prettify())
    else:
        f = open("atom.xml", "w")
        f.write(str(soup))
        f.close()


def add_comic(num, dryrun=False):
    try:
        f = open(COMICS_DIR + f"/html/{num}/info.0.json")
    except FileNotFoundError:
        print(f"info.0.json for comic {num} not found. Skip.")
        return

    info = json.load(f)
    f.close()

    add_entry(
        {
            "url": f"https://fkfd.me/comics/{info['num']}",
            "title": "Comic: " + info["title"],
            "date": utc_date(),
            # comic image followed by transcript and alt text, as simple HTML
            "html": '<img src="{0}"/><p>{1}</p><p>{2}</p>'.format(
                info["img"], info["transcript"], info["alt"]
            ),
        },
        dryrun=dryrun,
    )


def add_blogpost(path, dryrun=False):
    # example path: "shitpost/flat_egg", "projects/byseekel"
    path = path.strip("/")
    try:
        f = open(BLOG_DIR + f"/site/{path}/index.html")
    except FileNotFoundError:
        print(f"index.html for blogpost {path} not found. Skip.")
        return

    html = f.read()
    f.close()

    soup = BeautifulSoup(html, "html.parser")
    main = soup.find(role="main")
    url = f"https://fkfd.me/{path}/"  # trailing slash necessary

    # convert all relative paths to absolute URLs
    for img in main.find_all("img"):
        img["src"] = urlparse.urljoin(url, img["src"])
    for a in main.find_all("a"):
        try:
            if "headerlink" in a["class"]:
                # remove "¶" permalink anchors
                a.decompose()
                continue
            a["href"] = urlparse.urljoin(url, a["href"])
        except KeyError:
            # just a regular link, no class attribute
            pass

    add_entry(
        {
            "url": url,
            "title": soup.title.text,
            "date": utc_date(),
            "html": main.decode_contents(),  # inner HTML of main container
        },
        dryrun=dryrun,
    )


if __name__ == "__main__":
    args = sys.argv
    if len(args) == 1:
        print(f"Usage: {args[0]} <comic number|blogpost path> [--dry]")
        exit(1)

    dryrun = len(args) >= 3 and args[2] == "--dry"

    if args[1].isdecimal():
        add_comic(args[1], dryrun)
    else:
        add_blogpost(args[1], dryrun)
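
# Usage sketch (the file name "feed.py" is an assumption; run the script from
# the directory that already holds atom.xml, since add_entry() reads and
# rewrites "atom.xml" by relative path and expects at least one <entry> and an
# <updated> element to already be present in the feed):
#
#   ./feed.py 42                        # add the comic described by COMICS_DIR/html/42/info.0.json
#   ./feed.py projects/byseekel         # add the blogpost at BLOG_DIR/site/projects/byseekel/index.html
#   ./feed.py projects/byseekel --dry   # print the resulting feed instead of overwriting atom.xml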