import sys
import html
import shlex
from xml.etree import ElementTree
from urllib.request import urlopen, Request
from urllib.parse import urlparse, urlunparse


def is_entry(elem):
    """True for an <item> (RSS) or <entry> (Atom) element, namespaced or not."""
    return elem.tag in ('item', 'entry') or elem.tag.endswith('}item') or elem.tag.endswith('}entry')


def find_ignore_ns(element, tag):
    """Return the first descendant matching `tag`, trying an exact match first
    and a namespaced ('{...}tag') match second. Both passes stop at the first
    item/entry so per-post elements can't shadow the channel-level ones.
    Returns None if nothing matches."""
    for i in element.iter():
        if i.tag == tag:
            return i
        if is_entry(i):
            break
    for i in element.iter():
        if i.tag.endswith('}' + tag):
            return i
        if is_entry(i):
            break


def is_link(i):
    """A usable <link>: not the feed's self-reference, and pointing at HTML."""
    return i.attrib.get('rel') != 'self' and i.attrib.get('type', 'text/html') == 'text/html'


def find_link(element):
    """Like find_ignore_ns, but only for <link> elements that pass is_link."""
    for i in element.iter():
        if i.tag == 'link' and is_link(i):
            return i
        if is_entry(i):
            break
    for i in element.iter():
        if i.tag.endswith('}link') and is_link(i):
            return i
        if is_entry(i):
            break


with open(sys.argv[1]) as file, open(sys.argv[2], 'w+') as out:
    for line in file:
        # urls-file line: feed URL first, tags after it, '#' starts a comment.
        inp = shlex.split(line, comments=True)
        if 'blog' not in inp[1:]:
            continue
        feedurl = inp[0]

        # Gemini feeds are fetched through konbata and can't be checked over
        # HTTP; list them by their title tag (the last tag, minus its leading '~').
        if feedurl.startswith('exec:~/.local/bin/konbata '):
            text = f'\n  • {html.escape(inp[-1][1:])} (gemini, feed)\n  • '
            print(text)
            out.write(text)
            continue

        # Anything served from an rss.* host is treated as an RSS-Bridge-style
        # auto-generated feed.
        rss_bridge = urlparse(feedurl).hostname.startswith('rss.')

        try:
            with urlopen(Request(feedurl, headers={'User-Agent': "stop being so fucking obsessed that i'm using urllib ffs"}), timeout=60) as resp:
                if resp.status != 200:
                    print(feedurl, 'returned', resp.status, file=sys.stderr)
                    continue
                tree = ElementTree.ElementTree().parse(resp)
        except Exception as e:
            print(feedurl, 'raised', e, file=sys.stderr)
            continue

        if tree.tag not in ('rss', 'feed') and not tree.tag.endswith('}rss') and not tree.tag.endswith('}feed'):
            print(f'{feedurl} is not a feed (root tag is {tree.tag})', file=sys.stderr)
            continue

        # RSS nests metadata under <channel>; Atom keeps it on the root <feed>.
        channel = find_ignore_ns(tree, 'channel')
        if channel is None:
            channel = tree
        if find_ignore_ns(channel, 'item') is None and find_ignore_ns(channel, 'entry') is None:
            print(feedurl, 'has no entries, skipping', file=sys.stderr)
            continue

        # Resolve the blog's main page: the channel-level <link> if present,
        # otherwise guess by stripping the feed URL's path and confirm
        # interactively. (The result is only gathered for manual inspection;
        # it is not written into the output below.)
        url = find_link(channel)
        if url is None:
            url = list(urlparse(feedurl))
            url[2] = ''  # drop the path component
            url = urlunparse(url)
            print(f'No mention of main page on {feedurl}, please see {url} or enter main page url: ', file=sys.stderr, end='', flush=True)
            url = input().strip() or url
        else:
            # RSS puts the URL in the element text, Atom in the href attribute.
            url = url.text or url.attrib['href']

        text = f'\n  • {html.escape(find_ignore_ns(channel, "title").text)} ('
        if rss_bridge:
            text += 'auto-generated '
        text += 'feed)'

        # Append the feed's blurb if it has one: RSS <description>, falling
        # back to Atom <subtitle>. RSS-Bridge feeds get no blurb.
        if not rss_bridge:
            desc = find_ignore_ns(channel, 'description')
            if desc is None or not desc.text:
                desc = find_ignore_ns(channel, 'subtitle')
            if desc is not None and desc.text and (desc := desc.text.strip()):
                text += f': {html.escape(desc)}'

        print(text, '\n  • ', sep='')
        out.write(text)
        out.write('\n')
        out.flush()
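
# Example run, with hypothetical paths and feed lines (the real input is
# whatever newsboat-style urls file you pass as the first argument):
#
#   $ cat urls
#   https://example.com/feed.xml "blog" "~Example Blog"
#   https://example.com/news.xml "news"     # skipped: not tagged "blog"
#   $ python3 blogroll.py urls blogroll.txt
#
# Each "blog"-tagged feed becomes a '• Title (feed): description' bullet in
# the output file, with 'auto-generated' noted for feeds on rss.* hosts.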