# blankx.gitlab.io/gensbbsrss.py

# 42 lines
# 1.9 KiB
# Python

import os
from datetime import datetime
from email.utils import format_datetime
from io import BytesIO
from xml.etree import ElementTree
def _text_child(parent, tag, text):
    """Append a ``tag`` child carrying ``text`` to ``parent`` and return it."""
    child = ElementTree.SubElement(parent, tag)
    child.text = text
    return child


# The index page is read as XML; the href of its canonical <link> is used
# as the channel's link.
tree = ElementTree.parse('index.html').getroot()
sbbsbase = tree.find('./head/link[@rel="canonical"]').attrib['href']

# RSS 2.0 skeleton: <rss version="2.0"> wrapping a single <channel>.
feed = ElementTree.Element('rss', {'version': '2.0'})
channel = ElementTree.SubElement(feed, 'channel')
title = _text_child(channel, 'title', 'software being broken showcase')
link = _text_child(channel, 'link', sbbsbase)
description = _text_child(
    channel, 'description', 'just a showcase of software being broken :)'
)
language = _text_child(channel, 'language', 'en-us')

# Reusable in-memory byte buffer kept at module level.
file = BytesIO()
def parse_file(i):
    """Build the RSS ``<item>`` element for one showcase page.

    The page is parsed with ``xml.etree.ElementTree``, so it has to be
    well-formed XML.

    Args:
        i: Path of the page to read. The path without its extension,
            prefixed with ``sbbs/``, becomes the item's guid.

    Returns:
        A ``(created, item)`` tuple. ``created`` is the ``datetime`` parsed
        from the content of the page's ``<meta name="created">`` element;
        ``item`` is an ``<item>`` element with ``title``, ``link``, ``guid``
        and ``description`` children. No ``pubDate`` is added here.

    Raises:
        xml.etree.ElementTree.ParseError: If the page is not well-formed XML.
        AttributeError: If an expected element (``body``, ``h1``, the
            canonical ``link``, the ``created`` meta or the content ``div``)
            is missing.
        KeyError: If the canonical link has no ``href`` or the ``created``
            meta has no ``content`` attribute.
        ValueError: If the ``created`` value is not an ISO 8601 timestamp.
    """
    tree = ElementTree.parse(i).getroot()
    item = ElementTree.Element('item')

    title = ElementTree.SubElement(item, 'title')
    # itertext() also collects text that sits inside or after inline markup
    # in the heading; a bare ``.text`` stops at the first child element.
    title.text = ''.join(tree.find('body').find('h1').itertext())

    link = ElementTree.SubElement(item, 'link')
    link.text = tree.find('./head/link[@rel="canonical"]').attrib['href']

    # The guid is an opaque identifier, not a URL, so it must be flagged as
    # a non-permalink; RSS 2.0 treats a guid as a permalink by default.
    guid = ElementTree.SubElement(item, 'guid', isPermaLink='false')
    guid.text = f'sbbs/{os.path.splitext(i)[0]}'

    description = ElementTree.SubElement(item, 'description')
    content = tree.find('./body/div[@class="content"]')
    # The content <div> is embedded as HTML markup text; the feed writer
    # escapes it when the item is serialised.
    description.text = ElementTree.tostring(
        content, encoding='unicode', method='html'
    )

    created = tree.find('./head/meta[@name="created"]').attrib['content']
    return (datetime.fromisoformat(created), item)
# Turn every directory entry except the index page and the generated feed
# into an item, newest first. The sort key is the creation date only, so
# the item elements themselves are never compared.
entries = sorted(
    (
        parse_file(name)
        for name in os.listdir('.')
        if name not in ('index.html', 'index.xml')
    ),
    key=lambda entry: entry[0],
    reverse=True,
)
for date, i in entries:
    pubdate = ElementTree.SubElement(i, 'pubDate')
    # RSS wants an RFC 822 date. format_datetime() emits exactly one space
    # between fields, locale-independent English day/month names, and always
    # a zone ("-0000" when the datetime carries no tzinfo).
    pubdate.text = format_datetime(date)
    channel.append(i)

# Write the feed as UTF-8 with an XML declaration.
ElementTree.ElementTree(feed).write('index.xml', 'utf-8', True)