Remove beautifulsoup4

This commit is contained in:
blank X 2021-07-21 15:14:30 +07:00
parent 17e046bf2f
commit e7bd1c80e5
Signed by: blankie
GPG Key ID: CC15FC822C7F61F5
2 changed files with 13 additions and 12 deletions

View File

@@ -1,11 +1,10 @@
import os import os
from io import BytesIO
from datetime import datetime from datetime import datetime
from xml.etree import ElementTree from xml.etree import ElementTree
from bs4 import BeautifulSoup
with open('index.html') as file: tree = ElementTree.ElementTree().parse('index.html')
soup = BeautifulSoup(file.read()) sbbsbase = tree.find('./head/link[@rel="canonical"]').attrib['href']
sbbsbase = soup.find('link', rel='canonical')['href']
feed = ElementTree.Element('rss', version='2.0') feed = ElementTree.Element('rss', version='2.0')
channel = ElementTree.SubElement(feed, 'channel') channel = ElementTree.SubElement(feed, 'channel')
@@ -17,23 +16,26 @@ description = ElementTree.SubElement(channel, 'description')
description.text = 'just a showcase of software being broken :)' description.text = 'just a showcase of software being broken :)'
language = ElementTree.SubElement(channel, 'language') language = ElementTree.SubElement(channel, 'language')
language.text = 'en-us' language.text = 'en-us'
file = BytesIO()
def parse_file(i): def parse_file(i):
with open(i) as file: tree = ElementTree.ElementTree().parse(i)
soup = BeautifulSoup(file.read())
item = ElementTree.Element('item') item = ElementTree.Element('item')
title = ElementTree.SubElement(item, 'title') title = ElementTree.SubElement(item, 'title')
title.text = soup.h1.string title.text = tree.find('body').find('h1').text
link = ElementTree.SubElement(item, 'link') link = ElementTree.SubElement(item, 'link')
link.text = soup.find('link', rel='canonical')['href'] link.text = tree.find('./head/link[@rel="canonical"]').attrib['href']
guid = ElementTree.SubElement(item, 'guid') guid = ElementTree.SubElement(item, 'guid')
guid.text = f'sbbs/{os.path.splitext(i)[0]}' guid.text = f'sbbs/{os.path.splitext(i)[0]}'
description = ElementTree.SubElement(item, 'description') description = ElementTree.SubElement(item, 'description')
description.text = str(soup.find('div', class_='content')) ElementTree.ElementTree(tree.find('./body/div[@class="content"]')).write(file, 'utf-8', method='html')
return (datetime.fromisoformat(soup.find('meta', {'name': 'created'})['content']), item) description.text = file.getvalue().decode()
file.seek(0)
file.truncate(0)
return (datetime.fromisoformat(tree.find('./head/meta[@name="created"]').attrib['content']), item)
for date, i in sorted(map(parse_file, filter(lambda i: i not in ('index.html', 'index.xml'), os.listdir('.'))), reverse=True, key=lambda i: i[0]): for date, i in sorted(map(parse_file, filter(lambda i: i not in ('index.html', 'index.xml'), os.listdir('.'))), reverse=True, key=lambda i: i[0]):
# http://johnbokma.com/blog/2019/10/09/rfc-822-and-rfc-3339-dates-in-python.html # http://johnbokma.com/blog/2019/10/09/rfc-822-and-rfc-3339-dates-in-python.html
ctime = date.ctime() ctime = date.ctime()
pubdate = ElementTree.SubElement(i, 'pubDate') pubdate = ElementTree.SubElement(i, 'pubDate')
pubdate.text = f"{ctime[0:3]}, {date.day:02d} {ctime[4:7]} {date.strftime(' %Y %H:%M:%S %z')}" pubdate.text = f"{ctime[0:3]}, {date.day:02d} {ctime[4:7]} {date.strftime(' %Y %H:%M:%S %z')}"
channel.append(i) channel.append(i)
ElementTree.ElementTree(feed).write('index.xml', 'utf-8') ElementTree.ElementTree(feed).write('index.xml', 'utf-8', True)

View File

@@ -1,2 +1 @@
feedparser feedparser
beautifulsoup4