Remove beautifulsoup4
This commit is contained in:
parent
17e046bf2f
commit
e7bd1c80e5
|
@ -1,11 +1,10 @@
|
||||||
import os
|
import os
|
||||||
|
from io import BytesIO
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from xml.etree import ElementTree
|
from xml.etree import ElementTree
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
with open('index.html') as file:
|
tree = ElementTree.ElementTree().parse('index.html')
|
||||||
soup = BeautifulSoup(file.read())
|
sbbsbase = tree.find('./head/link[@rel="canonical"]').attrib['href']
|
||||||
sbbsbase = soup.find('link', rel='canonical')['href']
|
|
||||||
|
|
||||||
feed = ElementTree.Element('rss', version='2.0')
|
feed = ElementTree.Element('rss', version='2.0')
|
||||||
channel = ElementTree.SubElement(feed, 'channel')
|
channel = ElementTree.SubElement(feed, 'channel')
|
||||||
|
@ -17,23 +16,26 @@ description = ElementTree.SubElement(channel, 'description')
|
||||||
description.text = 'just a showcase of software being broken :)'
|
description.text = 'just a showcase of software being broken :)'
|
||||||
language = ElementTree.SubElement(channel, 'language')
|
language = ElementTree.SubElement(channel, 'language')
|
||||||
language.text = 'en-us'
|
language.text = 'en-us'
|
||||||
|
file = BytesIO()
|
||||||
def parse_file(i):
|
def parse_file(i):
|
||||||
with open(i) as file:
|
tree = ElementTree.ElementTree().parse(i)
|
||||||
soup = BeautifulSoup(file.read())
|
|
||||||
item = ElementTree.Element('item')
|
item = ElementTree.Element('item')
|
||||||
title = ElementTree.SubElement(item, 'title')
|
title = ElementTree.SubElement(item, 'title')
|
||||||
title.text = soup.h1.string
|
title.text = tree.find('body').find('h1').text
|
||||||
link = ElementTree.SubElement(item, 'link')
|
link = ElementTree.SubElement(item, 'link')
|
||||||
link.text = soup.find('link', rel='canonical')['href']
|
link.text = tree.find('./head/link[@rel="canonical"]').attrib['href']
|
||||||
guid = ElementTree.SubElement(item, 'guid')
|
guid = ElementTree.SubElement(item, 'guid')
|
||||||
guid.text = f'sbbs/{os.path.splitext(i)[0]}'
|
guid.text = f'sbbs/{os.path.splitext(i)[0]}'
|
||||||
description = ElementTree.SubElement(item, 'description')
|
description = ElementTree.SubElement(item, 'description')
|
||||||
description.text = str(soup.find('div', class_='content'))
|
ElementTree.ElementTree(tree.find('./body/div[@class="content"]')).write(file, 'utf-8', method='html')
|
||||||
return (datetime.fromisoformat(soup.find('meta', {'name': 'created'})['content']), item)
|
description.text = file.getvalue().decode()
|
||||||
|
file.seek(0)
|
||||||
|
file.truncate(0)
|
||||||
|
return (datetime.fromisoformat(tree.find('./head/meta[@name="created"]').attrib['content']), item)
|
||||||
for date, i in sorted(map(parse_file, filter(lambda i: i not in ('index.html', 'index.xml'), os.listdir('.'))), reverse=True, key=lambda i: i[0]):
|
for date, i in sorted(map(parse_file, filter(lambda i: i not in ('index.html', 'index.xml'), os.listdir('.'))), reverse=True, key=lambda i: i[0]):
|
||||||
# http://johnbokma.com/blog/2019/10/09/rfc-822-and-rfc-3339-dates-in-python.html
|
# http://johnbokma.com/blog/2019/10/09/rfc-822-and-rfc-3339-dates-in-python.html
|
||||||
ctime = date.ctime()
|
ctime = date.ctime()
|
||||||
pubdate = ElementTree.SubElement(i, 'pubDate')
|
pubdate = ElementTree.SubElement(i, 'pubDate')
|
||||||
pubdate.text = f"{ctime[0:3]}, {date.day:02d} {ctime[4:7]} {date.strftime(' %Y %H:%M:%S %z')}"
|
pubdate.text = f"{ctime[0:3]}, {date.day:02d} {ctime[4:7]} {date.strftime(' %Y %H:%M:%S %z')}"
|
||||||
channel.append(i)
|
channel.append(i)
|
||||||
ElementTree.ElementTree(feed).write('index.xml', 'utf-8')
|
ElementTree.ElementTree(feed).write('index.xml', 'utf-8', True)
|
||||||
|
|
|
@ -1,2 +1 @@
|
||||||
feedparser
|
feedparser
|
||||||
beautifulsoup4
|
|
||||||
|
|
Loading…
Reference in New Issue