import sys
import html
import shlex
from xml.etree import ElementTree
from urllib.request import urlopen, Request
from urllib.parse import urlparse, urlunparse, urljoin
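
# Reads a newsboat-style urls file (sys.argv[1]) and writes an HTML blogroll
# fragment (sys.argv[2]): one <li> per feed tagged 'blog', linking to the
# site and to its feed. The input format is inferred from the parsing below:
# one URL per shlex-quoted line, '#' starts a comment, and the last tag may
# be a display title prefixed with '~' (newsboat's convention).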

def find_ignore_ns(element, tag):
    """Find the first channel-level child matching `tag`, with or without an
    XML namespace. Stops at the first <item>/<entry> so per-post elements
    (e.g. a post's <title>) are never mistaken for channel metadata."""
    # First pass: exact tag match (un-namespaced RSS).
    for i in element.iter():
        if i.tag == tag:
            return i
        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
            break
    # Second pass: namespaced match ('{uri}tag', as ElementTree renders Atom).
    for i in element.iter():
        if i.tag.endswith('}' + tag):
            return i
        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
            break
    return None


def is_link(i):
    # A usable alternate link: not the feed's own rel="self" entry, and an
    # HTML page (RSS <link> carries no type attribute, hence the default).
    return i.attrib.get('rel') != 'self' and i.attrib.get('type', 'text/html') == 'text/html'

def find_link(element):
    """Like find_ignore_ns, but for <link> elements that pass is_link()."""
    for i in element.iter():
        if i.tag == 'link' and is_link(i):
            return i
        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
            break
    for i in element.iter():
        if i.tag.endswith('}link') and is_link(i):
            return i
        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
            break
    return None

# 'exec:' feeds run through a local converter (presumably konbata turns a
# page into a feed); the length-based slice below strips exactly this prefix.
KONBATA_PREFIX = 'exec:~/.local/bin/konbata '

with open(sys.argv[1]) as file, open(sys.argv[2], 'w+') as out:
    for line in file:
        inp = shlex.split(line, comments=True)
        if 'blog' not in inp[1:]:
            continue
        feedurl = inp[0]
        if feedurl.startswith(KONBATA_PREFIX):
            # Can't fetch exec: feeds here; link straight to the page and
            # reuse the '~Title' tag (last token, minus the leading '~').
            pageurl = html.escape(feedurl[len(KONBATA_PREFIX):])
            text = f'<li><b><a href="{pageurl}">{html.escape(inp[-1][1:])}</a> (<a href="{pageurl}">feed</a>)</b></li>'
            print(text)
            out.write(text + '\n')
            continue
        try:
            req = Request(feedurl, headers={'User-Agent': "stop being so fucking obsessed that i'm using urllib ffs"})
            with urlopen(req, timeout=60) as resp:
                if resp.status != 200:
                    print(feedurl, 'returned', resp.status, file=sys.stderr)
                    continue
                tree = ElementTree.parse(resp).getroot()
        except Exception as e:
            print(feedurl, 'raised', e, file=sys.stderr)
            continue
        # Accept RSS (<rss>) and Atom (<feed>) roots, namespaced or not.
        if tree.tag not in ('rss', 'feed') and not tree.tag.endswith('}rss') and not tree.tag.endswith('}feed'):
            print(f'{feedurl} is not a feed (root tag is {tree.tag})', file=sys.stderr)
            continue
        # RSS nests metadata under <channel>; Atom keeps it on the root.
        channel = find_ignore_ns(tree, 'channel')
        if channel is None:
            channel = tree
        if find_ignore_ns(channel, 'item') is None and find_ignore_ns(channel, 'entry') is None:
            print(feedurl, 'has no entries, skipping', file=sys.stderr)
            continue
        url = find_link(channel)
        if url is None:
            # No usable <link>: guess the site root by dropping the feed
            # URL's path, then let the operator confirm or type a
            # replacement on stdin.
            url = list(urlparse(feedurl))
            url[2] = ''  # clear the path component
            url = urlunparse(url)
            print(f'No mention of main page on {feedurl}, please see {url} or enter main page url: ', file=sys.stderr, end='', flush=True)
            url = input().strip() or url
        else:
            # RSS puts the URL in the element text, Atom in the href attribute.
            url = url.text or url.attrib['href']
        # <title> is required by both specs but occasionally absent or empty;
        # fall back to the feed URL rather than crashing on None.
        title = find_ignore_ns(channel, 'title')
        title = title.text if title is not None and title.text else feedurl
        text = f'<li><b><a href="{html.escape(urljoin(feedurl, url))}">{html.escape(title)}</a> (<a href="{html.escape(feedurl)}">feed</a>)'
        # Prefer the RSS <description>, fall back to the Atom <subtitle>,
        # and skip whitespace-only values either way.
        desc = find_ignore_ns(channel, 'description')
        if desc is None or not desc.text:
            desc = find_ignore_ns(channel, 'subtitle')
        if desc is not None and desc.text and (blurb := desc.text.strip()):
            text += f':</b> {html.escape(blurb)}'
        else:
            text += '</b>'
        # Echo each finished entry to stdout and append it to the output file.
        print(text, '</li>', sep='')
        out.write(text)
        out.write('</li>\n')
        out.flush()
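
# Usage sketch (script name and paths are hypothetical, not fixed anywhere
# in this file):
#   python3 blogroll.py ~/.newsboat/urls blogroll.html
# fetches every feed tagged 'blog' in the urls file and writes the <li>
# fragments to blogroll.html while echoing them to stdout.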