Remove feedparser
parent 1111e0f115
commit da7c5618e2
@@ -3,7 +3,7 @@ image: python:3.9-alpine
 pages:
     stage: deploy
     script:
-        - pip install -r requirements.txt && cd public/sbbs && python3 ../../gensbbsrss.py
+        - cd public/sbbs && python3 ../../gensbbsrss.py
     artifacts:
         paths:
             - public
@@ -1,36 +1,85 @@
 import sys
 import html
 import shlex
-from urllib.parse import urlparse, urlunparse
-import feedparser
+from xml.etree import ElementTree
+from urllib.request import urlopen, Request
+from urllib.parse import urlparse, urlunparse, urljoin
 
+def find_ignore_ns(element, tag):
+    for i in element.iter():
+        if i.tag == tag:
+            return i
+        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
+            break
+    for i in element.iter():
+        if i.tag.endswith('}' + tag):
+            return i
+        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
+            break
+
+is_link = lambda i: i.attrib.get('rel') != 'self' and i.attrib.get('type', 'text/html') == 'text/html'
+
+def find_link(element):
+    for i in element.iter():
+        if i.tag == 'link' and is_link(i):
+            return i
+        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
+            break
+    for i in element.iter():
+        if i.tag.endswith('}link') and is_link(i):
+            return i
+        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
+            break
+
 with open(sys.argv[1]) as file, open(sys.argv[2], 'w+') as out:
     for i in file:
         inp = shlex.split(i, comments=True)
         if 'blog' in inp[1:]:
+            feedurl = inp[0]
             try:
-                d = feedparser.parse(inp[0])
-            except Exception as e:
-                print(inp[0], 'raised', e, file=sys.stderr)
+                with urlopen(Request(feedurl, headers={'User-Agent': "stop being so fucking obsessed that i'm using urllib ffs"}), timeout=60) as resp:
+                    if resp.status != 200:
+                        print(feedurl, 'returned', resp.status, file=sys.stderr)
+                        continue
-            feedurl = d['href']
-            if not d['entries']:
+                    tree = ElementTree.ElementTree().parse(resp)
+            except Exception as e:
+                print(feedurl, 'raised', e, file=sys.stderr)
                 continue
+            if tree.tag not in ('rss', 'feed') and not tree.tag.endswith('}rss') and not tree.tag.endswith('}feed'):
+                print(f'{feedurl} is not a feed (root tag is {tree.tag})', file=sys.stderr)
+                continue
+            channel = find_ignore_ns(tree, 'channel')
+            if channel is None:
+                channel = tree
+            if find_ignore_ns(channel, 'item') is None and find_ignore_ns(channel, 'entry') is None:
                 print(feedurl, 'has no entries, skipping', file=sys.stderr)
                 continue
-            if 'links' in d['feed']:
-                url = next(filter(lambda i: i['type'] == 'text/html', d['feed']['links']))['href']
-            else:
+            url = find_link(channel)
+            if url is None:
                 url = list(urlparse(feedurl))
                 url[2] = ''
                 url = urlunparse(url)
                 print(f'No mention of main page on {feedurl}, please see {url} or enter main page url: ', file=sys.stderr, end='', flush=True)
                 url = input().strip() or url
-            desc = d['feed'].get('description')
-            text = f'<li><b><a href="{html.escape(url)}">{html.escape(d["feed"]["title"])}</a> (<a href="{html.escape(feedurl)}">feed</a>)'
-            if desc := d['feed'].get('description'):
+            else:
+                url = url.text or url.attrib['href']
+            text = f'<li><b><a href="{html.escape(urljoin(feedurl, url))}">{html.escape(find_ignore_ns(channel, "title").text)}</a> (<a href="{html.escape(feedurl)}">feed</a>)'
+            desc = find_ignore_ns(channel, 'description')
+            if desc is not None and desc.text:
+                if desc := desc.text.strip():
+                    text += f':</b> {html.escape(desc)}'
+                else:
+                    text += '</b>'
-            text += '</li>\n'
-            print(text, end='')
+            else:
+                desc = find_ignore_ns(channel, 'subtitle')
+                if desc is not None and desc.text:
+                    if desc := desc.text.strip():
+                        text += f':</b> {html.escape(desc)}'
+                    else:
+                        text += '</b>'
+                else:
+                    text += '</b>'
+            print(text, '</li>', sep='')
             out.write(text)
+            out.write('</li>\n')
     out.flush()
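For reference, a minimal standalone sketch (not part of the commit) of why the new find_ignore_ns does the endswith('}' + tag) pass: ElementTree keeps the XML namespace inside the tag name, so a plain tag comparison misses elements of a namespaced Atom feed. The sample document below is invented for illustration; the function body is copied from the diff above.

from xml.etree import ElementTree

def find_ignore_ns(element, tag):
    # First pass: exact tag match, stopping before the first item/entry.
    for i in element.iter():
        if i.tag == tag:
            return i
        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
            break
    # Second pass: accept a namespaced tag like '{...}title'.
    for i in element.iter():
        if i.tag.endswith('}' + tag):
            return i
        if i.tag in ('item', 'entry') or i.tag.endswith('}item') or i.tag.endswith('}entry'):
            break

# Hypothetical Atom snippet: the default namespace makes every tag
# '{http://www.w3.org/2005/Atom}...', so tree.find('title') would return None.
atom = '<feed xmlns="http://www.w3.org/2005/Atom"><title>Example blog</title><entry><title>Post</title></entry></feed>'
tree = ElementTree.fromstring(atom)
print(find_ignore_ns(tree, 'title').text)  # prints: Example blog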
@@ -1 +0,0 @@
-feedparser