fucking date formats

master
alciregi 3 years ago
parent 0720f292e4
commit fa2f5330bf
  1. 1
      .gitignore
  2. 34
      writefreelytag2rss.py

1
.gitignore vendored

@@ -1,3 +1,4 @@
/atom.xml
/rss.xml
/scrape.py
/config.cfg

@@ -22,27 +22,27 @@ fg = FeedGenerator()
fg.id(conf['url'])
fg.title(conf['feedtitle'])
fg.subtitle(conf['description'])
#fg.link(href=selflink, rel='self' )
fg.author( {'name': conf['authorname'],'email': conf['authormail']} )
fg.link(href=conf['url'], rel='alternate' )
# fg.link(href=selflink, rel='self' )
fg.author({'name': conf['authorname'], 'email': conf['authormail']})
fg.link(href=conf['url'], rel='alternate')
#fg.logo('http://ex.com/logo.jpg')
#fg.subtitle('This is a cool feed!')
#fg.language('en')
# fg.logo('http://ex.com/logo.jpg')
# fg.subtitle('This is a cool feed!')
# fg.language('en')
page = requests.get(conf['url'])
soup = BeautifulSoup(page.content,features="lxml")
soup = BeautifulSoup(page.content, features="lxml")
for i in soup.find_all('article'):
title = i.find("h2", {"class": "post-title"}).find("a").contents[0]
#print(title)
# print(title)
link = i.find("a", href=True)['href']
#print(link)
# print(link)
dtpublished = i.find("time", {"class": "dt-published"})["datetime"]
@@ -50,19 +50,19 @@ for i in soup.find_all('article'):
date = dtpublished.split(".")[0]
tz = dtpublished.split(".")[1].split(" ")[1]
except:
date = dtpublished.split(" ")[0]+" "+dtpublished.split(" ")[1]
date = dtpublished.split(" ")[0] + " " + dtpublished.split(" ")[1]
tz = dtpublished.split(" ")[2]
time_in_datetime = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
pubdate = time_in_datetime.strftime('%a, %d %b %Y %H:%M:%S')+" "+tz
pubdate = time_in_datetime.strftime('%a, %d %b %Y %H:%M:%S') + " " + tz
content = ""
for p in i.find_all('p'):
#print(p.get_text())
content=content+p.get_text()
# print(p.get_text())
content = content + p.get_text()
#print("-------",content)
# print("-------",content)
fe = fg.add_entry()
fe.id(link)
@@ -71,8 +71,8 @@ for i in soup.find_all('article'):
fe.link(href=link)
fe.content(content)
atomfeed = fg.atom_str(pretty=True) # Get the ATOM feed as string
rssfeed = fg.rss_str(pretty=True) # Get the RSS feed as string
atomfeed = fg.atom_str(pretty=True) # Get the ATOM feed as string
rssfeed = fg.rss_str(pretty=True) # Get the RSS feed as string
# fg.atom_file(conf['destpath'], pretty=True) # Write the ATOM feed to a file
fg.rss_file(conf['destpath'], pretty=True) # Write the RSS feed to a file
fg.rss_file(conf['destpath'], pretty=True) # Write the RSS feed to a file

Loading…
Cancel
Save