|
|
|
@ -44,11 +44,18 @@ for i in soup.find_all('article'):
|
|
|
|
|
|
|
|
|
|
#print(link)
|
|
|
|
|
|
|
|
|
|
date = i.find("time", {"class": "dt-published"})["datetime"].split(".")[0]
|
|
|
|
|
tz = i.find("time", {"class": "dt-published"})["datetime"].split(".")[1].split(" ")[1]
|
|
|
|
|
# Raw "datetime" attribute of the article's <time class="dt-published"> tag.
dtpublished = i.find("time", {"class": "dt-published"})["datetime"]
try:
    # Layout with a fractional part: everything before the first "." is the
    # date and time, and the timezone field (e.g. "+0000") follows the
    # fraction after a space.
    date = dtpublished.split(".")[0]
    tz = dtpublished.split(".")[1].split(" ")[1]
except IndexError:
    # No "." in the value, or nothing after the fraction: read the value as
    # three space-separated fields instead -- date, time, timezone.
    # Only IndexError is handled, so interrupts and unrelated errors are not
    # silently swallowed the way a bare "except:" would.
    fields = dtpublished.split(" ")
    date = fields[0]+" "+fields[1]
    tz = fields[2]
|
|
|
|
|
|
|
|
|
|
time_in_datetime = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
|
|
|
|
|
|
|
|
|
|
pubdate = time_in_datetime.strftime('%a, %d %b %Y %H:%M:%S')+" "+tz
|
|
|
|
|
#Sat, 01 Feb 2020 17:05:25 +0000
|
|
|
|
|
|
|
|
|
|
content = ""
|
|
|
|
|
for p in i.find_all('p'):
|
|
|
|
|