fix for pleroma
This commit is contained in:
		
							parent
							
								
									a1324acfba
								
							
						
					
					
						commit
						eeba1c9066
					
				
							
								
								
									
										116
									
								
								main.py
								
								
								
								
							
							
						
						
									
										116
									
								
								main.py
								
								
								
								
							|  | @ -14,15 +14,15 @@ scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"] | ||||||
| cfg = json.load(open('config.json', 'r')) | cfg = json.load(open('config.json', 'r')) | ||||||
| 
 | 
 | ||||||
| if os.path.exists("clientcred.secret"): | if os.path.exists("clientcred.secret"): | ||||||
|     print("Upgrading to new storage method") | 		print("Upgrading to new storage method") | ||||||
|     cc = open("clientcred.secret").read().split("\n") | 		cc = open("clientcred.secret").read().split("\n") | ||||||
|     cfg['client'] = { | 		cfg['client'] = { | ||||||
|         "id": cc[0], | 				"id": cc[0], | ||||||
|         "secret": cc[1] | 				"secret": cc[1] | ||||||
|     } | 		} | ||||||
|     cfg['secret'] = open("usercred.secret").read().rstrip("\n") | 		cfg['secret'] = open("usercred.secret").read().rstrip("\n") | ||||||
|     os.remove("clientcred.secret") | 		os.remove("clientcred.secret") | ||||||
|     os.remove("usercred.secret") | 		os.remove("usercred.secret") | ||||||
| 		 | 		 | ||||||
| 
 | 
 | ||||||
| if "client" not in cfg: | if "client" not in cfg: | ||||||
|  | @ -105,6 +105,26 @@ def handleCtrlC(signal, frame): | ||||||
| 
 | 
 | ||||||
| signal.signal(signal.SIGINT, handleCtrlC) | signal.signal(signal.SIGINT, handleCtrlC) | ||||||
| 
 | 
 | ||||||
def get_toots_legacy(client, id):
	"""Yield the usable toots of account `id`, walking every page of results.

	Fallback collection path that goes through the client API
	(`client.account_statuses` / `client.fetch_next`) instead of reading the
	account's ActivityPub outbox.

	client -- API client exposing `account_statuses(id)` and
	          `fetch_next(page)` (presumably a Mastodon.py client; confirm
	          against the caller).
	id     -- account id whose statuses are fetched.

	Yields dicts with the keys "toot" (result of `extract_toot`, which is
	defined elsewhere in this file), "id" and "uri".

	Toots with a content warning, boosts, and anything that is not public or
	unlisted are skipped.
	"""
	i = 0
	toots = client.account_statuses(id)
	while toots is not None and len(toots) > 0:
		for toot in toots:
			if toot.spoiler_text != "": continue
			if toot.reblog is not None: continue
			if toot.visibility not in ["public", "unlisted"]: continue
			t = extract_toot(toot.content)
			if t is not None:
				yield {
					"toot": t,
					"id": toot.id,
					"uri": toot.uri
				}
			# progress indicator: one dot per 20 toots that passed the filters
			i += 1
			if i%20 == 0:
				print('.', end='', flush=True)
		# Advance exactly once per page. Doing this inside the per-toot loop
		# skipped pages, and never advanced at all (infinite loop) when every
		# toot on a page was filtered out by the `continue`s above.
		toots = client.fetch_next(toots)
|  | 
 | ||||||
| for f in following: | for f in following: | ||||||
| 	last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() | 	last_toot = c.execute("SELECT id FROM `toots` WHERE userid LIKE ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone() | ||||||
| 	if last_toot != None: | 	if last_toot != None: | ||||||
|  | @ -114,7 +134,7 @@ for f in following: | ||||||
| 	print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) | 	print("Harvesting toots for user @{}, starting from {}".format(f.acct, last_toot)) | ||||||
| 
 | 
 | ||||||
| 	#find the user's activitypub outbox | 	#find the user's activitypub outbox | ||||||
| 	#print("WebFingering...") | 	print("WebFingering...") | ||||||
| 	instance = re.search(r"^.*@(.+)", f.acct) | 	instance = re.search(r"^.*@(.+)", f.acct) | ||||||
| 	if instance == None: | 	if instance == None: | ||||||
| 		instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1) | 		instance = re.search(r"https?:\/\/(.*)", cfg['site']).group(1) | ||||||
|  | @ -130,8 +150,12 @@ for f in following: | ||||||
| 		r = requests.get("https://{}/.well-known/host-meta".format(instance)) | 		r = requests.get("https://{}/.well-known/host-meta".format(instance)) | ||||||
| 		uri = re.search(r'template="([^"]+)"', r.text).group(1) | 		uri = re.search(r'template="([^"]+)"', r.text).group(1) | ||||||
| 		uri = uri.format(uri = "{}@{}".format(f.username, instance)) | 		uri = uri.format(uri = "{}@{}".format(f.username, instance)) | ||||||
| 		r = requests.get(uri) | 		r = requests.get(uri, headers={"Accept": "application/json"}) | ||||||
| 		uri = r.json()['aliases'][1] #TODO: find out if it's safe to rely on this | 		j = r.json() | ||||||
|  | 		if len(j['aliases']) == 1: #TODO: this is a hack on top of a hack, fix it | ||||||
|  | 			uri = j['aliases'][0] | ||||||
|  | 		else: | ||||||
|  | 			uri = j['aliases'][1] | ||||||
| 		uri = "{}/outbox?page=true&min_id={}".format(uri, last_toot) | 		uri = "{}/outbox?page=true&min_id={}".format(uri, last_toot) | ||||||
| 		r = requests.get(uri) | 		r = requests.get(uri) | ||||||
| 		j = r.json() | 		j = r.json() | ||||||
|  | @ -139,34 +163,54 @@ for f in following: | ||||||
| 		print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") | 		print("oopsy woopsy!! we made a fucky wucky!!!\n(we're probably rate limited, please hang up and try again)") | ||||||
| 		sys.exit(1) | 		sys.exit(1) | ||||||
| 
 | 
 | ||||||
|  | 	pleroma = False | ||||||
|  | 	if 'first' in j: | ||||||
|  | 		print("{} is a pleroma instance -- falling back to legacy toot collection method".format(instance)) | ||||||
|  | 		pleroma = True | ||||||
|  | 	 | ||||||
| 	print("Downloading and parsing toots", end='', flush=True) | 	print("Downloading and parsing toots", end='', flush=True) | ||||||
| 	current = None | 	current = None | ||||||
| 	try: | 	try: | ||||||
| 		while len(j['orderedItems']) > 0: | 		if pleroma: | ||||||
| 			for oi in j['orderedItems']: | 			for t in get_toots_legacy(client, f.id): | ||||||
| 				if oi['type'] == "Create": | 				try: | ||||||
| 					# its a toost baby | 					c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", | ||||||
| 					content = oi['object']['content'] | 						(t['id'], | ||||||
| 					if oi['object']['summary'] != None: | 						f.id, | ||||||
| 						#don't download CW'd toots | 						t['uri'], | ||||||
| 						continue | 						t['toot'] | ||||||
| 					toot = extract_toot(content) |  | ||||||
| 					# print(toot) |  | ||||||
| 					try: |  | ||||||
| 						c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", |  | ||||||
| 							(re.search(r"[^\/]+$", oi['object']['id']).group(0), |  | ||||||
| 							f.id, |  | ||||||
| 							oi['object']['id'], |  | ||||||
| 							toot |  | ||||||
| 							) |  | ||||||
| 						) | 						) | ||||||
| 						pass | 					) | ||||||
| 					except: | 				except: | ||||||
| 						pass #ignore any toots that don't go into the DB | 					pass | ||||||
| 			# sys.exit(0) | 
 | ||||||
| 			r = requests.get(j['prev']) | 		else: | ||||||
| 			j = r.json() | 			while len(j['orderedItems']) > 0: | ||||||
| 			print('.', end='', flush=True) | 				for oi in j['orderedItems']: | ||||||
|  | 					if (not pleroma and oi['type'] == "Create") or (pleroma and oi['to']['type'] == "Create"): | ||||||
|  | 						# its a toost baby | ||||||
|  | 						content = oi['object']['content'] | ||||||
|  | 						if oi['object']['summary'] != None: | ||||||
|  | 							#don't download CW'd toots | ||||||
|  | 							continue | ||||||
|  | 						toot = extract_toot(content) | ||||||
|  | 						# print(toot) | ||||||
|  | 						try: | ||||||
|  | 							pid = re.search(r"[^\/]+$", oi['object']['id']).group(0) | ||||||
|  | 							c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", | ||||||
|  | 								(pid, | ||||||
|  | 								f.id, | ||||||
|  | 								oi['object']['id'], | ||||||
|  | 								toot | ||||||
|  | 								) | ||||||
|  | 							) | ||||||
|  | 							pass | ||||||
|  | 						except: | ||||||
|  | 							pass #ignore any toots that don't go into the DB | ||||||
|  | 				# sys.exit(0) | ||||||
|  | 				r = requests.get(j['prev']) | ||||||
|  | 				j = r.json() | ||||||
|  | 				print('.', end='', flush=True) | ||||||
| 		print(" Done!") | 		print(" Done!") | ||||||
| 		db.commit() | 		db.commit() | ||||||
| 	except: | 	except: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue