added support for learning from CW'd posts
This commit is contained in:
		
							parent
							
								
									5dfd167bc0
								
							
						
					
					
						commit
						2cecfd42a5
					
				| 
						 | 
				
			
			@ -1,4 +1,5 @@
 | 
			
		|||
{
 | 
			
		||||
	"site": "https://botsin.space",
 | 
			
		||||
	"cw": null
 | 
			
		||||
	"cw": null,
 | 
			
		||||
	"learn_from_cw": false
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -7,6 +7,8 @@ import markovify
 | 
			
		|||
from bs4 import BeautifulSoup
 | 
			
		||||
import re, multiprocessing, sqlite3, shutil, os, json
 | 
			
		||||
 | 
			
		||||
cfg = json.load(open('config.json'))
 | 
			
		||||
 | 
			
		||||
def make_sentence(output):
 | 
			
		||||
	class nlt_fixed(markovify.NewlineText): #modified version of NewlineText that never rejects sentences
 | 
			
		||||
		def test_sentence_input(self, sentence):
 | 
			
		||||
| 
						 | 
				
			
			@ -16,7 +18,10 @@ def make_sentence(output):
 | 
			
		|||
	db = sqlite3.connect("toots-copy.db")
 | 
			
		||||
	db.text_factory=str
 | 
			
		||||
	c = db.cursor()
 | 
			
		||||
	toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall()
 | 
			
		||||
	if cfg['learn_from_cw']:
 | 
			
		||||
		toots = c.execute("SELECT content FROM `toots` ORDER BY RANDOM() LIMIT 10000").fetchall()
 | 
			
		||||
	else: 
 | 
			
		||||
		toots = c.execute("SELECT content FROM `toots` WHERE cw = 0 ORDER BY RANDOM() LIMIT 10000").fetchall()
 | 
			
		||||
	toots_str = ""
 | 
			
		||||
	for toot in toots:
 | 
			
		||||
		toots_str += "\n{}".format(toot[0])
 | 
			
		||||
| 
						 | 
				
			
			@ -32,7 +37,6 @@ def make_sentence(output):
 | 
			
		|||
		tries = tries + 1
 | 
			
		||||
 | 
			
		||||
	sentence = re.sub("^(?:@\u202B[^ ]* )*", "", sentence) #remove leading pings (don't say "@bob blah blah" but still say "blah @bob blah")
 | 
			
		||||
	sentence = re.sub("^(?:@\u200B[^ ]* )*", "", sentence)
 | 
			
		||||
 | 
			
		||||
	output.send(sentence)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										17
									
								
								main.py
								
								
								
								
							
							
						
						
									
										17
									
								
								main.py
								
								
								
								
							| 
						 | 
				
			
			@ -18,7 +18,8 @@ except:
 | 
			
		|||
	shutil.copy2("config.sample.json", "config.json")
 | 
			
		||||
	cfg = json.load(open('config.json', 'r'))
 | 
			
		||||
 | 
			
		||||
#config.json *MUST* contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults.
 | 
			
		||||
#config.json should contain the instance URL, the instance blacklist (for dead/broken instances), and the CW text. if they're not provided, we'll fall back to defaults.
 | 
			
		||||
# TODO: this is pretty messy
 | 
			
		||||
if 'site' not in cfg:
 | 
			
		||||
	cfg['website'] = "https://botsin.space"
 | 
			
		||||
if 'cw' not in cfg:
 | 
			
		||||
| 
						 | 
				
			
			@ -28,6 +29,8 @@ if 'instance_blacklist' not in cfg:
 | 
			
		|||
		"bofa.lol",
 | 
			
		||||
		"witches.town"
 | 
			
		||||
	]
 | 
			
		||||
if 'learn_from_cw' not in cfg:
 | 
			
		||||
	cfg['learn_from_cw'] = False
 | 
			
		||||
 | 
			
		||||
#if the user is using a (very!) old version that still uses the .secret files, migrate to the new method
 | 
			
		||||
if os.path.exists("clientcred.secret"):
 | 
			
		||||
| 
						 | 
				
			
			@ -82,7 +85,11 @@ following = client.account_following(me.id)
 | 
			
		|||
db = sqlite3.connect("toots.db")
 | 
			
		||||
db.text_factory=str
 | 
			
		||||
c = db.cursor()
 | 
			
		||||
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
 | 
			
		||||
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, cw INT NOT NULL DEFAULT 0, userid INT NOT NULL, uri VARCHAR NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
 | 
			
		||||
try:
 | 
			
		||||
	c.execute("ALTER TABLE `toots` ADD COLUMN cw INT NOT NULL DEFAULT 0")
 | 
			
		||||
except:
 | 
			
		||||
	pass # column already exists
 | 
			
		||||
db.commit()
 | 
			
		||||
 | 
			
		||||
def handleCtrlC(signal, frame):
 | 
			
		||||
| 
						 | 
				
			
			@ -157,9 +164,6 @@ for f in following:
 | 
			
		|||
				
 | 
			
		||||
				# its a toost baby
 | 
			
		||||
				content = oi['object']['content']
 | 
			
		||||
				if oi['object']['summary'] != None and oi['object']['summary'] != "":
 | 
			
		||||
					#don't download CW'd toots. if you want your bot to download and learn from CW'd toots, replace "continue" with "pass". (todo: add a config.json option for this)
 | 
			
		||||
					continue
 | 
			
		||||
				toot = extract_toot(content)
 | 
			
		||||
				# print(toot)
 | 
			
		||||
				try:
 | 
			
		||||
| 
						 | 
				
			
			@ -170,8 +174,9 @@ for f in following:
 | 
			
		|||
							done = True
 | 
			
		||||
							break
 | 
			
		||||
					pid = patterns["pid"].search(oi['object']['id']).group(0)
 | 
			
		||||
					c.execute("REPLACE INTO toots (id, userid, uri, content) VALUES (?, ?, ?, ?)", (
 | 
			
		||||
					c.execute("REPLACE INTO toots (id, cw, userid, uri, content) VALUES (?, ?, ?, ?)", (
 | 
			
		||||
						pid,
 | 
			
		||||
						1 if (oi['object']['summary'] != None and oi['object']['summary'] != "") else 0,
 | 
			
		||||
						f.id,
 | 
			
		||||
						oi['object']['id'],
 | 
			
		||||
						toot
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Reference in New Issue