commit be003a6000b780af830a01357dd594eaf62877b4 Author: Lynne Date: Tue Oct 9 11:11:51 2018 +1000 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d23632f --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +clientcred.secret +usercred.secret +run.sh +corpus.txt +meme.jpg +toots.db +toots.db-journal +toots.db-wal +__pycache__/* diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a612ad9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/README.md b/README.md new file mode 100644 index 0000000..e4eaff6 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# mastodon-ebooks +**Lynnear Edition** + +This version makes quite a few changes from the original, such as: +- Unicode support +- Non-Markov stuff +- it's very cute + +## Original README +hey look it's an ebooks bot + +python3 + +install the requirements with `sudo pip3 install -r requirements.txt` + +make a bot (probably on bots in space) and follow the target accounts + +run `python3 main.py` to login and scrape + +run `python3 gen.py` to make a toot + +cron is an okay choice to make it toot regularly diff --git a/create.py b/create.py new file mode 100755 index 0000000..08ec488 --- /dev/null +++ b/create.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import markovify
import json
import re, random, multiprocessing, time, sqlite3, shutil, os
import tempfile

# Longest sentence (in characters) the Markov model is asked to produce.
_MAX_TOOT_LENGTH = 500
# Seconds a single generation attempt may take before it is abandoned.
_WORKER_TIMEOUT = 10
# Number of generation attempts before giving up.
_MAX_ATTEMPTS = 10


def make_sentence(output):
    """Generate one sentence from the scraped toots and send it to *output*.

    Intended to run in a child process (see ``make_toot_markov``).

    The ``content`` column of the ``toots`` table in ``toots.db`` is read from
    a private snapshot of the database, a newline-delimited Markov model is
    built from it, and one sentence of at most ``_MAX_TOOT_LENGTH`` characters
    is generated. NUL characters in the result are turned into newlines before
    the sentence is sent.

    Args:
        output: the writable end of a ``multiprocessing`` pipe; the generated
            sentence (a ``str``) is delivered with ``output.send``.
    """
    class nlt_fixed(markovify.NewlineText):
        def test_sentence_input(self, sentence):
            return True  # all sentences are valid <3

    # Work on a copy of the database. The copy lives in a fresh temporary
    # directory so that concurrent callers cannot delete each other's snapshot
    # and so that it is removed even when reading fails.
    with tempfile.TemporaryDirectory() as scratch:
        snapshot = os.path.join(scratch, "toots-copy.db")
        shutil.copyfile("toots.db", snapshot)
        db = sqlite3.connect(snapshot)
        try:
            db.text_factory = str
            rows = db.execute("SELECT content FROM `toots`").fetchall()
        finally:
            db.close()

    # Every toot is prefixed with a newline, exactly as the corpus was built
    # before; join() avoids repeated string reallocation on large corpora.
    corpus = "".join("\n{}".format(row[0]) for row in rows)
    del rows
    model = nlt_fixed(corpus)
    del corpus  # the raw text is no longer needed once the model exists

    sentence = None
    while sentence is None:
        sentence = model.make_short_sentence(_MAX_TOOT_LENGTH, tries=100000)
    output.send(sentence.replace("\0", "\n"))


def make_toot(force_markov = False, args = None):
    """Return a freshly generated toot.

    Args:
        force_markov: accepted for callers that pass it; currently unused.
        args: accepted for callers that pass it; currently unused.

    Returns:
        The dict produced by ``make_toot_markov``.
    """
    return make_toot_markov()


def make_toot_markov():
    """Generate a toot in a worker process, retrying on failure.

    Each attempt runs ``make_sentence`` in its own process and waits up to
    ``_WORKER_TIMEOUT`` seconds for a result. An attempt that times out, or
    whose worker exits without sending anything, counts as failed; at most
    ``_MAX_ATTEMPTS`` attempts are made.

    Returns:
        ``{"toot": text, "media": None}`` where ``text`` is the generated
        string, or ``None`` when every attempt failed.
    """
    toot = None
    for _ in range(_MAX_ATTEMPTS):
        receiver, sender = multiprocessing.Pipe(False)
        worker = multiprocessing.Process(target=make_sentence, args=[sender])
        worker.start()
        # Drop the parent's handle on the write end: once the worker is the
        # only writer, its death shows up here as EOF instead of a recv()
        # that never returns.
        sender.close()
        try:
            if receiver.poll(_WORKER_TIMEOUT):
                toot = receiver.recv()
        except EOFError:
            # The worker exited (e.g. crashed) without sending a sentence.
            toot = None
        finally:
            if toot is None and worker.is_alive():
                worker.terminate()
            worker.join()
            receiver.close()
        if toot is not None:
            break
    return {
        "toot": toot,
        "media": None,
    }


# ---- remainder of the original line: header of the next file in this commit dump ----
# diff --git a/gen.py b/gen.py
# new file mode 100755
# index 0000000..9654f7e
# --- /dev/null
# +++ b/gen.py
# @@ -0,0 +1,42 @@
# +#!/usr/bin/env python3
# +# This Source Code Form is subject to the terms of the Mozilla Public
# +# License, v. 2.0. If a copy of the MPL was not distributed with this
# +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
import argparse, sys, traceback
import create

# Generate a toot with create.make_toot() and post it (or, with --simulate,
# only print it). If posting fails, an error report is posted instead.

# assumes the instance's status limit is 500 characters — TODO confirm
_MAX_TOOT_LENGTH = 500
_ERROR_SPOILER = "Error!"

parser = argparse.ArgumentParser(description='Generate and post a toot.')
parser.add_argument('reply', metavar='reply', type=str, nargs='?',
    help='ID of the status to reply to')
parser.add_argument('-s', '--simulate', dest='simulate', action='store_true',
    help="Print the toot to stdout without posting it")

args = parser.parse_args()

api_base_url = "https://botsin.space" #todo: this shouldn't be hardcoded

client = Mastodon(
    client_id="clientcred.secret",
    access_token="usercred.secret",
    api_base_url=api_base_url)

toot = create.make_toot()
if not args.simulate:
    try:
        if toot['toot'] is None:
            # Raise here so the failure is reported through the same path as
            # any other posting error.
            raise RuntimeError("create.make_toot() did not produce any text")
        if toot['media'] is not None:
            mediaID = client.media_post(toot['media'], description=toot['toot'])
            client.status_post(toot['toot'].replace("\n", " "),
                in_reply_to_id=args.reply,
                media_ids=[mediaID], visibility="unlisted")
        else:
            # args.reply is None when no status ID was given on the command
            # line, which posts a normal, non-reply status.
            client.status_post(toot['toot'], in_reply_to_id=args.reply,
                visibility='unlisted')
    except Exception:
        prefix = (
            "Mistress @lynnesbian@deadinsi.de, something has gone terribly"
            " wrong! While attempting to post a toot, I received the following"
            " error:\n"
        )
        # format_exc() includes the exception type and message, not just the
        # stack frames.
        detail = traceback.format_exc()
        # Keep the report within the status limit so that posting it cannot
        # fail for being too long; the end of a traceback carries the actual
        # exception, so keep the tail.
        room = _MAX_TOOT_LENGTH - len(_ERROR_SPOILER) - len(prefix)
        if len(detail) > room:
            detail = detail[-room:]
        toot = {"toot": prefix + detail}
        client.status_post(toot['toot'], visibility='unlisted',
            spoiler_text=_ERROR_SPOILER)
print(toot['toot'])


# ---- remainder of the original line: header of the next file in this commit dump ----
# diff --git a/main.py b/main.py
# new file mode 100644
# index 0000000..42f05c8
# --- /dev/null
# +++ b/main.py
# @@ -0,0 +1,132 @@
# +#!/usr/bin/env python3
# +# This Source Code Form is subject to the terms of the Mozilla Public
# +# License, v. 2.0. If a copy of the MPL was not distributed with this
# +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from mastodon import Mastodon
from getpass import getpass
from os import path
from bs4 import BeautifulSoup
import shutil, os, sqlite3, signal, sys
# import re

# main.py: log in (on first run) and scrape the statuses of every followed
# account into the `toots` table of toots.db.

api_base_url = "https://botsin.space"
scopes = ["read:statuses", "read:accounts", "read:follows", "write:statuses"]

if not path.exists("clientcred.secret"):
    print("No clientcred.secret, registering application")
    Mastodon.create_app("lynnesbian_mastodon_ebooks", api_base_url=api_base_url, to_file="clientcred.secret", scopes=scopes, website="https://github.com/Lynnesbian/mastodon-ebooks")

if not path.exists("usercred.secret"):
    # The application is already registered at this point; this step
    # authorises the user account.
    print("No usercred.secret, logging in")
    client = Mastodon(client_id="clientcred.secret", api_base_url=api_base_url)
    print("Visit this url:")
    print(client.auth_request_url(scopes=scopes))
    client.log_in(code=input("Secret: "), to_file="usercred.secret", scopes=scopes)


def parse_toot(toot):
    """Convert a status into plain text suitable for the corpus.

    Args:
        toot: a status object exposing ``spoiler_text``, ``reblog``,
            ``visibility`` and ``content`` (HTML).

    Returns:
        The text of the status with its lines stripped and joined by NUL
        characters, or ``None`` when the status has a content warning, is a
        boost, or is neither public nor unlisted.
    """
    if toot.spoiler_text != "": return
    if toot.reblog is not None: return
    if toot.visibility not in ["public", "unlisted"]: return

    soup = BeautifulSoup(toot.content, "html.parser")

    # this is the code that removes all mentions
    # TODO: make it so that it removes the @ and instance but keeps the name
    for mention in soup.select("span.h-card"):
        mention.decompose()

    # make all linebreaks actual linebreaks
    for lb in soup.select("br"):
        lb.insert_after("\n")
        lb.decompose()

    # make each p element its own line because sometimes they decide not to be
    for p in soup.select("p"):
        p.insert_after("\n")
        p.unwrap()

    # keep hashtags in the toots
    for ht in soup.select("a.hashtag"):
        ht.unwrap()

    # replace every remaining link with its target URL (i like the bots
    # posting links)
    for link in soup.select("a"):
        href = link.get("href")
        if href:
            link.insert_after(href)
            link.decompose()
        else:
            # An anchor without an href has no URL to substitute; keep its
            # text instead of failing on the missing attribute.
            link.unwrap()

    lines = [line.strip() for line in soup.get_text().strip().split("\n")]

    # TODO: return {"text": ..., "mentions": ..., "links": ...} and patch
    # markovify to take it
    return "\0".join(lines)


def get_toots(client, id, since_id):
    """Yield the parsed statuses of one account, page by page.

    Args:
        client: the API client used for ``account_statuses`` / ``fetch_next``.
        id: ID of the account whose statuses are fetched.
        since_id: passed to ``account_statuses`` as ``since_id``.

    Yields:
        ``{"content": text, "id": status_id}`` for every status that
        ``parse_toot`` does not reject.

    On ``TimeoutError`` while fetching the next page, the module-level ``db``
    is committed and closed and the process exits with status 1.
    """
    pages = 0
    toots = client.account_statuses(id, since_id=since_id)
    while toots is not None and len(toots) > 0:
        for toot in toots:
            text = parse_toot(toot)
            if text is not None:
                yield {
                    "content": text,
                    "id": toot.id
                }
        try:
            toots = client.fetch_next(toots)
        except TimeoutError:
            print("Operation timed out, committing to database and exiting.")
            db.commit()
            db.close()
            sys.exit(1)
        pages += 1
        if pages % 10 == 0:
            # progress indicator: number of pages fetched so far
            print(pages)


client = Mastodon(
    client_id="clientcred.secret",
    access_token="usercred.secret",
    api_base_url=api_base_url)

me = client.account_verify_credentials()
following = client.account_following(me.id)

db = sqlite3.connect("toots.db")
db.text_factory = str
c = db.cursor()
c.execute("CREATE TABLE IF NOT EXISTS `toots` (id INT NOT NULL UNIQUE PRIMARY KEY, userid INT NOT NULL, content VARCHAR NOT NULL) WITHOUT ROWID")
db.commit()


def handleCtrlC(signum, frame):
    """SIGINT handler: commit what has been downloaded so far, then exit."""
    print("\nPREMATURE EVACUATION - Saving chunks")
    db.commit()
    sys.exit(1)


signal.signal(signal.SIGINT, handleCtrlC)

for f in following:
    # Resume from the newest status already stored for this account.
    # `=` rather than LIKE: this is an exact match on an integer column.
    last_toot = c.execute("SELECT id FROM `toots` WHERE userid = ? ORDER BY id DESC LIMIT 1", (f.id,)).fetchone()
    if last_toot is not None:
        last_toot = last_toot[0]
    else:
        last_toot = 0
    print("Downloading toots for user @{}, starting from {}".format(f.username, last_toot))
    for t in get_toots(client, f.id, last_toot):
        c.execute("REPLACE INTO toots (id, userid, content) VALUES (?, ?, ?)", (t['id'], f.id, t['content']))

db.commit()
db.execute("VACUUM") #compact db
db.commit()
db.close()


# ---- retained from the original lines: header of the next file in this commit dump ----
# \ No newline at end of file
# diff --git a/reply.py b/reply.py
# new file mode 100755
# index 0000000..762198e
# --- /dev/null
# +++ b/reply.py
# @@ -0,0 +1,51 @@
# +#!/usr/bin/env python3
# +# This Source Code Form is subject to the terms of the Mozilla Public
# +# License, v. 2.0. If a copy of the MPL was not distributed with this
# +# file, You can obtain one at http://mozilla.org/MPL/2.0/.

import mastodon
import os, random, re
import create
from bs4 import BeautifulSoup

# reply.py: listen on the user stream and answer every mention with a
# generated toot.

api_base_url = "https://botsin.space"
client = mastodon.Mastodon(
    client_id="clientcred.secret",
    access_token="usercred.secret",
    api_base_url=api_base_url)


def extract_toot(toot):
    """Turn the HTML of an incoming mention into lower-cased plain text.

    Line breaks and paragraphs become newlines, hashtags keep their text and
    other links are replaced by their URL. Profile URLs of the form
    ``https://host/@user`` are then rewritten to ``@user@host`` and a leading
    ``@user@host`` is removed.

    Args:
        toot: the HTML content of the status (a string).

    Returns:
        The extracted text, lower-cased.
    """
    soup = BeautifulSoup(toot, "html.parser")
    for lb in soup.select("br"):
        lb.insert_after("\n")
        lb.decompose()
    for p in soup.select("p"):
        p.insert_after("\n")
        p.unwrap()
    for ht in soup.select("a.hashtag"):
        ht.unwrap()
    for link in soup.select("a"):
        href = link.get("href")
        if href:
            link.insert_after(href)
            link.decompose()
        else:
            # An anchor without an href has no URL to substitute; keep its
            # text instead of failing on the missing attribute.
            link.unwrap()
    lines = [line.strip() for line in soup.get_text().strip().split("\n")]
    text = "\n".join(lines)
    text = re.sub(r"https?://([^/]+)/(@[^ ]+)", r"\2@\1", text) #put mentions back in
    text = re.sub(r"^@[^@]+@[^ ]+ *", r"", text) #...but remove the initial one
    return text.lower() #for easier matching


class ReplyListener(mastodon.StreamListener):
    """Stream listener that replies to mentions with a generated toot."""

    def on_notification(self, notification):
        """Reply to *notification* if it is a mention; ignore anything else.

        The reply is addressed to the mentioning account, is posted in reply
        to the mentioning status, and uses that status's visibility.
        """
        if notification['type'] != 'mention':
            return
        acct = "@" + notification['account']['acct']
        post_id = notification['status']['id']
        mention = extract_toot(notification['status']['content'])
        print(acct + " says " + mention)
        generated = create.make_toot(True)['toot']
        if generated is None:
            # Generation can fail and yield None; skip this reply rather than
            # raising inside the stream handler.
            print("could not generate a reply for " + acct)
            return
        toot = acct + " " + generated
        client.status_post(toot, post_id, visibility=notification['status']['visibility'])
        print("replied with " + toot)


rl = ReplyListener()
client.stream_user(rl)


# ---- remainder of the original line: last file in this commit dump ----
# diff --git a/requirements.txt b/requirements.txt
# new file mode 100644
# index 0000000..75be242
# --- /dev/null
# +++ b/requirements.txt
# @@ -0,0 +1,3 @@
# +Mastodon.py==1.2.1
# +markovify==0.7.1
# +beautifulsoup4==4.6.0