This repository has been archived on 2022-03-22. You can view files and clone it, but cannot push or open issues or pull requests.
amber-ebooks-archived/status.go

200 lines
4.7 KiB
Go

// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
package main
import (
"context"
"encoding/json"
"encoding/xml"
"fmt"
"log"
"net/http"
"net/url"
"strings"
"sync"
"github.com/mattn/go-mastodon"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// accountCache is the per-account bookkeeping record kept for each followed
// account.
type accountCache struct {
	// LatestRemoteTootID is the ID handed to the downloader as the starting
	// point the next time this account's toots are fetched.
	LatestRemoteTootID uint64
}
// downloadToots fetches new toots for every account in following and stores
// them through insertStatus.
//
// It first calls loadData, then starts one goroutine per account. While the
// goroutines are being launched markovLock is held, because the starting ID
// for each account is read from markov.Accounts at launch time. The function
// returns once every goroutine has finished.
//
// Account names without an "@" are qualified with instance.URI before being
// handed to loadAllToots.
func downloadToots(ctx context.Context, instance *mastodon.Instance, following []*mastodon.Account) {
	loadData()

	var pending sync.WaitGroup

	// fetch downloads everything newer than since for a single account.
	fetch := func(who *mastodon.Account, since uint64) {
		defer pending.Done()

		log.Printf("Downloading toots for user %s, starting from %d", who.Acct, since)

		handle := who.Acct
		if strings.IndexByte(handle, '@') < 0 {
			// Local accounts carry no domain part; add this instance's.
			handle = handle + "@" + instance.URI
		}

		store := func(id, content string) {
			insertStatus(ctx, who.ID, id, content)
		}
		loadAllToots(ctx, handle, who.URL, since, store)
	}

	markovLock.Lock()
	for idx := range following {
		who := following[idx]
		pending.Add(1)
		// The arguments are evaluated here, under the lock, not inside the
		// new goroutine.
		go fetch(who, markov.Accounts[who.ID].LatestRemoteTootID)
	}
	markovLock.Unlock()

	pending.Wait()
}
// cleanContent converts the HTML body of a toot into plain text.
//
// The input is parsed as an HTML fragment in the context of a <div>. For each
// top-level node of the fragment (normally one <p> per paragraph) its
// children are flattened in document order, and the results for consecutive
// top-level nodes are separated by a blank line ("\n\n"). While flattening:
//
//   - text nodes are copied verbatim;
//   - <a> elements whose class attribute contains "mention" are dropped
//     together with everything inside them;
//   - <img> elements contribute the value of their alt attribute;
//   - <br> elements become a single newline, so that the words on either
//     side of a line break are not fused into one;
//   - every other element is transparent: only its children are visited.
//
// Top-level nodes themselves are never inspected, only their children, so
// text sitting directly at the top level of the fragment is not emitted.
//
// A parse failure is reported through checkError.
func cleanContent(s string) string {
	paragraphs, err := html.ParseFragment(strings.NewReader(s), &html.Node{
		Type:     html.ElementNode,
		Data:     "div",
		DataAtom: atom.Div,
	})
	checkError(err, "Failed to parse HTML %q", s)

	// hasClass reports whether the first class attribute of n lists class.
	hasClass := func(n *html.Node, class string) bool {
		for _, a := range n.Attr {
			if a.Key != "class" {
				continue
			}
			for _, c := range strings.Fields(a.Val) {
				if c == class {
					return true
				}
			}
			return false
		}
		return false
	}

	var body []byte
	var walk func(*html.Node)
	// walk appends the text of n and all of its following siblings to body.
	walk = func(n *html.Node) {
		for ; n != nil; n = n.NextSibling {
			switch n.Type {
			case html.TextNode:
				body = append(body, n.Data...)
			case html.ElementNode:
				switch n.DataAtom {
				case atom.A:
					if hasClass(n, "mention") {
						// Skip the mention and its whole subtree.
						continue
					}
				case atom.Br:
					// Without this the text before and after a line break
					// would be glued together ("one<br>two" -> "onetwo").
					body = append(body, '\n')
				case atom.Img:
					for _, a := range n.Attr {
						if a.Key == "alt" {
							body = append(body, a.Val...)
							break
						}
					}
				}
				walk(n.FirstChild)
			}
		}
	}

	for i, p := range paragraphs {
		if i != 0 {
			body = append(body, "\n\n"...)
		}
		walk(p.FirstChild)
	}
	return string(body)
}
// getJSON performs an HTTP GET on uri and decodes the JSON response body
// into v, which must be a pointer suitable for encoding/json.
//
// The request is bound to ctx, so cancelling ctx or exceeding its deadline
// aborts the download. A nil ctx is treated as context.Background().
//
// Transport, close and decode failures are reported through checkError; a
// response status other than 200 OK triggers log.Panicf.
func getJSON(ctx context.Context, uri string, v interface{}) {
	if ctx == nil {
		// Request.WithContext panics on a nil context; keep accepting it.
		ctx = context.Background()
	}

	req, err := http.NewRequest(http.MethodGet, uri, nil)
	checkError(err, "Could not download %q", uri)

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	checkError(err, "Could not download %q", uri)
	defer func() {
		checkError(resp.Body.Close(), "Error when closing %q", uri)
	}()

	if resp.StatusCode != http.StatusOK {
		log.Panicf("Error downloading %q: %v", uri, resp.Status)
	}

	checkError(json.NewDecoder(resp.Body).Decode(v), "Error decoding %q", uri)
}
func loadAllToots(ctx context.Context, acct, userURL string, start uint64, foundStatus func(id, content string)) {
webFingerURL := getWebFingerURL(ctx, acct, userURL)
outbox := webFingerUserActivity(ctx, webFingerURL) + "/outbox"
prev := fmt.Sprintf("%s?min_id=%d&page=true", outbox, start)
for prev != "" {
var page struct {
OrderedItems []struct {
Type string `json:"type"`
Object json.RawMessage `json:"object"`
} `json:"orderedItems"`
Prev string `json:"prev"`
}
getJSON(ctx, prev, &page)
for _, i := range page.OrderedItems {
if i.Type == "Create" {
var object struct {
ID string `json:"id"`
Sensitive bool `json:"sensitive"`
Content string `json:"content"`
}
checkError(json.Unmarshal(i.Object, &object), "Failed to decode toot JSON in %q", prev)
if !object.Sensitive {
foundStatus(object.ID, object.Content)
}
}
}
prev = page.Prev
}
}
// getWebFingerURL returns the WebFinger lookup URL for the account acct.
//
// It downloads "/.well-known/host-meta" from the host of userURL, reads the
// template attribute of the document's Link element and substitutes the
// query-escaped "acct:<acct>" for every "{uri}" placeholder in it.
//
// The request is bound to ctx, so cancelling ctx or exceeding its deadline
// aborts the download. A nil ctx is treated as context.Background().
//
// URL parsing, transport, close and XML decode failures are reported through
// checkError. A response status other than 200 OK, or a host-meta document
// without a Link template, triggers log.Panicf.
func getWebFingerURL(ctx context.Context, acct, userURL string) string {
	if ctx == nil {
		// Request.WithContext panics on a nil context; keep accepting it.
		ctx = context.Background()
	}

	acct = url.QueryEscape("acct:" + acct)

	u, err := url.Parse(userURL)
	checkError(err, "Failed to parse user URL")
	u.Path = "/.well-known/host-meta"
	u.RawQuery = ""

	req, err := http.NewRequest(http.MethodGet, u.String(), nil)
	checkError(err, "Could not retrieve host-meta")

	resp, err := http.DefaultClient.Do(req.WithContext(ctx))
	checkError(err, "Could not retrieve host-meta")
	defer func() {
		checkError(resp.Body.Close(), "Error closing host-meta request")
	}()

	if resp.StatusCode != http.StatusOK {
		log.Panicf("Failed to load %q: %s", u, resp.Status)
	}

	var meta struct {
		Link struct {
			Template string `xml:"template,attr"`
		} `xml:"Link"`
	}
	checkError(xml.NewDecoder(resp.Body).Decode(&meta), "Could not find webfinger URL")

	if meta.Link.Template == "" {
		// Fail here with a clear message instead of returning "" and
		// letting the caller fail on an empty URL.
		log.Panicf("Could not find webfinger URL template in %q", u)
	}

	return strings.Replace(meta.Link.Template, "{uri}", acct, -1)
}
// webFingerUserActivity fetches the WebFinger document at uri and returns the
// href of its first link whose rel is "self" and whose type is
// "application/activity+json".
//
// If the document contains no such link the function calls log.Panicf.
func webFingerUserActivity(ctx context.Context, uri string) string {
	type link struct {
		Href string `json:"href"`
		Rel  string `json:"rel"`
		Type string `json:"type"`
	}
	var doc struct {
		Links []link `json:"links"`
	}
	getJSON(ctx, uri, &doc)

	for idx := range doc.Links {
		candidate := &doc.Links[idx]
		if candidate.Rel != "self" || candidate.Type != "application/activity+json" {
			continue
		}
		return candidate.Href
	}

	log.Panicf("Could not find ActivityPub URL in web finger response: %q", uri)
	return ""
}