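// This file implements the word-level Markov chain used by the program: it
// learns, for each short prefix of words, a weighted distribution of the words
// that follow (and precede) it in Mastodon statuses, and persists that model
// to a gob-encoded cache on disk.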
package main

import (
	"context"
	"encoding/gob"
	"log"
	"math/rand"
	"os"
	"path"
	"strconv"
	"strings"
	"sync"
	"unicode"

	mastodon "github.com/mattn/go-mastodon"
)
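
// markovPrefixLength is the number of preceding (normalized) words that make
// up the chain's state.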
const markovPrefixLength = 2
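
// probableString is a weighted multiset of strings: Count tracks how often
// each string was inserted, Order preserves first-insertion order so selection
// does not depend on Go's randomized map iteration, and Total is the sum of
// all counts.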
type probableString struct {
	Order []string
	Count map[string]int
	Total int
}
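
// insert records one more occurrence of s and returns the updated value.
// The receiver is a value, so callers must store the result back.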
func (ps probableString) insert(s string) probableString {
	if n, ok := ps.Count[s]; ok {
		ps.Count[s] = n + 1
		ps.Total++
		return ps
	}

	if ps.Count == nil {
		ps.Count = make(map[string]int)
	}

	ps.Count[s] = 1
	ps.Order = append(ps.Order, s)
	ps.Total++

	return ps
}
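
// rand returns one of the recorded strings, chosen with probability
// proportional to how often it was inserted. It panics if ps is empty.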
func (ps probableString) rand(r *rand.Rand) string {
	n := r.Intn(ps.Total)

	for _, s := range ps.Order {
		n -= ps.Count[s]
		if n < 0 {
			return s
		}
	}

	panic("unreachable")
}
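
// markovDirty carries a signal that the in-memory chain has changed since it
// was last saved; the buffer of one lets senders coalesce signals without
// blocking.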
var markovDirty = make(chan struct{}, 1)
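
// loadData restores the Markov model from the gob cache at *flagData (a flag
// defined elsewhere in this package, as is checkError). A missing cache file
// is not an error.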
func loadData() {
	markovLock.Lock()
	defer markovLock.Unlock()

	f, err := os.Open(*flagData)
	if os.IsNotExist(err) {
		return
	}
	checkError(err, "Could not open data cache")
	defer func() {
		checkError(f.Close(), "Could not close data cache")
	}()

	checkError(gob.NewDecoder(f).Decode(&markov), "Could not read data")
}
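
// saveMarkov writes the model to a temporary file next to *flagData and then
// renames it into place, so a crash mid-write cannot corrupt the existing
// cache.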
func saveMarkov() {
	markovLock.Lock()
	defer markovLock.Unlock()

	f, err := os.Create(*flagData + ".tmp")
	checkError(err, "Could not create data staging file")
	checkError(gob.NewEncoder(f).Encode(&markov), "Could not save data")
	checkError(f.Close(), "Could not close data staging file")
	checkError(os.Rename(*flagData+".tmp", *flagData), "Could not commit data update")
}
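
// markovLock guards markov, which holds the whole model: per-account state
// plus the forward (Next) and backward (Prev) transition tables, keyed by
// arrays of normalized words.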
var markovLock sync.Mutex
var markov = struct {
	Accounts map[mastodon.ID]accountCache
	Next     map[[markovPrefixLength]string]probableString
	Prev     map[[markovPrefixLength]string]probableString
}{
	Accounts: make(map[mastodon.ID]accountCache),
	Next:     make(map[[markovPrefixLength]string]probableString),
	Prev:     make(map[[markovPrefixLength]string]probableString),
}
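
// insertStatus feeds one status into the model: it runs the content through
// cleanContent (defined elsewhere in this package), remembers the highest
// numeric toot ID seen for the account, and trains the chain on each
// paragraph separately.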
func insertStatus(ctx context.Context, account mastodon.ID, id, content string) {
	content = cleanContent(content)

	paragraphs := strings.Split(content, "\n\n")

	markovLock.Lock()
	defer markovLock.Unlock()

	// Remember the highest numeric toot ID seen for this account.
	cache := markov.Accounts[account]
	if n, err := strconv.ParseUint(path.Base(id), 10, 64); err == nil && n > cache.LatestRemoteTootID {
		cache.LatestRemoteTootID = n
		markov.Accounts[account] = cache
	}

	// Each paragraph is trained as its own word sequence.
	for _, p := range paragraphs {
		updateMarkov(strings.Fields(p))
	}

	// Signal that there is unsaved data, without blocking if a signal is
	// already pending.
	select {
	case markovDirty <- struct{}{}:
	default:
	}
}
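
// updateMarkov trains both transition tables on one sequence of words. Next
// maps each prefix of preceding words to the word that follows it (with ""
// marking the end of the text); Prev maps each normalized suffix, at every
// length up to markovPrefixLength, to the word that precedes it (with ""
// marking the start), which lets genMarkov grow a line backwards from a seed.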
func updateMarkov(words []string) {
	const last = markovPrefixLength - 1
	var prefix [markovPrefixLength]string

	// Forward table: record which word follows each normalized prefix,
	// starting from the empty (start-of-text) prefix.
	for _, word := range words {
		markov.Next[prefix] = markov.Next[prefix].insert(word)
		copy(prefix[:], prefix[1:])
		prefix[last] = normalizeWord(word)
	}

	// An empty string marks the end of the text.
	markov.Next[prefix] = markov.Next[prefix].insert("")

	// Backward table: for every suffix length up to markovPrefixLength,
	// record which word precedes the suffix starting at position i.
	for i := 1; i < len(words); i++ {
		for l := 1; l <= markovPrefixLength; l++ {
			var suffix [markovPrefixLength]string
			copy(suffix[:l], words[i:])
			for j := range suffix {
				suffix[j] = normalizeWord(suffix[j])
			}
			markov.Prev[suffix] = markov.Prev[suffix].insert(words[i-1])
		}
	}

	// An empty string marks the start of the text.
	for l := 1; l <= markovPrefixLength; l++ {
		var suffix [markovPrefixLength]string
		copy(suffix[:l], words)
		for j := range suffix {
			suffix[j] = normalizeWord(suffix[j])
		}
		markov.Prev[suffix] = markov.Prev[suffix].insert("")
	}
}
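
// genMarkov generates one line of at most 1000 words. If seed is non-empty it
// first walks the Prev table backwards from the seed to invent the words that
// lead up to it, then continues forwards from the resulting prefix using the
// Next table. It returns nil if the seed is unknown.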
func genMarkov(r *rand.Rand, seed string) []string {
	const last = markovPrefixLength - 1
	var prefix [markovPrefixLength]string
	var line []string

	markovLock.Lock()
	defer markovLock.Unlock()

	if seed != "" {
		var suffix [markovPrefixLength]string
		suffix[0] = normalizeWord(seed)
		ps, ok := markov.Prev[suffix]
		if !ok {
			return nil
		}

		line = append(line, seed)

		// Walk backwards from the seed, collecting predecessor words until
		// the start of a text ("") is reached.
		for len(line) < 1000 {
			s := ps.rand(r)
			if s == "" {
				break
			}
			line = append(line, s)
			copy(suffix[1:], suffix[:])
			suffix[0] = normalizeWord(s)
			ps = markov.Prev[suffix]
		}

		// The words were collected in reverse; flip them into reading order.
		for i, j := 0, len(line)-1; i < j; i, j = i+1, j-1 {
			line[i], line[j] = line[j], line[i]
		}

		// Seed the forward prefix with the last words of the line so far.
		for i, j := len(line)-1, last; i >= 0 && j >= 0; i, j = i-1, j-1 {
			prefix[j] = normalizeWord(line[i])
		}
	}

	if _, ok := markov.Next[prefix]; !ok {
		log.Panicln("No markov data available for prefix", prefix, line)
	}

	// Walk forwards until the end of a text ("") is reached or the line gets
	// suspiciously long.
	for len(line) < 1000 {
		ps := markov.Next[prefix]
		s := ps.rand(r)

		if s == "" {
			return line
		}

		line = append(line, s)
		copy(prefix[:], prefix[1:])
		prefix[last] = normalizeWord(s)
	}

	// probably an infinite loop
	return line
}
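
// normalizeWord lower-cases a word and strips all punctuation so that
// prefixes match regardless of case or punctuation.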
func normalizeWord(s string) string {
	return strings.Join(strings.FieldsFunc(strings.ToLower(s), unicode.IsPunct), "")
}