FediGraph/main.go

273 lines
6.6 KiB
Go

package main
import (
//"context"
"context"
"fmt"
"log"
"os"
"slices"
"strconv"
"strings"
"github.com/joho/godotenv"
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
)
// Package-level state shared by the database helpers and the crawler.
var (
// driver is the database driver handle; it is assigned in init.
driver neo4j.DriverWithContext
// ctx is the context passed to every driver call; init sets it to
// context.Background().
ctx context.Context
// scrapedUsers holds the tags of users RecursiveQueueCollect has already
// visited, so that no user is collected twice.
scrapedUsers []string
)
// init loads environment variables through godotenv and opens the
// package-level database driver before main runs.
//
// It reads DB_URI, DB_USER and DB_PASSWORD from the environment. A failure to
// load the .env file terminates the process via log.Fatalf (with the
// underlying error included in the message); a failure to create the driver
// or to verify connectivity panics.
func init() {
	// Environment var loading
	if err := godotenv.Load(); err != nil {
		log.Fatalf("Error loading .env file: %v", err)
	}
	dbUri := os.Getenv("DB_URI")

	// DB connection
	var err error
	driver, err = neo4j.NewDriverWithContext(dbUri, neo4j.BasicAuth(os.Getenv("DB_USER"), os.Getenv("DB_PASSWORD"), ""))
	if err != nil {
		panic(err)
	}
	ctx = context.Background()

	// The driver is intentionally not closed here: a defer in init would run
	// as soon as init returns, before main gets to use the connection.
	if err = driver.VerifyConnectivity(ctx); err != nil {
		panic(err)
	}
	fmt.Println("Connected to DB at " + dbUri)
}
// User identifies a single account by its full tag and the two parts MakeUser
// derives from it.
//
// The type is comparable, which lets QueueObject values be used as map keys.
type User struct {
	// Tag is the full handle exactly as supplied, e.g. "name@instance".
	Tag string
	// Name is the part of Tag before the "@".
	Name string
	// Instance is the part of Tag after the "@".
	Instance string
	// NoScrape marks a user that must not be scraped: GetFollowers and
	// GetFollowing return nil for such a user, and DbCreateUser stores the
	// flag as the node's noscrape property. The zero value (false) means
	// scraping is allowed.
	NoScrape bool
}
// MakeUser builds a User from a tag of the form "name@instance".
//
// The tag is always stored verbatim in Tag. When it contains exactly one "@",
// the text before it becomes Name and the text after it becomes Instance.
// Any other shape (no "@", or more than one) yields the placeholder
// Name "foo" and Instance "example.com".
func MakeUser(tag string) User {
	// Exactly one separator is the same condition as "splits into two parts".
	if strings.Count(tag, "@") == 1 {
		name, instance, _ := strings.Cut(tag, "@")
		return User{Tag: tag, Name: name, Instance: instance}
	}
	return User{Tag: tag, Name: "foo", Instance: "example.com"}
}
// checkConsent reports whether a profile description permits scraping.
//
// It returns false when desc contains any of the opt-out hashtags
// "#noscrape", "#nobot" or "#noindex", matched case-insensitively and as
// plain substrings, and true otherwise.
func checkConsent(desc string) bool {
	// Lower-case once instead of once per tag.
	lowered := strings.ToLower(desc)
	for _, optOut := range []string{"#noscrape", "#nobot", "#noindex"} {
		if strings.Contains(lowered, optOut) {
			return false
		}
	}
	return true
}
// DbRunQuery executes query through the package-level driver against the
// database named "memgraph", passing no query parameters, and returns the
// eagerly collected result.
//
// It panics if the driver reports an error.
func DbRunQuery(query string) *neo4j.EagerResult {
	eager, err := neo4j.ExecuteQuery(
		ctx,
		driver,
		query,
		nil, // no parameters: callers embed values in the query text
		neo4j.EagerResultTransformer,
		neo4j.ExecuteQueryWithDatabase("memgraph"),
	)
	if err != nil {
		panic(err)
	}
	return eager
}
// DbRemoveDuplicateRelationships deletes redundant FOLLOWS relationships.
//
// The query groups the FOLLOWS relationships between each ordered pair of
// User nodes and, where a group has more than one member, deletes every
// relationship except the first in the collected list.
func DbRemoveDuplicateRelationships() {
	const dedupeFollows = "MATCH (a:User)-[r:FOLLOWS]->(b:User) WITH a, type(r) as type, collect(r) as rels, b WHERE size(rels) > 1 UNWIND tail(rels) as rel DELETE rel;"
	DbRunQuery(dedupeFollows)
}
func DbCreateInstance(instance string) error {
// check if instance exists
instanceSoftware := getSoftware(instance)
result := DbRunQuery(fmt.Sprintf("MATCH (n:Instance {name: '%s', software:'%s'}) RETURN n;", instance, instanceSoftware))
if len(result.Records) > 0 {
return fmt.Errorf("instance already exists")
}
query := fmt.Sprintf("CREATE (n:Instance {name: '%s', software: '%s'});", instance, instanceSoftware)
DbRunQuery(query)
return nil
}
// DbCreateUser stores u as a User node and links it to its Instance node.
//
// If a User node with the same tag already exists the function returns
// without touching the database further. Otherwise it creates the node,
// creates the Instance node for u.Instance when none with that name exists,
// and adds an ON relationship from the user to the instance.
//
// Database failures panic inside DbRunQuery.
func DbCreateUser(u User) {
	// Values are spliced into single-quoted Cypher string literals, so
	// backslashes and single quotes must be escaped to keep a hostile or
	// merely unusual value from breaking out of the literal.
	quote := strings.NewReplacer(`\`, `\\`, `'`, `\'`).Replace
	tag, instance := quote(u.Tag), quote(u.Instance)

	// check if user exists
	if found := DbRunQuery(fmt.Sprintf("MATCH (n:User {tag: '%s'}) RETURN n;", tag)); len(found.Records) > 0 {
		return
	}
	DbRunQuery(fmt.Sprintf("CREATE (n:User {tag: '%s', name: '%s', instance: '%s', noscrape: %t});", tag, quote(u.Name), instance, u.NoScrape))

	// check if instance exists
	if found := DbRunQuery(fmt.Sprintf("MATCH (n:Instance {name: '%s'}) RETURN n;", instance)); len(found.Records) == 0 {
		// DbCreateInstance receives the raw name; escaping is its own concern.
		if err := DbCreateInstance(u.Instance); err != nil {
			log.Printf("creating instance %q for user %q: %v", u.Instance, u.Tag, err)
		}
	}

	// add on instance relationship
	DbRunQuery(fmt.Sprintf("MATCH (a:User {tag: '%s'}), (b:Instance {name: '%s'}) CREATE (a)-[r:ON]->(b);", tag, instance))
}
// DbAddFollower records that follower follows u.
//
// Both users are created first if they are missing. A FOLLOWS relationship
// from follower to u is then added unless one already exists.
//
// Database failures panic inside DbRunQuery.
func DbAddFollower(u User, follower User) {
	// check if user exists; create if not
	DbCreateUser(u)
	DbCreateUser(follower)

	// Tags are spliced into single-quoted Cypher string literals, so
	// backslashes and single quotes must be escaped to keep a hostile or
	// merely unusual tag from breaking out of the literal.
	quote := strings.NewReplacer(`\`, `\\`, `'`, `\'`).Replace
	followeeTag, followerTag := quote(u.Tag), quote(follower.Tag)

	// check if relationship exists
	existing := DbRunQuery(fmt.Sprintf("MATCH (a:User {tag: '%s'})-[r:FOLLOWS]->(b:User {tag: '%s'}) RETURN r;", followerTag, followeeTag))
	if len(existing.Records) > 0 {
		return
	}

	// Here a is the followee and b the follower, so the edge runs b -> a.
	DbRunQuery(fmt.Sprintf("MATCH (a:User {tag: '%s'}), (b:User {tag: '%s'}) CREATE (b)-[r:FOLLOWS]->(a);", followeeTag, followerTag))
}
// GetFollowing returns the users that u follows.
//
// The lookup is delegated to the fetcher matching the software that
// getSoftware reports for u.Instance ("firefish", "mastodon" or "akkoma").
// It returns nil, without consulting getSoftware, when u.NoScrape is set,
// and nil for any other software.
func GetFollowing(u User) []User {
	if u.NoScrape {
		return nil
	}

	var following []User
	switch software := getSoftware(u.Instance); software {
	case "akkoma":
		following = AkkomaGetFollowing(u)
	case "mastodon":
		following = MastodonGetFollowing(u)
	case "firefish":
		following = FirefishGetFollowing(u)
	}
	return following
}
// GetFollowers returns the users that follow u.
//
// The lookup is delegated to the fetcher matching the software that
// getSoftware reports for u.Instance ("firefish", "mastodon" or "akkoma").
// It returns nil, without consulting getSoftware, when u.NoScrape is set,
// and nil for any other software.
func GetFollowers(u User) []User {
	if u.NoScrape {
		return nil
	}

	var followers []User
	switch software := getSoftware(u.Instance); software {
	case "akkoma":
		followers = AkkomaGetFollowers(u)
	case "mastodon":
		followers = MastodonGetFollowers(u)
	case "firefish":
		followers = FirefishGetFollowers(u)
	}
	return followers
}
// QueueObject is one pending "Follower follows Followee" edge, collected
// while crawling and written to the database later by main.
//
// MakeQueue builds values with positional literals, so the field order
// (Follower first, Followee second) must not change.
type QueueObject struct {
// Follower is the user who follows Followee.
Follower User
// Followee is the user being followed.
Followee User
}
// MakeQueue returns every follow edge that touches u directly: one entry per
// follower of u (follower -> u), followed by one entry per user that u
// follows (u -> followee). The result is nil when there are no edges.
func MakeQueue(u User) []QueueObject {
	followers := GetFollowers(u)
	following := GetFollowing(u)

	total := len(followers) + len(following)
	if total == 0 {
		return nil
	}

	edges := make([]QueueObject, 0, total)
	for i := range followers {
		edges = append(edges, QueueObject{Follower: followers[i], Followee: u})
	}
	for i := range following {
		edges = append(edges, QueueObject{Follower: u, Followee: following[i]})
	}
	return edges
}
// RecursiveQueueCollect gathers the follow edges around u and then, one
// level shallower, around the Follower of each of those edges.
//
// depth is the number of levels still to visit; at 0 nothing is collected.
// depthString is the progress prefix inherited from the caller and is only
// used for the log line. Visited tags are remembered in the package-level
// scrapedUsers slice, and a user that is already listed there yields nothing.
//
// NOTE(review): only depth == 0 stops the descent, so a negative depth keeps
// going until every reachable user has been visited — confirm whether that
// is meant as "unlimited".
// NOTE(review): the recursion always follows qo.Follower. For the edges that
// come from u's following list the Follower is u itself (already visited), so
// the users u follows are never descended into — confirm this is intended.
func RecursiveQueueCollect(u User, depth int, depthString string) []QueueObject {
	if depth == 0 {
		return nil
	}
	if slices.Contains(scrapedUsers, u.Tag) {
		return nil
	}
	scrapedUsers = append(scrapedUsers, u.Tag)

	fmt.Printf("(%s) Collecting queue for %s\n", depthString, u.Tag)

	direct := MakeQueue(u)
	total := len(direct)

	// Child progress labels are "<parent> | i/n", or just "i/n" at the root.
	prefix := ""
	if depthString != "" {
		prefix = depthString + " | "
	}

	collected := direct
	for idx := range direct {
		label := fmt.Sprintf("%s%d/%d", prefix, idx+1, total)
		nested := RecursiveQueueCollect(direct[idx].Follower, depth-1, label)
		collected = append(collected, nested...)
	}
	return collected
}
func main() {
u := MakeUser(os.Args[1])
DbCreateUser(u)
recdepth, err := strconv.Atoi(os.Args[2])
if err != nil {
panic(err)
}
q := RecursiveQueueCollect(u, recdepth, "")
// turn queue into a set then back into a list to prune duplicates
// this is a bit of a hack but it works!!
// used it in a leetcode answer once, finally leetcode pays off
set := make(map[QueueObject]bool)
for _, qo := range q {
set[qo] = true
}
q = nil
for k := range set {
q = append(q, k)
}
fmt.Println("\n\n\nAdding queue to DB...")
fmt.Println("\n\n\nAdding instances to DB...")
instancesmap := make(map[string]bool)
for _, qo := range q {
instancesmap[qo.Follower.Instance] = true
instancesmap[qo.Followee.Instance] = true
}
instancelist := make([]string, 0, len(instancesmap))
for k := range instancesmap {
instancelist = append(instancelist, k)
}
for i, instance := range instancelist {
fmt.Printf("(%d/%d) %s\n", i, len(instancelist), instance)
DbCreateInstance(instance)
}
fmt.Println("\n\n\nAdding users to DB...")
usermap := make(map[string]bool)
for _, qo := range q {
usermap[qo.Follower.Tag] = true
usermap[qo.Followee.Tag] = true
}
userlist := make([]string, 0, len(usermap))
for k := range usermap {
userlist = append(userlist, k)
}
for i, user := range userlist {
fmt.Printf("(%d/%d) %s\n", i, len(userlist), user)
DbCreateUser(MakeUser(user))
}
fmt.Println("\n\n\nAdding relationships to DB...")
for i, qo := range q {
fmt.Printf("(%d/%d)%s is followed by %s\n", i, len(q), qo.Followee.Tag, qo.Follower.Tag)
DbAddFollower(qo.Followee, qo.Follower)
}
}