273 lines
6.6 KiB
Go
273 lines
6.6 KiB
Go
package main
|
|
|
|
import (
|
|
//"context"
|
|
"context"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"slices"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/joho/godotenv"
|
|
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
|
|
)
|
|
|
|
var (
|
|
driver neo4j.DriverWithContext
|
|
ctx context.Context
|
|
scrapedUsers []string
|
|
)
|
|
|
|
func init() {
|
|
// Environment var loading
|
|
err := godotenv.Load()
|
|
if err != nil {
|
|
log.Fatal("Error loading .env file")
|
|
}
|
|
dbUri := os.Getenv("DB_URI")
|
|
|
|
// DB connection
|
|
driver, err = neo4j.NewDriverWithContext(dbUri, neo4j.BasicAuth(os.Getenv("DB_USER"), os.Getenv("DB_PASSWORD"), ""))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
ctx = context.Background()
|
|
//defer driver.Close(ctx)
|
|
|
|
err = driver.VerifyConnectivity(ctx)
|
|
if err != nil {
|
|
panic(err)
|
|
} else {
|
|
fmt.Println("Connected to DB at " + dbUri)
|
|
}
|
|
|
|
}
|
|
|
|
// User identifies one account in the follow graph.
type User struct {
	// Tag is the full handle the user was created from (see MakeUser).
	Tag string
	// Name is the part of the tag before the "@".
	Name string
	// Instance is the part of the tag after the "@".
	Instance string
	// NoScrape marks a user that must not be scraped: GetFollowing and
	// GetFollowers return nil for such users, and DbCreateUser stores the
	// flag on the user node. The zero value (false) allows scraping.
	// Added as the last field so existing literals keep their meaning.
	NoScrape bool
}
|
|
|
|
func MakeUser(tag string) User {
|
|
var user User
|
|
user.Tag = tag
|
|
parts := strings.Split(tag, "@")
|
|
if len(parts) == 2 {
|
|
user.Name = parts[0]
|
|
user.Instance = parts[1]
|
|
} else {
|
|
user.Name = "foo"
|
|
user.Instance = "example.com"
|
|
}
|
|
return user
|
|
}
|
|
|
|
// checkConsent reports whether a profile description allows scraping.
//
// It returns false when desc contains any of the opt-out hashtags
// "#noscrape", "#nobot" or "#noindex" (matched case-insensitively) and true
// otherwise.
func checkConsent(desc string) bool {
	lowered := strings.ToLower(desc)
	for _, optOut := range []string{"#noscrape", "#nobot", "#noindex"} {
		if strings.Contains(lowered, optOut) {
			return false
		}
	}
	return true
}
|
|
|
|
func DbRunQuery(query string) *neo4j.EagerResult {
|
|
result, err := neo4j.ExecuteQuery(ctx, driver, query, nil, neo4j.EagerResultTransformer, neo4j.ExecuteQueryWithDatabase("memgraph"))
|
|
if err != nil {
|
|
panic(err)
|
|
} else {
|
|
return result
|
|
}
|
|
}
|
|
|
|
func DbRemoveDuplicateRelationships() {
|
|
DbRunQuery("MATCH (a:User)-[r:FOLLOWS]->(b:User) WITH a, type(r) as type, collect(r) as rels, b WHERE size(rels) > 1 UNWIND tail(rels) as rel DELETE rel;")
|
|
}
|
|
|
|
func DbCreateInstance(instance string) error {
|
|
// check if instance exists
|
|
instanceSoftware := getSoftware(instance)
|
|
result := DbRunQuery(fmt.Sprintf("MATCH (n:Instance {name: '%s', software:'%s'}) RETURN n;", instance, instanceSoftware))
|
|
if len(result.Records) > 0 {
|
|
return fmt.Errorf("instance already exists")
|
|
}
|
|
query := fmt.Sprintf("CREATE (n:Instance {name: '%s', software: '%s'});", instance, instanceSoftware)
|
|
DbRunQuery(query)
|
|
return nil
|
|
}
|
|
|
|
func DbCreateUser(u User) {
|
|
// check if user exists
|
|
result := DbRunQuery(fmt.Sprintf("MATCH (n:User {tag: '%s'}) RETURN n;", u.Tag))
|
|
if len(result.Records) > 0 {
|
|
return
|
|
}
|
|
|
|
query := fmt.Sprintf("CREATE (n:User {tag: '%s', name: '%s', instance: '%s', noscrape: %s});", u.Tag, u.Name, u.Instance, strconv.FormatBool(u.NoScrape))
|
|
DbRunQuery(query)
|
|
|
|
// check if instance exists
|
|
result = DbRunQuery(fmt.Sprintf("MATCH (n:Instance {name: '%s'}) RETURN n;", u.Instance))
|
|
if len(result.Records) == 0 {
|
|
DbCreateInstance(u.Instance)
|
|
}
|
|
|
|
// add on instance relationship
|
|
query = fmt.Sprintf("MATCH (a:User {tag: '%s'}), (b:Instance {name: '%s'}) CREATE (a)-[r:ON]->(b);", u.Tag, u.Instance)
|
|
DbRunQuery(query)
|
|
}
|
|
|
|
func DbAddFollower(u User, follower User) {
|
|
// check if user exists; create if not
|
|
DbCreateUser(u)
|
|
DbCreateUser(follower)
|
|
|
|
// check if relationship exists
|
|
result := DbRunQuery(fmt.Sprintf("MATCH (a:User {tag: '%s'})-[r:FOLLOWS]->(b:User {tag: '%s'}) RETURN r;", follower.Tag, u.Tag))
|
|
if len(result.Records) > 0 {
|
|
//fmt.Println("Relationship already exists")
|
|
return
|
|
}
|
|
query := fmt.Sprintf("MATCH (a:User {tag: '%s'}), (b:User {tag: '%s'}) CREATE (b)-[r:FOLLOWS]->(a);", u.Tag, follower.Tag)
|
|
DbRunQuery(query)
|
|
}
|
|
|
|
func GetFollowing(u User) []User {
|
|
if u.NoScrape {
|
|
return nil
|
|
}
|
|
switch getSoftware(u.Instance) {
|
|
case "firefish":
|
|
return FirefishGetFollowing(u)
|
|
case "mastodon":
|
|
return MastodonGetFollowing(u)
|
|
case "akkoma":
|
|
return AkkomaGetFollowing(u)
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func GetFollowers(u User) []User {
|
|
if u.NoScrape {
|
|
return nil
|
|
}
|
|
switch getSoftware(u.Instance) {
|
|
case "firefish":
|
|
return FirefishGetFollowers(u)
|
|
case "mastodon":
|
|
return MastodonGetFollowers(u)
|
|
case "akkoma":
|
|
return AkkomaGetFollowers(u)
|
|
default:
|
|
return nil
|
|
}
|
|
}
|
|
|
|
type QueueObject struct {
|
|
Follower User
|
|
Followee User
|
|
}
|
|
|
|
func MakeQueue(u User) []QueueObject {
|
|
var queue []QueueObject
|
|
for _, f := range GetFollowers(u) {
|
|
queue = append(queue, QueueObject{f, u})
|
|
}
|
|
for _, f := range GetFollowing(u) {
|
|
queue = append(queue, QueueObject{u, f})
|
|
}
|
|
return queue
|
|
}
|
|
|
|
func RecursiveQueueCollect(u User, depth int, depthString string) []QueueObject {
|
|
var queue []QueueObject
|
|
if depth == 0 || slices.Contains(scrapedUsers, u.Tag) {
|
|
return queue
|
|
} else {
|
|
scrapedUsers = append(scrapedUsers, u.Tag)
|
|
}
|
|
|
|
fmt.Printf("(%s) Collecting queue for %s\n", depthString, u.Tag)
|
|
|
|
queue = MakeQueue(u)
|
|
queueLength := len(queue)
|
|
for i, qo := range queue {
|
|
var newDepthString string
|
|
if depthString == "" {
|
|
newDepthString = fmt.Sprintf("%d/%d", i+1, queueLength)
|
|
} else {
|
|
newDepthString = fmt.Sprintf("%s | %d/%d", depthString, i+1, queueLength)
|
|
}
|
|
queue = append(queue, RecursiveQueueCollect(qo.Follower, depth-1, newDepthString)...)
|
|
}
|
|
|
|
return queue
|
|
}
|
|
|
|
func main() {
|
|
u := MakeUser(os.Args[1])
|
|
DbCreateUser(u)
|
|
|
|
recdepth, err := strconv.Atoi(os.Args[2])
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
q := RecursiveQueueCollect(u, recdepth, "")
|
|
|
|
// turn queue into a set then back into a list to prune duplicates
|
|
// this is a bit of a hack but it works!!
|
|
// used it in a leetcode answer once, finally leetcode pays off
|
|
set := make(map[QueueObject]bool)
|
|
for _, qo := range q {
|
|
set[qo] = true
|
|
}
|
|
|
|
q = nil
|
|
for k := range set {
|
|
q = append(q, k)
|
|
}
|
|
|
|
fmt.Println("\n\n\nAdding queue to DB...")
|
|
|
|
fmt.Println("\n\n\nAdding instances to DB...")
|
|
instancesmap := make(map[string]bool)
|
|
for _, qo := range q {
|
|
instancesmap[qo.Follower.Instance] = true
|
|
instancesmap[qo.Followee.Instance] = true
|
|
}
|
|
instancelist := make([]string, 0, len(instancesmap))
|
|
for k := range instancesmap {
|
|
instancelist = append(instancelist, k)
|
|
}
|
|
for i, instance := range instancelist {
|
|
fmt.Printf("(%d/%d) %s\n", i, len(instancelist), instance)
|
|
DbCreateInstance(instance)
|
|
}
|
|
|
|
fmt.Println("\n\n\nAdding users to DB...")
|
|
usermap := make(map[string]bool)
|
|
for _, qo := range q {
|
|
usermap[qo.Follower.Tag] = true
|
|
usermap[qo.Followee.Tag] = true
|
|
}
|
|
userlist := make([]string, 0, len(usermap))
|
|
for k := range usermap {
|
|
userlist = append(userlist, k)
|
|
}
|
|
for i, user := range userlist {
|
|
fmt.Printf("(%d/%d) %s\n", i, len(userlist), user)
|
|
DbCreateUser(MakeUser(user))
|
|
}
|
|
|
|
fmt.Println("\n\n\nAdding relationships to DB...")
|
|
|
|
for i, qo := range q {
|
|
fmt.Printf("(%d/%d)%s is followed by %s\n", i, len(q), qo.Followee.Tag, qo.Follower.Tag)
|
|
DbAddFollower(qo.Followee, qo.Follower)
|
|
}
|
|
}
|