202 lines
4.2 KiB
Go

package logic
import (
"context"
"encoding/base64"
"fmt"
"git.anthrove.art/Anthrove/gorse-playground/internal/config"
"git.anthrove.art/Anthrove/gorse-playground/pkg/e621"
"git.anthrove.art/Anthrove/gorse-playground/pkg/models"
"git.anthrove.art/Anthrove/gorse-playground/pkg/utils"
"github.com/anthrove/openapi-e621-go"
"github.com/caarlos0/env/v11"
"golang.org/x/time/rate"
"log"
"net/http"
"os"
"strconv"
"strings"
"time"
_ "github.com/joho/godotenv/autoload"
)
// Package-level state shared by the functions in this file; all three values
// are populated by init.
var (
	// e621Config holds the e621 settings parsed from the environment.
	e621Config config.E621
	// rateLimit is the limiter rateMiddleware waits on before each request.
	rateLimit *rate.Limiter
	// apiClient is the e621 API client used by the favorites helpers.
	apiClient *openapi.APIClient
)
// init prepares the package-level state: it parses the e621 settings from the
// environment, creates the shared rate limiter and builds the API client.
// Any failure aborts start-up through log.Panic.
func init() {
	if parseErr := env.Parse(&e621Config); parseErr != nil {
		log.Panic(parseErr)
	}

	// The limiter must exist before the client is used, because the client's
	// transport waits on it for every request.
	rateLimit = rate.NewLimiter(1, 2)

	var clientErr error
	apiClient, clientErr = newE621Client(e621Config)
	if clientErr != nil {
		log.Panic(clientErr)
	}
}
// newE621Client builds an e621 API client for the given credentials.
//
// Every request carries a Basic Authorization header derived from
// cfg.Username and cfg.ApiKey as well as a User-Agent naming the user, and is
// sent through the rate limiting middleware of this package.
//
// The returned error is currently always nil.
func newE621Client(cfg config.E621) (*openapi.APIClient, error) {
	credentials := base64.StdEncoding.EncodeToString([]byte(cfg.Username + ":" + cfg.ApiKey))

	// Named clientConfig so it does not shadow the package-level e621Config.
	clientConfig := openapi.NewConfiguration()
	clientConfig.DefaultHeader["Authorization"] = "Basic " + credentials
	clientConfig.UserAgent = fmt.Sprintf("gorse-playground-scraper used by %s | (made by the Anthrove Team)", cfg.Username)
	clientConfig.Debug = false

	// Use a dedicated client instead of mutating http.DefaultClient: changing
	// the shared default client's Transport would silently route every other
	// user of the default client in this process through the rate limiter.
	clientConfig.HTTPClient = &http.Client{
		Transport: newRateMiddleware(&http.Transport{}),
	}

	return openapi.NewAPIClient(clientConfig), nil
}
// GetAllFavorites collects the favorites of the given user by requesting
// consecutive pages, starting at page 1, until a page comes back empty.
//
// It returns the posts of all pages in request order. If any page request
// fails, the error is returned together with a nil slice.
func GetAllFavorites(ctx context.Context, userID int) ([]openapi.Post, error) {
	collected := make([]openapi.Post, 0)
	for pageNumber := 1; ; pageNumber++ {
		pagePosts, err := GetFavoritePage(ctx, userID, pageNumber)
		if err != nil {
			return nil, err
		}
		// An empty page marks the end of the listing.
		if len(pagePosts) == 0 {
			return collected, nil
		}
		collected = append(collected, pagePosts...)
	}
}
// GetFavoritePage requests a single page of favorites for the given user
// through the package-level API client.
//
// On failure it returns an empty, non-nil slice together with the error.
func GetFavoritePage(ctx context.Context, userId int, pageIdentifier int) ([]openapi.Post, error) {
	// Both identifiers are narrowed to int32 because that is what the
	// request builder accepts.
	request := apiClient.FavoritesAPI.ListFavorites(ctx).
		Page(int32(pageIdentifier)).
		UserId(int32(userId))

	response, _, err := request.Execute()
	if err != nil {
		return []openapi.Post{}, err
	}
	return response.Posts, nil
}
func SubmitItems(ctx context.Context) error {
currentDate := time.Now().Format("2006-01-02")
err := utils.DownloadE6Data(ctx, "posts-"+currentDate+".csv.gz", "post-file.csv")
if err != nil {
return err
}
fileReader, err := os.Open("post-file.csv")
if err != nil {
return err
}
inputE621PostChannel := make(chan e621.Post)
outputAnthrovePostChannel := make(chan models.GorseItem)
postChan := utils.GetStreamingData[e621.Post](ctx, fileReader)
go func() {
defer close(inputE621PostChannel)
for post := range postChan {
inputE621PostChannel <- post
}
log.Println("Loading ended")
}()
go func() {
defer close(outputAnthrovePostChannel)
err := postToItem(inputE621PostChannel, outputAnthrovePostChannel)
if err != nil { //TODO: DEADLOCK
log.Println(err)
}
log.Println("Convert ended")
}()
log.Println("Start with comparison check")
items := make([]models.GorseItem, 0)
length := 0
for item := range outputAnthrovePostChannel {
timeDate, err := time.Parse(time.DateTime, item.Timestamp)
if err != nil {
log.Println(err)
continue
}
if !timeDate.After(time.Date(2024, 1, 1, 1, 1, 1, 0, time.UTC)) {
continue
}
items = append(items, item)
if length%20_000 == 0 && length > 0 {
log.Println("Worked ", length, " items")
err := UpsertItems(ctx, items)
if err != nil {
return err
}
items = make([]models.GorseItem, 0)
}
length++
}
err = UpsertItems(ctx, items)
if err != nil {
return err
}
return nil
}
// newRateMiddleware wraps the given transport in a rateMiddleware and returns
// it as an http.RoundTripper.
func newRateMiddleware(transport *http.Transport) http.RoundTripper {
	middleware := new(rateMiddleware)
	middleware.transport = transport
	return middleware
}
// rateMiddleware is an http.RoundTripper that waits on the package-level
// rate limiter before delegating each request to the wrapped transport.
type rateMiddleware struct {
// transport performs the actual round trip once the limiter allows it.
transport *http.Transport
}
// RoundTrip implements http.RoundTripper. It blocks on the package-level rate
// limiter, honouring the request's context, and then forwards the request to
// the wrapped transport. If waiting fails, that error is returned and the
// request is not sent.
func (r rateMiddleware) RoundTrip(request *http.Request) (*http.Response, error) {
	if waitErr := rateLimit.Wait(request.Context()); waitErr != nil {
		return nil, waitErr
	}
	return r.transport.RoundTrip(request)
}
// postToItem converts every post received on input into a Gorse item and
// sends it on output. It returns once input is closed; it does not close
// output itself. The returned error is currently always nil.
//
// The item takes its comment from the post description, its hidden flag from
// the deleted flag, its id from the decimal post ID, its labels from the tag
// string and its timestamp from the creation time.
func postToItem(input chan e621.Post, output chan models.GorseItem) error {
	for e6Post := range input {
		// strings.Fields instead of strings.Split(s, " "): an empty tag string
		// yields no labels rather than a single empty label, and repeated
		// spaces between tags do not produce empty labels either.
		labels := strings.Fields(e6Post.TagString)

		output <- models.GorseItem{
			Comment:   e6Post.Description,
			IsHidden:  e6Post.IsDeleted,
			ItemId:    strconv.Itoa(e6Post.ID),
			Labels:    labels,
			Timestamp: e6Post.CreatedAt,
		}
	}
	return nil
}