package logic

import (
	"context"
	"encoding/base64"
	"fmt"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"

	"git.anthrove.art/Anthrove/gorse-playground/internal/config"
	"git.anthrove.art/Anthrove/gorse-playground/pkg/e621"
	"git.anthrove.art/Anthrove/gorse-playground/pkg/models"
	"git.anthrove.art/Anthrove/gorse-playground/pkg/utils"
	"github.com/anthrove/openapi-e621-go"
	"github.com/caarlos0/env/v11"
	_ "github.com/joho/godotenv/autoload"
	"golang.org/x/time/rate"
)

var (
	// e621Config holds the e621 settings parsed from the environment in init.
	e621Config config.E621
	// rateLimit throttles every request sent through rateMiddleware.
	rateLimit *rate.Limiter
	// apiClient is the shared e621 API client built in init.
	apiClient *openapi.APIClient
)

// init parses the e621 configuration from the environment, creates the shared
// rate limiter and builds the API client. It panics when the configuration
// cannot be parsed or the client cannot be created.
func init() {
	err := env.Parse(&e621Config)
	if err != nil {
		log.Panic(err)
	}

	// One request per second on average, with a burst of two.
	rateLimit = rate.NewLimiter(1, 2)

	apiClient, err = newE621Client(e621Config)
	if err != nil {
		log.Panic(err)
	}
}

// newE621Client builds an e621 API client that authenticates with HTTP Basic
// auth (username and API key from cfg), identifies itself with a descriptive
// user agent and sends all requests through the rate-limiting round tripper.
//
// The returned error is currently always nil.
func newE621Client(cfg config.E621) (*openapi.APIClient, error) {
	credentials := base64.StdEncoding.EncodeToString([]byte(cfg.Username + ":" + cfg.ApiKey))

	clientCfg := openapi.NewConfiguration()
	clientCfg.DefaultHeader["Authorization"] = "Basic " + credentials
	clientCfg.UserAgent = fmt.Sprintf("gorse-playground-scraper used by %s | (made by the Anthrove Team)", cfg.Username)
	clientCfg.Debug = false

	// Use a dedicated client: http.DefaultClient is a shared pointer, so
	// replacing its Transport would rate-limit every other user of the default
	// client in this process.
	clientCfg.HTTPClient = &http.Client{
		Transport: newRateMiddleware(&http.Transport{}),
	}

	return openapi.NewAPIClient(clientCfg), nil
}

// GetAllFavorites fetches every favorite of the given user by requesting
// consecutive pages, starting at page 1, until a page comes back empty.
// It returns a non-nil (possibly empty) slice on success and nil together
// with the error of the first failing page request otherwise.
func GetAllFavorites(ctx context.Context, userID int) ([]openapi.Post, error) {
	posts := make([]openapi.Post, 0)

	for page := 1; ; page++ {
		currentFavs, err := GetFavoritePage(ctx, userID, page)
		if err != nil {
			return nil, err
		}
		if len(currentFavs) == 0 {
			return posts, nil
		}
		posts = append(posts, currentFavs...)
	}
}

// GetFavoritePage fetches a single page of favorites for the given user.
// On failure it returns nil and the wrapped API error.
func GetFavoritePage(ctx context.Context, userId int, pageIdentifier int) ([]openapi.Post, error) {
	favorites, _, err := apiClient.FavoritesAPI.
		ListFavorites(ctx).
		Page(int32(pageIdentifier)).
		UserId(int32(userId)).
		Execute()
	if err != nil {
		return nil, fmt.Errorf("listing favorites of user %d (page %d): %w", userId, pageIdentifier, err)
	}
	return favorites.Posts, nil
}

// SubmitItems downloads today's e621 post export, streams it through a
// conversion pipeline and upserts every post created after the cutoff date
// in batches.
//
// Items whose timestamp cannot be parsed are logged and skipped. The function
// returns the first download, open or upsert error, or the context error when
// ctx was cancelled while the export was being processed.
func SubmitItems(ctx context.Context) error {
	const batchSize = 20_000

	// Only posts created strictly after this instant are submitted.
	cutoff := time.Date(2024, 1, 1, 1, 1, 1, 0, time.UTC)

	currentDate := time.Now().Format("2006-01-02")
	if err := utils.DownloadE6Data(ctx, "posts-"+currentDate+".csv.gz", "post-file.csv"); err != nil {
		return fmt.Errorf("downloading e621 post export: %w", err)
	}

	fileReader, err := os.Open("post-file.csv")
	if err != nil {
		return fmt.Errorf("opening e621 post export: %w", err)
	}
	// Registered first so it runs last, after the pipeline has been shut down.
	defer fileReader.Close()

	pipelineCtx, cancel := context.WithCancel(ctx)
	inputE621PostChannel := make(chan e621.Post)
	outputAnthrovePostChannel := make(chan models.GorseItem)

	// On every exit path stop the forwarder and drain the converter, so an
	// early return cannot leave a goroutine blocked forever on an unbuffered
	// send. On the normal path the output channel is already closed and the
	// drain returns immediately.
	defer func() {
		cancel()
		for range outputAnthrovePostChannel {
		}
	}()

	// NOTE(review): presumably GetStreamingData stops producing once the
	// context is cancelled - confirm against its implementation.
	postChan := utils.GetStreamingData[e621.Post](pipelineCtx, fileReader)

	// Forward parsed posts into the converter input until the stream ends or
	// the pipeline is cancelled.
	go func() {
		defer close(inputE621PostChannel)
		for {
			select {
			case <-pipelineCtx.Done():
				return
			case post, ok := <-postChan:
				if !ok {
					log.Println("Loading ended")
					return
				}
				select {
				case inputE621PostChannel <- post:
				case <-pipelineCtx.Done():
					return
				}
			}
		}
	}()

	// Convert e621 posts into Gorse items.
	go func() {
		defer close(outputAnthrovePostChannel)
		if err := postToItem(inputE621PostChannel, outputAnthrovePostChannel); err != nil {
			log.Println(err)
		}
		log.Println("Convert ended")
	}()

	log.Println("Start with comparison check")

	items := make([]models.GorseItem, 0, batchSize)
	length := 0 // number of items accepted so far
	for item := range outputAnthrovePostChannel {
		timeDate, err := time.Parse(time.DateTime, item.Timestamp)
		if err != nil {
			log.Println(err)
			continue
		}
		if !timeDate.After(cutoff) {
			continue
		}

		items = append(items, item)
		length++
		if len(items) < batchSize {
			continue
		}

		log.Println("Worked ", length, " items")
		if err := UpsertItems(ctx, items); err != nil {
			return err
		}
		// Fresh slice: UpsertItems may still hold on to the previous batch.
		items = make([]models.GorseItem, 0, batchSize)
	}

	// A cancelled context means the stream was cut short; report that instead
	// of pretending the export was processed completely.
	if err := ctx.Err(); err != nil {
		return err
	}

	if len(items) == 0 {
		return nil
	}
	return UpsertItems(ctx, items)
}

// newRateMiddleware wraps transport in a round tripper that waits on the
// package-level rate limiter before every request.
func newRateMiddleware(transport *http.Transport) http.RoundTripper {
	return &rateMiddleware{
		transport: transport,
	}
}

// rateMiddleware is an http.RoundTripper that throttles outgoing requests
// with the package-level rateLimit before delegating to transport.
type rateMiddleware struct {
	transport *http.Transport
}

// RoundTrip blocks until the rate limiter grants a slot or the request context
// is done, then forwards the request to the wrapped transport. It returns the
// limiter error without sending the request when waiting fails.
func (r rateMiddleware) RoundTrip(request *http.Request) (*http.Response, error) {
	if err := rateLimit.Wait(request.Context()); err != nil {
		return nil, err
	}
	return r.transport.RoundTrip(request)
}

// postToItem converts every e621 post received on input into a Gorse item and
// sends it on output. It returns once input is closed; it does not close
// output. The returned error is currently always nil.
func postToItem(input chan e621.Post, output chan models.GorseItem) error {
	for e6Post := range input {
		output <- models.GorseItem{
			Comment:  e6Post.Description,
			IsHidden: e6Post.IsDeleted,
			ItemId:   strconv.Itoa(e6Post.ID),
			// Fields (rather than Split on " ") avoids empty labels for an
			// empty tag string or repeated whitespace.
			Labels:    strings.Fields(e6Post.TagString),
			Timestamp: e6Post.CreatedAt,
		}
	}
	return nil
}