# Build stage: compiles a static Linux binary of ./cmd/playground.
FROM golang:alpine AS builder

# Build outside of GOPATH. The golang image sets GOPATH=/go, and the go tool
# does not accept a go.mod placed directly in the GOPATH root, so copying the
# module into /go breaks module-mode commands such as `go mod download`.
WORKDIR /src

# Install dependencies
# NOTE(review): swag is installed here but never invoked in this Dockerfile -
# confirm it is still needed.
RUN apk add -U --no-cache ca-certificates && update-ca-certificates && go install github.com/swaggo/swag/cmd/swag@latest

# Cache dependencies
COPY go.mod go.sum ./
RUN go mod download

# Copy source code
COPY . ./

# Build the application
RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags "-w -s" -o /app ./cmd/playground/

# Runtime stage: only the binary, the CA bundle and the web assets.
FROM scratch

ARG VERSION
ENV VERSION=$VERSION

WORKDIR /

# scratch has no certificate store; reuse the one prepared in the builder.
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
COPY --from=builder /app ./
COPY web ./web

EXPOSE 8080
CMD ["/app"]
userid.(string), + }}) + + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"upsert favorite error": err.Error()}) + } + + c.JSON(http.StatusOK, gin.H{"item": id}) + }) router.Run(":8080") } + +func Routine(c context.Context) { + now := time.Now() + next := time.Date(now.Year(), now.Month(), now.Day(), 4, 0, 0, 0, now.Location()) + if now.After(next) { + next = next.Add(24 * time.Hour) + } + duration := next.Sub(now) + time.Sleep(duration) + ticker := time.NewTicker(24 * time.Hour) + + for { + select { + case <-c.Done(): + ticker.Stop() + return + case <-ticker.C: + logic.SubmitItems(c) + } + } + +} diff --git a/internal/logic/e621.go b/internal/logic/e621.go index d72e211..9c8345c 100644 --- a/internal/logic/e621.go +++ b/internal/logic/e621.go @@ -5,11 +5,18 @@ import ( "encoding/base64" "fmt" "git.anthrove.art/Anthrove/gorse-playground/internal/config" + "git.anthrove.art/Anthrove/gorse-playground/pkg/e621" + "git.anthrove.art/Anthrove/gorse-playground/pkg/models" + "git.anthrove.art/Anthrove/gorse-playground/pkg/utils" "github.com/anthrove/openapi-e621-go" "github.com/caarlos0/env/v11" "golang.org/x/time/rate" "log" "net/http" + "os" + "strconv" + "strings" + "time" _ "github.com/joho/godotenv/autoload" ) @@ -80,6 +87,83 @@ func GetFavoritePage(ctx context.Context, userId int, pageIdentifier int) ([]ope return favorites.Posts, nil } +func SubmitItems(ctx context.Context) error { + currentDate := time.Now().Format("2006-01-02") + err := utils.DownloadE6Data(ctx, "posts-"+currentDate+".csv.gz", "post-file.csv") + + if err != nil { + return err + } + + fileReader, err := os.Open("post-file.csv") + + if err != nil { + return err + } + + inputE621PostChannel := make(chan e621.Post) + outputAnthrovePostChannel := make(chan models.GorseItem) + postChan := utils.GetStreamingData[e621.Post](ctx, fileReader) + + go func() { + defer close(inputE621PostChannel) + for post := range postChan { + inputE621PostChannel <- post + } + log.Println("Loading ended") + 
}() + + go func() { + defer close(outputAnthrovePostChannel) + err := postToItem(inputE621PostChannel, outputAnthrovePostChannel) + if err != nil { //TODO: DEADLOCK + log.Println(err) + } + log.Println("Convert ended") + }() + + log.Println("Start with comparison check") + + items := make([]models.GorseItem, 0) + length := 0 + + for item := range outputAnthrovePostChannel { + timeDate, err := time.Parse(time.DateTime, item.Timestamp) + + if err != nil { + log.Println(err) + continue + } + + if !timeDate.After(time.Date(2024, 1, 1, 1, 1, 1, 0, time.UTC)) { + continue + } + + items = append(items, item) + + if length%20_000 == 0 && length > 0 { + log.Println("Worked ", length, " items") + err := UpsertItems(ctx, items) + + if err != nil { + return err + } + + items = make([]models.GorseItem, 0) + } + + length++ + } + + err = UpsertItems(ctx, items) + + if err != nil { + return err + } + + return nil +} + func newRateMiddleware(transport *http.Transport) http.RoundTripper { return &rateMiddleware{ transport: transport, @@ -99,3 +183,19 @@ func (r rateMiddleware) RoundTrip(request *http.Request) (*http.Response, error) return r.transport.RoundTrip(request) } + +func postToItem(input chan e621.Post, output chan models.GorseItem) error { + for e6Post := range input { + + tagParts := strings.Split(e6Post.TagString, " ") + + output <- models.GorseItem{ + Comment: e6Post.Description, + IsHidden: e6Post.IsDeleted, + ItemId: strconv.Itoa(e6Post.ID), + Labels: tagParts, + Timestamp: e6Post.CreatedAt, + } + } + return nil +} diff --git a/internal/logic/gorse.go b/internal/logic/gorse.go index 940ade8..e02ba07 100644 --- a/internal/logic/gorse.go +++ b/internal/logic/gorse.go @@ -44,12 +44,14 @@ func GetUserFavorites(ctx context.Context, userid string, page int) ([]string, e } q := req.URL.Query() - q.Set("name", "50") - q.Set("offset", strconv.Itoa((page-1)*50)) + q.Set("n", "20") + q.Set("offset", strconv.Itoa((page-1)*20)) + req.URL.RawQuery = q.Encode() req = 
// Post is one row of the posts CSV export that SubmitItems downloads and
// streams through utils.GetStreamingData.
//
// Each field is bound to the CSV column named in its `csv` tag. The binding is
// resolved against the header row by name, so the column order in the file
// does not matter and columns without a matching field are ignored.
type Post struct {
	ID         int `csv:"id"`
	UploaderID int `csv:"uploader_id"`
	// CreatedAt is kept as the raw cell text; SubmitItems parses it with the
	// time.DateTime layout.
	CreatedAt   string `csv:"created_at"`
	MD5         string `csv:"md5"`
	Source      string `csv:"source"`
	Rating      string `csv:"rating"`
	ImageWidth  int    `csv:"image_width"`
	ImageHeight int    `csv:"image_height"`
	// TagString holds all tags in one cell; postToItem splits it on spaces to
	// build the item labels.
	TagString  string `csv:"tag_string"`
	LockedTags string `csv:"locked_tags"`
	FavCount   int    `csv:"fav_count"`
	FileExt    string `csv:"file_ext"`
	// NOTE(review): integer cells that are empty or not a plain integer decode
	// to 0 because the CSV decoder discards parse errors - presumably relevant
	// for ParentID, ApproverID and Duration; confirm against the export format.
	ParentID     int `csv:"parent_id"`
	ChangeSeq    int `csv:"change_seq"`
	ApproverID   int `csv:"approver_id"`
	FileSize     int `csv:"file_size"`
	CommentCount int `csv:"comment_count"`
	// Description becomes the Gorse item comment in postToItem.
	Description string `csv:"description"`
	Duration    int    `csv:"duration"`
	UpdatedAt   string `csv:"updated_at"`
	// IsDeleted is mapped to the Gorse item's IsHidden flag in postToItem.
	IsDeleted      bool `csv:"is_deleted"`
	IsPending      bool `csv:"is_pending"`
	IsFlagged      bool `csv:"is_flagged"`
	Score          int  `csv:"score"`
	UpScore        int  `csv:"up_score"`
	DownScore      int  `csv:"down_score"`
	IsRatingLocked bool `csv:"is_rating_locked"`
	IsStatusLocked bool `csv:"is_status_locked"`
	IsNoteLocked   bool `csv:"is_note_locked"`
}
// csvBinding ties one struct field (by index) to the CSV column that feeds it.
type csvBinding struct {
	field  int
	column int
}

// GetStreamingFileData opens filePath and streams its CSV rows, decoded into
// values of T, on the returned channel. See GetStreamingData for the decoding
// rules. The file is closed once the stream has ended.
//
// As before, a file that cannot be opened terminates the process via
// log.Fatal, because the channel-only signature has no way to report an error.
func GetStreamingFileData[T any](ctx context.Context, filePath string) chan T {
	if ctx == nil {
		ctx = context.Background()
	}

	csvIn, err := os.Open(filePath)
	if err != nil {
		log.Fatal(err)
	}

	out := make(chan T)
	go func() {
		defer close(out)
		// relayStream only returns after the inner stream is closed, i.e. after
		// the reader goroutine has stopped touching the file.
		defer csvIn.Close()

		relayStream[T](ctx, GetStreamingData[T](ctx, csvIn), out)
	}()
	return out
}

// GetStreamingData reads CSV from rc and streams one decoded T per data row on
// the returned channel, which is closed when the input is exhausted or ctx is
// cancelled. The first row is the header; struct fields of T are bound to
// columns through their `csv` tag. T must be a struct type.
//
// An entirely empty input yields an empty stream. Any other read or CSV syntax
// error still terminates the process via log.Fatal, as in the original
// implementation (the signature cannot carry an error).
func GetStreamingData[T any](ctx context.Context, rc io.Reader) chan T {
	if ctx == nil {
		ctx = context.Background()
	}

	ch := make(chan T)
	go func() {
		defer close(ch)

		r := csv.NewReader(rc)
		header, err := r.Read()
		if err == io.EOF {
			// No header at all: nothing to stream.
			return
		}
		if err != nil {
			log.Fatal(err)
		}

		inputChan := make(chan []string)
		parsed := parseRecord[T](header, inputChan)

		go func() {
			defer close(inputChan)
			for {
				if ctx.Err() != nil {
					return
				}

				rec, err := r.Read()
				if err == io.EOF {
					break
				}
				if err != nil {
					log.Fatal(err)
				}
				if len(rec) == 0 {
					continue
				}

				select {
				case inputChan <- rec:
				case <-ctx.Done():
					return
				}
			}
			log.Println("Input finished")
		}()

		relayStream[T](ctx, parsed, ch)
	}()
	return ch
}

// relayStream copies src into dst until src is closed or ctx is cancelled. On
// cancellation it keeps receiving from src and discards the values, so the
// producing goroutines can finish instead of blocking on a send forever.
func relayStream[T any](ctx context.Context, src <-chan T, dst chan<- T) {
	for v := range src {
		select {
		case dst <- v:
		case <-ctx.Done():
			for range src {
				// Discard: nobody is listening any more.
			}
			return
		}
	}
}

// parseRecord decodes every record received on input into a fresh T and sends
// it on the returned channel, which is closed after input is closed.
//
// header names the columns of each record. A field of T is filled from the
// column whose name equals the field's `csv` tag; fields without a tag, without
// a matching column, or of an unsupported type keep their zero value. T must
// be a struct type.
func parseRecord[T any](header []string, input chan []string) chan T {
	channel := make(chan T)
	go func() {
		defer close(channel)

		var zero T
		et := reflect.TypeOf(zero)
		// Resolve the field/column mapping once instead of once per record.
		bindings := bindCSVColumns(et, header)

		for record := range input {
			if len(record) == 0 {
				continue
			}

			elem := reflect.New(et).Elem()
			for _, b := range bindings {
				if b.column >= len(record) {
					// Short record: leave the field at its zero value rather
					// than panicking on an out-of-range index.
					continue
				}
				setCSVField(elem.Field(b.field), record[b.column])
			}
			channel <- elem.Interface().(T)
		}
		log.Println("parsing ended")
	}()
	return channel
}

// bindCSVColumns maps the exported, `csv`-tagged fields of struct type t to
// their column index in header. When a column name occurs more than once the
// first occurrence wins. Fields of unsupported types are reported once and
// skipped.
func bindCSVColumns(t reflect.Type, header []string) []csvBinding {
	columns := make(map[string]int, len(header))
	for i := len(header) - 1; i >= 0; i-- {
		columns[header[i]] = i
	}

	bindings := make([]csvBinding, 0, t.NumField())
	for i := 0; i < t.NumField(); i++ {
		f := t.Field(i)
		name := f.Tag.Get("csv")
		if name == "" || !f.IsExported() {
			continue
		}
		col, ok := columns[name]
		if !ok {
			continue
		}
		// Probe a scratch value so unsupported types are logged here, once,
		// and not for every record.
		if !setCSVField(reflect.New(f.Type).Elem(), "") {
			log.Printf("Unknown Fieldtype: %s\n", f.Type.Name())
			continue
		}
		bindings = append(bindings, csvBinding{field: i, column: col})
	}
	return bindings
}

// setCSVField stores cell in field, converting it according to the field's
// kind (bool, signed/unsigned integers, floats, string) or, for time.Time,
// parsing it as RFC 3339. It reports false when the field type is unsupported.
//
// Conversion errors are deliberately not reported: an empty or malformed cell
// results in the zero value, matching the previous behaviour for CSV exports
// that leave optional numeric columns empty.
func setCSVField(field reflect.Value, cell string) bool {
	switch field.Kind() {
	case reflect.Bool:
		v, _ := strconv.ParseBool(cell)
		field.SetBool(v)
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		v, _ := strconv.ParseInt(cell, 10, field.Type().Bits())
		field.SetInt(v)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		v, _ := strconv.ParseUint(cell, 10, field.Type().Bits())
		field.SetUint(v)
	case reflect.Float32, reflect.Float64:
		v, _ := strconv.ParseFloat(cell, field.Type().Bits())
		field.SetFloat(v)
	case reflect.String:
		field.SetString(cell)
	case reflect.Struct:
		if field.Type() != reflect.TypeOf(time.Time{}) {
			return false
		}
		// RFC 3339 accepts everything the former layout
		// "2006-01-02T00:00:00Z" did, plus timestamps that are not midnight.
		v, _ := time.Parse(time.RFC3339, cell)
		field.Set(reflect.ValueOf(v))
	default:
		return false
	}
	return true
}