feat: Add batch post-processing functionality for scraping posts
This commit is contained in:
parent
7975791d9b
commit
f708706d50
110
pkg/plug/scrape.go
Normal file
110
pkg/plug/scrape.go
Normal file
@ -0,0 +1,110 @@
|
||||
package plug
|
||||
|
||||
import (
|
||||
"context"
|
||||
"slices"
|
||||
|
||||
"git.anthrove.art/Anthrove/otter-space-sdk/v3/pkg/database"
|
||||
"git.anthrove.art/Anthrove/otter-space-sdk/v3/pkg/models"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
// BatchSize is the batch size that BatchPostProcessing hands to
// database.CreatePostInBatch and database.CreateUserFavoriteInBatch.
// It is an exported variable, so importing packages can reassign it.
// NOTE(review): presumably the number of rows written per INSERT statement;
// confirm against the database package.
var BatchSize = 50
|
||||
|
||||
func BatchPostProcessing(ctx context.Context, userSource models.UserSource, posts []models.Post) error {
|
||||
|
||||
db, err := database.GetGorm(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
postIDs := make([]string, 0, len(posts))
|
||||
for _, post := range posts {
|
||||
postIDs = append(postIDs, post.References[0].SourcePostID)
|
||||
}
|
||||
|
||||
existingPosts, err := getAnthrovePost(ctx, db, userSource.SourceID, postIDs) // Third Query
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// for loop to convert PostReference to Post IDs
|
||||
var existingPostIDs []models.PostID
|
||||
for _, post := range existingPosts {
|
||||
existingPostIDs = append(existingPostIDs, models.PostID(post.PostID))
|
||||
}
|
||||
|
||||
var existingFavPostIDs []models.PostID
|
||||
existingFavPostIDs, err = getAlreadyFavoritesPostIDs(ctx, db, existingPostIDs, userSource.ID) // Fourth Query
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
anthroveFaves := make([]models.UserFavorite, 0, len(existingPosts))
|
||||
newPosts := make([]models.Post, 0, len(existingPosts))
|
||||
for _, post := range posts {
|
||||
if !slices.ContainsFunc(existingPosts, func(reference models.PostReference) bool {
|
||||
found := reference.SourcePostID == post.References[0].SourcePostID
|
||||
if found {
|
||||
// If Favoure is already existing skip this step TODO
|
||||
if !slices.Contains(existingFavPostIDs, models.PostID(reference.PostID)) {
|
||||
anthroveFaves = append(anthroveFaves, models.UserFavorite{
|
||||
UserID: userSource.UserID,
|
||||
PostID: models.PostID(reference.PostID),
|
||||
UserSourceID: userSource.ID,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return found
|
||||
}) {
|
||||
// Here we always need to create a fav because new post, so new fav too!
|
||||
anthroveFaves = append(anthroveFaves, models.UserFavorite{
|
||||
UserID: userSource.UserID,
|
||||
PostID: post.ID,
|
||||
UserSourceID: userSource.ID,
|
||||
})
|
||||
|
||||
newPosts = append(newPosts, post)
|
||||
}
|
||||
}
|
||||
|
||||
if len(newPosts) > 0 {
|
||||
err = database.CreatePostInBatch(ctx, newPosts, BatchSize) // Fifth Query
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if len(anthroveFaves) > 0 {
|
||||
err = database.CreateUserFavoriteInBatch(ctx, anthroveFaves, BatchSize) // Sixth Query
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getAnthrovePost(ctx context.Context, gorm *gorm.DB, id models.SourceID, postIDs []string) ([]models.PostReference, error) {
|
||||
var existingPosts []models.PostReference
|
||||
|
||||
err := gorm.WithContext(ctx).Model(models.PostReference{}).Find(&existingPosts, "source_id = ? AND source_post_id IN ?", id, postIDs).Error
|
||||
if err != nil {
|
||||
return existingPosts, err
|
||||
}
|
||||
|
||||
return existingPosts, nil
|
||||
}
|
||||
|
||||
func getAlreadyFavoritesPostIDs(ctx context.Context, gorm *gorm.DB, existingPostIDs []models.PostID, userSourceID models.UserSourceID) ([]models.PostID, error) {
|
||||
var existingFavPostIDS []models.PostID
|
||||
|
||||
// SELECT * FROM UserFavourite WHERE post_id IN (Post IDS) AND user_source_id = {your id} // Second Query
|
||||
err := gorm.WithContext(ctx).Model(&models.UserFavorite{}).Select("post_id").Find(&existingFavPostIDS, "user_source_id = ? AND post_id IN ?", userSourceID, existingPostIDs).Error
|
||||
if err != nil {
|
||||
return existingFavPostIDS, err
|
||||
}
|
||||
|
||||
return existingFavPostIDS, nil
|
||||
}
|
Loading…
Reference in New Issue
Block a user