feat: start implementing new algorithm
All checks were successful
Gitea Build Check / Build (push) Successful in 1m2s
Gitea Build Check / Build (pull_request) Successful in 1m4s

This commit is contained in:
SoXX 2024-10-26 23:13:24 +02:00
parent 1435ae8ea4
commit b2db0664d6
2 changed files with 73 additions and 69 deletions

View File

@ -2,10 +2,13 @@ package plug
import ( import (
"context" "context"
"git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/database"
"git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/models" "git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/models"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
"slices"
) )
type User struct { type User struct {
@ -59,47 +62,72 @@ func algorithm(ctx context.Context, plugInterface Plug, userSource models.UserSo
} }
nextPage := "" nextPage := ""
var newPosts []models.Post
var anthroveFaves []models.UserFavorite
outer: outer:
for anthroveUserFavCount < profile.UserFavoriteCount { for {
select { for anthroveUserFavCount < profile.UserFavoriteCount {
case <-ctx.Done(): select {
break outer case <-ctx.Done():
default:
span.AddEvent("Executing getFavorites request")
favorites, err := plugInterface.GetFavoritePage(ctx, apiKey, userSource, nextPage)
span.AddEvent("Finished executing getFavorites request")
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithFields(basicLoggingInfo).WithError(err).Error("Failed to execute favorites page")
return taskSummery, err
}
if len(favorites.Posts) <= 0 {
span.AddEvent("No more favorites found")
log.WithContext(ctx).WithFields(basicLoggingInfo).Info("No more favorites found")
break outer break outer
} default:
summery, err := BatchPostProcessingWithSummery(ctx, userSource, favorites.Posts) span.AddEvent("Executing getFavorites request")
if err != nil { favorites, err := plugInterface.GetFavoritePage(ctx, apiKey, userSource, nextPage)
span.RecordError(err) span.AddEvent("Finished executing getFavorites request")
span.SetStatus(codes.Error, err.Error()) if err != nil {
log.WithContext(ctx).WithFields(basicLoggingInfo).WithError(err).Error("Failed in BatchPostProcessing") span.RecordError(err)
return taskSummery, err span.SetStatus(codes.Error, err.Error())
} log.WithContext(ctx).WithFields(basicLoggingInfo).WithError(err).Error("Failed to execute favorites page")
return taskSummery, err
}
if summery.AddedFavorites != int64(len(favorites.Posts)) { if len(favorites.Posts) == 0 {
span.AddEvent("user has no more favorites to add") span.AddEvent("No more favorites found")
break outer log.WithContext(ctx).WithFields(basicLoggingInfo).Info("No more favorites found")
} break outer
}
nextPage = favorites.NextPage summery := BatchSummery{}
taskSummery.AddedPosts += int(summery.AddedFavorites) newPosts, anthroveFaves, summery, err = BatchPostProcessingWithSummery(ctx, userSource, favorites.Posts)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithFields(basicLoggingInfo).WithError(err).Error("Failed in BatchPostProcessing")
return taskSummery, err
}
nextPage = favorites.NextPage
taskSummery.AddedPosts += int(summery.AddedFavorites)
}
} }
break outer
}
if len(newPosts) > 0 {
err = database.CreatePostInBatch(ctx, newPosts, BatchSize)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithError(err).Error("Failed to create new posts in batch")
return taskSummery, err
}
span.AddEvent("Created new posts in batch", trace.WithAttributes(attribute.Int("batch_size", BatchSize)))
log.WithContext(ctx).WithFields(BasicLoggingFields).Info("Created new posts in batch")
}
if len(anthroveFaves) > 0 {
slices.Reverse(anthroveFaves)
err = database.CreateUserFavoriteInBatch(ctx, anthroveFaves, BatchSize)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to create user favorites in batch")
return taskSummery, err
}
span.AddEvent("Created user favorites in batch", trace.WithAttributes(attribute.Int("batch_size", BatchSize)))
log.WithContext(ctx).WithFields(BasicLoggingFields).Info("Created user favorites in batch")
} }
span.AddEvent("Completed scraping algorithm") span.AddEvent("Completed scraping algorithm")

View File

@ -3,6 +3,7 @@ package plug
import ( import (
"context" "context"
"slices" "slices"
"time"
"git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/database" "git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/database"
"git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/models" "git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/models"
@ -21,12 +22,7 @@ type BatchSummery struct {
AddedFavorites int64 AddedFavorites int64
} }
func BatchPostProcessing(ctx context.Context, userSource models.UserSource, posts []models.Post) error { func BatchPostProcessingWithSummery(ctx context.Context, userSource models.UserSource, posts []models.Post) ([]models.Post, []models.UserFavorite, BatchSummery, error) {
_, err := BatchPostProcessingWithSummery(ctx, userSource, posts)
return err
}
func BatchPostProcessingWithSummery(ctx context.Context, userSource models.UserSource, posts []models.Post) (BatchSummery, error) {
ctx, span := tracer.Start(ctx, "BatchPostProcessing") ctx, span := tracer.Start(ctx, "BatchPostProcessing")
defer span.End() defer span.End()
@ -49,7 +45,7 @@ func BatchPostProcessingWithSummery(ctx context.Context, userSource models.UserS
span.RecordError(err) span.RecordError(err)
span.SetStatus(codes.Error, err.Error()) span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to get Gorm DB") log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to get Gorm DB")
return BatchSummery{}, err return nil, nil, BatchSummery{}, err
} }
postIDs := make([]string, 0, len(posts)) postIDs := make([]string, 0, len(posts))
@ -64,7 +60,7 @@ func BatchPostProcessingWithSummery(ctx context.Context, userSource models.UserS
span.RecordError(err) span.RecordError(err)
span.SetStatus(codes.Error, err.Error()) span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to fetch existing posts") log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to fetch existing posts")
return BatchSummery{}, err return nil, nil, BatchSummery{}, err
} }
span.AddEvent("Fetched existing posts", trace.WithAttributes(attribute.Int("existing_post_count", len(existingPosts)))) span.AddEvent("Fetched existing posts", trace.WithAttributes(attribute.Int("existing_post_count", len(existingPosts))))
log.WithContext(ctx).WithFields(BasicLoggingFields).WithField("existing_post_count", len(existingPosts)).Info("Fetched existing posts") log.WithContext(ctx).WithFields(BasicLoggingFields).WithField("existing_post_count", len(existingPosts)).Info("Fetched existing posts")
@ -80,14 +76,14 @@ func BatchPostProcessingWithSummery(ctx context.Context, userSource models.UserS
span.RecordError(err) span.RecordError(err)
span.SetStatus(codes.Error, err.Error()) span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to fetch existing favorite posts") log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to fetch existing favorite posts")
return BatchSummery{}, err return nil, nil, BatchSummery{}, err
} }
span.AddEvent("Fetched existing favorite posts", trace.WithAttributes(attribute.Int("existing_fav_post_count", len(existingFavPostIDs)))) span.AddEvent("Fetched existing favorite posts", trace.WithAttributes(attribute.Int("existing_fav_post_count", len(existingFavPostIDs))))
log.WithContext(ctx).WithFields(BasicLoggingFields).WithField("existing_fav_post_count", len(existingFavPostIDs)).Info("Fetched existing favorite posts") log.WithContext(ctx).WithFields(BasicLoggingFields).WithField("existing_fav_post_count", len(existingFavPostIDs)).Info("Fetched existing favorite posts")
anthroveFaves := make([]models.UserFavorite, 0, len(existingPosts)) anthroveFaves := make([]models.UserFavorite, 0, len(existingPosts))
newPosts := make([]models.Post, 0, len(existingPosts)) newPosts := make([]models.Post, 0, len(existingPosts))
for _, post := range posts { for i, post := range posts {
if !slices.ContainsFunc(existingPosts, func(reference models.PostReference) bool { if !slices.ContainsFunc(existingPosts, func(reference models.PostReference) bool {
found := reference.SourcePostID == post.References[0].SourcePostID found := reference.SourcePostID == post.References[0].SourcePostID
if found { if found {
@ -102,9 +98,13 @@ func BatchPostProcessingWithSummery(ctx context.Context, userSource models.UserS
return found return found
}) { }) {
anthroveFaves = append(anthroveFaves, models.UserFavorite{ anthroveFaves = append(anthroveFaves, models.UserFavorite{
BaseModel: models.BaseModel[models.UserFavoriteID]{
CreatedAt: time.Now().Add(time.Millisecond * time.Duration(i) * -1),
},
UserID: userSource.UserID, UserID: userSource.UserID,
PostID: post.ID, PostID: post.ID,
UserSourceID: userSource.ID, UserSourceID: userSource.ID,
UserSource: models.UserSource{},
}) })
newPosts = append(newPosts, post) newPosts = append(newPosts, post)
} }
@ -112,31 +112,7 @@ func BatchPostProcessingWithSummery(ctx context.Context, userSource models.UserS
span.AddEvent("Processed posts for favorites and new posts", trace.WithAttributes(attribute.Int("new_post_count", len(newPosts)), attribute.Int("new_fav_count", len(anthroveFaves)))) span.AddEvent("Processed posts for favorites and new posts", trace.WithAttributes(attribute.Int("new_post_count", len(newPosts)), attribute.Int("new_fav_count", len(anthroveFaves))))
log.WithContext(ctx).WithFields(BasicLoggingFields).Info("Processed posts for favorites and new posts") log.WithContext(ctx).WithFields(BasicLoggingFields).Info("Processed posts for favorites and new posts")
if len(newPosts) > 0 { return newPosts, anthroveFaves, BatchSummery{
err = database.CreatePostInBatch(ctx, newPosts, BatchSize)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithError(err).Error("Failed to create new posts in batch")
return BatchSummery{}, err
}
span.AddEvent("Created new posts in batch", trace.WithAttributes(attribute.Int("batch_size", BatchSize)))
log.WithContext(ctx).WithFields(BasicLoggingFields).Info("Created new posts in batch")
}
if len(anthroveFaves) > 0 {
err = database.CreateUserFavoriteInBatch(ctx, anthroveFaves, BatchSize)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
log.WithContext(ctx).WithError(err).WithFields(BasicLoggingFields).Error("Failed to create user favorites in batch")
return BatchSummery{}, err
}
span.AddEvent("Created user favorites in batch", trace.WithAttributes(attribute.Int("batch_size", BatchSize)))
log.WithContext(ctx).WithFields(BasicLoggingFields).Info("Created user favorites in batch")
}
return BatchSummery{
AddedPosts: int64(len(newPosts)), AddedPosts: int64(len(newPosts)),
AddedFavorites: int64(len(anthroveFaves)), AddedFavorites: int64(len(anthroveFaves)),
}, nil }, nil