New and generic scrape algorithm #11
@ -1,12 +0,0 @@
|
||||
package otter
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/database"
|
||||
"git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/models"
|
||||
)
|
||||
|
||||
func ConnectToDatabase(ctx context.Context, config models.DatabaseConfig) error {
|
||||
return database.Connect(ctx, config)
|
||||
}
|
129
pkg/plug/algorithm.go
Normal file
129
pkg/plug/algorithm.go
Normal file
@ -0,0 +1,129 @@
|
||||
package plug
|
||||
|
||||
import (
|
||||
"context"
|
||||
"git.anthrove.art/Anthrove/otter-space-sdk/v4/pkg/models"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/codes"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
type User struct {
|
||||
userFavoriteCount int64
|
||||
userName string
|
||||
userID string
|
||||
}
|
||||
|
||||
type Favorites struct {
|
||||
posts []models.Post
|
||||
nextPage string
|
||||
lastPage string
|
||||
}
|
||||
|
||||
type Plug interface {
|
||||
// GetFavoritePage
|
||||
// The API Key can be an empty string if it's not supplied by the user, in this case the default API Key should be used
|
||||
GetFavoritePage(ctx context.Context, apiKey string, userSource models.UserSource, pageIdentifier string) (Favorites, error)
|
||||
|
||||
// GetUserProfile
|
||||
// The API Key can be an empty string if it's not supplied by the user, in this case the default API Key should be used
|
||||
GetUserProfile(ctx context.Context, apiKey string, userSource models.UserSource) (User, error)
|
||||
}
|
||||
|
||||
func Algorithm(ctx context.Context, plugInterface Plug, db *gorm.DB, userSource models.UserSource, deepScrape bool, apiKey string) (TaskSummery, error) {
|
||||
ctx, span := tracer.Start(ctx, "mainScrapeAlgorithm")
|
||||
defer span.End()
|
||||
|
||||
span.SetAttributes(
|
||||
attribute.String("user_source_id", string(userSource.ID)),
|
||||
attribute.String("user_source_user_id", string(userSource.UserID)),
|
||||
attribute.String("user_source_source_id", string(userSource.SourceID)),
|
||||
)
|
||||
|
||||
basicLoggingInfo := log.Fields{
|
||||
"user_source_id": userSource.ID,
|
||||
"user_source_user_id": userSource.UserID,
|
||||
"user_source_source_id": userSource.SourceID,
|
||||
}
|
||||
|
||||
log.WithContext(ctx).WithFields(basicLoggingInfo).Info("Starting mainScrapeAlgorithm")
|
||||
|
||||
taskSummery := TaskSummery{
|
||||
AddedPosts: 0,
|
||||
DeletedPosts: 0,
|
||||
}
|
||||
|
||||
anthroveUserFavCount, err := getUserFavoriteCountFromDatabase(ctx, db, userSource.UserID, userSource.ID)
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
log.WithContext(ctx).WithFields(basicLoggingInfo).WithError(err).Error("Failed to get user favorite count from db")
|
||||
return taskSummery, err
|
||||
}
|
||||
|
||||
Alphyron marked this conversation as resolved
Outdated
|
||||
profile, err := plugInterface.GetUserProfile(ctx, apiKey, userSource)
|
||||
Alphyron marked this conversation as resolved
Outdated
Alphyron
commented
Issue still exists Issue still exists
https://git.anthrove.art/Anthrove/plug-e621/issues/15
|
||||
if err != nil {
|
||||
return taskSummery, err
|
||||
}
|
||||
|
||||
nextPage := ""
|
||||
|
||||
outer:
|
||||
for anthroveUserFavCount < profile.userFavoriteCount {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
break outer
|
||||
default:
|
||||
span.AddEvent("Executing getFavorites request")
|
||||
|
||||
favorites, err := plugInterface.GetFavoritePage(ctx, apiKey, userSource, nextPage)
|
||||
span.AddEvent("Finished executing getFavorites request")
|
||||
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
log.WithContext(ctx).WithFields(basicLoggingInfo).WithError(err).Error("Failed to execute favorites page")
|
||||
return taskSummery, err
|
||||
}
|
||||
|
||||
if len(favorites.posts) <= 0 {
|
||||
span.AddEvent("No more favorites found")
|
||||
log.WithContext(ctx).WithFields(basicLoggingInfo).Info("No more favorites found")
|
||||
break outer
|
||||
}
|
||||
|
||||
summery, err := BatchPostProcessingWithSummery(ctx, userSource, favorites.posts)
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
log.WithContext(ctx).WithFields(basicLoggingInfo).WithError(err).Error("Failed in BatchPostProcessing")
|
||||
return taskSummery, err
|
||||
}
|
||||
|
||||
if summery.AddedFavorites != int64(len(favorites.posts)) {
|
||||
span.AddEvent("user has no more favorites to add")
|
||||
break outer
|
||||
}
|
||||
|
||||
nextPage = favorites.nextPage
|
||||
taskSummery.AddedPosts += int(summery.AddedFavorites)
|
||||
}
|
||||
}
|
||||
|
||||
span.AddEvent("Completed scraping algorithm")
|
||||
log.WithContext(ctx).WithFields(basicLoggingInfo).Info("Completed scraping algorithm")
|
||||
return taskSummery, nil
|
||||
}
|
||||
|
||||
// getUserFavoriteCountFromDatabase
|
||||
func getUserFavoriteCountFromDatabase(ctx context.Context, gorm *gorm.DB, userID models.UserID, userSourceID models.UserSourceID) (int64, error) {
|
||||
var count int64
|
||||
|
||||
err := gorm.WithContext(ctx).Model(&models.UserFavorite{}).Where("user_id = ? AND user_source_id = ?", userID, userSourceID).Count(&count).Error
|
||||
if err != nil {
|
||||
return count, err
|
||||
}
|
||||
|
||||
return count, nil
|
||||
}
|
@ -17,20 +17,20 @@ import (
|
||||
|
||||
type server struct {
|
||||
gRPC.UnimplementedPlugConnectorServer
|
||||
ctx map[string]context.CancelFunc
|
||||
taskExecutionFunction TaskExecution
|
||||
sendMessageExecution SendMessageExecution
|
||||
getMessageExecution GetMessageExecution
|
||||
source models.Source
|
||||
ctx map[string]context.CancelFunc
|
||||
plugInterface Plug
|
||||
sendMessageExecution SendMessageExecution
|
||||
getMessageExecution GetMessageExecution
|
||||
source models.Source
|
||||
}
|
||||
|
||||
func NewGrpcServer(source models.Source, taskExecutionFunction TaskExecution, sendMessageExecution SendMessageExecution, getMessageExecution GetMessageExecution) gRPC.PlugConnectorServer {
|
||||
func NewGrpcServer(source models.Source, plugAPIInterface Plug, sendMessageExecution SendMessageExecution, getMessageExecution GetMessageExecution) gRPC.PlugConnectorServer {
|
||||
return &server{
|
||||
ctx: make(map[string]context.CancelFunc),
|
||||
taskExecutionFunction: taskExecutionFunction,
|
||||
sendMessageExecution: sendMessageExecution,
|
||||
getMessageExecution: getMessageExecution,
|
||||
source: source,
|
||||
ctx: make(map[string]context.CancelFunc),
|
||||
plugInterface: plugAPIInterface,
|
||||
sendMessageExecution: sendMessageExecution,
|
||||
getMessageExecution: getMessageExecution,
|
||||
source: source,
|
||||
}
|
||||
}
|
||||
|
||||
@ -116,7 +116,12 @@ func (s *server) TaskStart(ctx context.Context, creation *gRPC.PlugTaskCreation)
|
||||
|
||||
go func() {
|
||||
var err error
|
||||
taskSummery, err := s.taskExecutionFunction(taskCtx, userSource, creation.DeepScrape, creation.ApiKey)
|
||||
gorm, err := database.GetGorm(taskCtx)
|
||||
log.WithContext(taskCtx).WithError(err).WithField("task_id", id).Error("Failed to get Gorm client")
|
||||
span.RecordError(err)
|
||||
span.SetStatus(codes.Error, err.Error())
|
||||
|
||||
taskSummery, err := Algorithm(taskCtx, s.plugInterface, gorm, userSource, creation.DeepScrape, creation.ApiKey)
|
||||
if err != nil {
|
||||
log.WithContext(taskCtx).WithError(err).WithField("task_id", id).Error("Task execution failed")
|
||||
span.RecordError(err)
|
||||
|
@ -28,14 +28,13 @@ type TaskSummery struct {
|
||||
DeletedPosts int
|
||||
}
|
||||
|
||||
type TaskExecution func(ctx context.Context, userSource models.UserSource, deepScrape bool, apiKey string) (TaskSummery, error)
|
||||
type SendMessageExecution func(ctx context.Context, userSource models.UserSource, message string) error
|
||||
type GetMessageExecution func(ctx context.Context, userSource models.UserSource) ([]Message, error)
|
||||
|
||||
var (
|
||||
taskExecutionFunction TaskExecution
|
||||
sendMessageExecution SendMessageExecution
|
||||
getMessageExecution GetMessageExecution
|
||||
sendMessageExecution SendMessageExecution
|
||||
getMessageExecution GetMessageExecution
|
||||
plugAPIInterface Plug
|
||||
)
|
||||
|
||||
func Listen(ctx context.Context, listenAddr string, source models.Source) error {
|
||||
@ -78,7 +77,7 @@ func Listen(ctx context.Context, listenAddr string, source models.Source) error
|
||||
grpc.StatsHandler(otelgrpc.NewServerHandler()),
|
||||
)
|
||||
|
||||
pb.RegisterPlugConnectorServer(grpcServer, NewGrpcServer(source, taskExecutionFunction, sendMessageExecution, getMessageExecution))
|
||||
pb.RegisterPlugConnectorServer(grpcServer, NewGrpcServer(source, plugAPIInterface, sendMessageExecution, getMessageExecution))
|
||||
|
||||
go func() {
|
||||
err = grpcServer.Serve(lis)
|
||||
@ -98,8 +97,8 @@ func Listen(ctx context.Context, listenAddr string, source models.Source) error
|
||||
return nil
|
||||
}
|
||||
|
||||
func SetTaskExecutionFunction(function TaskExecution) {
|
||||
taskExecutionFunction = function
|
||||
func SetTaskExecutionFunction(plugInterface Plug) {
|
||||
Alphyron marked this conversation as resolved
Outdated
Alphyron
commented
irgendwie passt der Name der funktion nicht zu dem was gemacht wird. Es wird weder eine Function gesetzt, noch wird ein execution stuff gesetzt. irgendwie passt der Name der funktion nicht zu dem was gemacht wird. Es wird weder eine Function gesetzt, noch wird ein execution stuff gesetzt.
Hier ist es ja nur ein Interface als Schnittstelle zu der PlugAPI
|
||||
plugAPIInterface = plugInterface
|
||||
}
|
||||
|
||||
func SetSendMessageExecutionFunction(function SendMessageExecution) {
|
||||
|
Loading…
Reference in New Issue
Block a user
Hier muss eine sonderbedingung gemacht weerden, weil UserFavoriteCount kann ja -1 sein (wenn es nicht ausgelesen werden kann.
@SoXX to fix this problem and the Issue one comment bellow I would recommend, to change the algorithm to the following:
https://www.plantuml.com/plantuml/uml/RP1FRzH03CNl_XIlj-mDhea4BIgWIXMAgChTk-D9HyoVoEFija9yTtOIYTBIkOtzUy_Flgp6QakAT34hJucnLFaXQk70ySQZPA8LeVwsiCDz5Hsr-105Nal269VfQhmPwFJGQftf8ZiwFw3_AeOlV1nvsk1LFH2mUSbZg1RoXB5KgnlHs7__rv_-vvkdRFqtFPgcYQwSGoxsch62APOzHypdF-AvERo9RsEUSS_7bKPNr8aYL8Gq5pNETe4i9wa67xJQRa1B43owpo_TFk3T3hy80FOg_9E0tslOQ_4X2xx9esr7i8AxW_8i0qbswtM9-liv5cuvywkr1kg_or6qIfk4spLdBYUKw9vpzNyTjZXTi1Thm1v4fPKODN6CSC5xbNmGxCLE8haX2LtYfxtWFLBzkDji7Pj0nHRDk5jIOdtgYQgLcIubkoN5Fm00
fixed an issue @SoXX
https://plantuml.github.io/plantuml-core/raw.html?ZL9DJzmm4BtxLpmkABbKzBefL53Q2WbLKH6zUpVZh5N74uqdk_3lQoTBAGkjUeldVSoRoPoCOll1OahqWqJzneOR1ux69BMYPdNBjiDz8cc5dGy49poW3LD_sTuqPhyjjSgnq8waDWRm3fMDkXNUKH5-iRjFF4N51uoBnxj3cSKhBTZfwJ_02vpLq2r5L2eJrRa9i1QoF_CNnSwxqdVrbHHPsDh-aB8uDK-HvdBBIHFEpuyCyHV7UNXwy4bzx0_YPVvdi_bzVTMyvkSiTC3VyYYQ8hhiEaJMOuuo-i1h6p3cDq86cpVfIvjdkhExcOsAfIE1J_33oABvfvmWqfuovhmahHiyRUVpIxUB_rpGDJaV2T_eGPUTv1Xt6x4ZDIQpvkimPdK_XhAPoF5eoiQilfV3ILGQutQE7NaF3jqVCNyxBEagho-cAac5IiaY3JO4v7nEA6u8Qz8zvJaoPK0rsCSruJs6zB5UR3kh126T9E9k6WOhZqqvVXv_0G00