From 8f4767966a6363784dae9e6bcf9b67b9cfcfde88 Mon Sep 17 00:00:00 2001 From: SoXX Date: Tue, 2 Jun 2026 22:26:54 +0200 Subject: [PATCH] feat(listing): add per-page methods with HasNext flag GalleryPage / ScrapsPage / FavoritesPage return a ListingPage struct carrying the page items, the 1-based page number, and a HasNext flag that mirrors FA's "next page" link. This lets external scrapers drive their own pagination loop (checkpoint resume, parallel workers, custom throttling) without re-implementing the page-walking code. The existing iter.Seq2-shaped methods now share the same per-page primitive internally so behaviour stays in lock-step. --- gallery.go | 62 ++++++++++++++---- gallery_page_test.go | 149 +++++++++++++++++++++++++++++++++++++++++++ pagination.go | 18 ++++++ 3 files changed, 217 insertions(+), 12 deletions(-) create mode 100644 gallery_page_test.go diff --git a/gallery.go b/gallery.go index d9dec36..ce9c45b 100644 --- a/gallery.go +++ b/gallery.go @@ -12,7 +12,8 @@ import ( // Gallery iterates the submissions in a user's main gallery, newest first. // // Each yielded *Submission carries only the fields visible on the listing -// page: ID, Title, Author (for favorites), ThumbURL, and Rating. Call +// page: ID, Title, Author (for favorites), ThumbURL, Rating, and the Tags +// / CategorizedTags parsed from the figure's data-tags attribute. Call // [Client.GetSubmission] with the ID to load the full record. func (c *Client) Gallery(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] { return c.listGallerySection(ctx, name, urls.Gallery, opts, reqOpts) @@ -30,6 +31,50 @@ func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, r return c.listGallerySection(ctx, name, urls.Favorites, opts, reqOpts) } +// GalleryPage fetches a single page of /gallery/{name}/ and returns the +// items along with whether more pages exist. Pages are 1-based; pass 0 or +// 1 for the first page. Use this when driving pagination manually +// (resuming from a checkpoint, distributing pages across workers); use +// [Client.Gallery] when you just want every item in order. +func (c *Client) GalleryPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) { + return c.fetchListingPage(ctx, name, page, urls.Gallery, reqOpts) +} + +// ScrapsPage is the single-page counterpart to [Client.Scraps]. See +// [Client.GalleryPage] for usage notes. +func (c *Client) ScrapsPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) { + return c.fetchListingPage(ctx, name, page, urls.Scraps, reqOpts) +} + +// FavoritesPage is the single-page counterpart to [Client.Favorites]. See +// [Client.GalleryPage] for usage notes. +func (c *Client) FavoritesPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) { + return c.fetchListingPage(ctx, name, page, urls.Favorites, reqOpts) +} + +// fetchListingPage is the shared per-page primitive used by +// GalleryPage / ScrapsPage / FavoritesPage and the iterator engine. +func (c *Client) fetchListingPage( + ctx context.Context, + name string, + page int, + urlFn func(string, int) string, + reqOpts []Option, +) (*ListingPage, error) { + if page < 1 { + page = 1 + } + out := &ListingPage{Page: page} + err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error { + out.Items, out.HasNext = parseGalleryPage(doc, c.cfg.jsonListings) + return nil + }, reqOpts...) + if err != nil { + return nil, err + } + return out, nil +} + // listGallerySection is the shared engine for Gallery / Scraps / Favorites. // urlFn picks the section-specific URL builder; the rest of the pagination // machinery is identical across all three sections. @@ -47,28 +92,21 @@ func (c *Client) listGallerySection( if opts.reachedLimit(pagesFetched) { return } - var ( - items []*Submission - hasNext bool - ) - err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error { - items, hasNext = parseGalleryPage(doc, c.cfg.jsonListings) - return nil - }, reqOpts...) + lp, err := c.fetchListingPage(ctx, name, page, urlFn, reqOpts) if err != nil { yield(nil, err) return } pagesFetched++ - if len(items) == 0 { + if len(lp.Items) == 0 { return } - for _, s := range items { + for _, s := range lp.Items { if !yield(s, nil) { return } } - if !hasNext { + if !lp.HasNext { return } page++ diff --git a/gallery_page_test.go b/gallery_page_test.go new file mode 100644 index 0000000..b549de1 --- /dev/null +++ b/gallery_page_test.go @@ -0,0 +1,149 @@ +package fa + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" +) + +// fakeGalleryPage builds a minimal gallery-page response with two figures. +// hasNext controls whether the "Next" anchor is included so detectNextPage +// flips. +func fakeGalleryPage(startID int, hasNext bool) string { + var b strings.Builder + b.WriteString(``) + for i := 0; i < 2; i++ { + id := startID + i + fmt.Fprintf(&b, ` +
+ + + +
+

Sub %d

+ someartist +
+
`, id, id, id, id, id) + } + if hasNext { + b.WriteString(`Next`) + } + b.WriteString(``) + return b.String() +} + +func TestGalleryPage_HasNextPropagates(t *testing.T) { + var requests atomic.Int32 + mux := http.NewServeMux() + mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) { + requests.Add(1) + _, _ = w.Write([]byte(fakeGalleryPage(1000, true))) + }) + mux.HandleFunc("/gallery/u/2/", func(w http.ResponseWriter, _ *http.Request) { + requests.Add(1) + _, _ = w.Write([]byte(fakeGalleryPage(2000, false))) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + client := newE2EClient(t, srv) + + first, err := client.GalleryPage(context.Background(), "u", 1) + if err != nil { + t.Fatalf("GalleryPage(1): %v", err) + } + if first.Page != 1 { + t.Errorf("first.Page = %d; want 1", first.Page) + } + if !first.HasNext { + t.Error("first.HasNext = false; want true") + } + if len(first.Items) != 2 { + t.Fatalf("first.Items len = %d; want 2", len(first.Items)) + } + if first.Items[0].ID != 1000 { + t.Errorf("first.Items[0].ID = %d; want 1000", first.Items[0].ID) + } + // data-tags routed through to the page method too. + if len(first.Items[0].Tags) == 0 || len(first.Items[0].CategorizedTags.Species) == 0 { + t.Errorf("first.Items[0]: tags not populated from data-tags: %+v", first.Items[0]) + } + + last, err := client.GalleryPage(context.Background(), "u", 2) + if err != nil { + t.Fatalf("GalleryPage(2): %v", err) + } + if last.HasNext { + t.Error("last.HasNext = true; want false (last page)") + } + if last.Page != 2 { + t.Errorf("last.Page = %d; want 2", last.Page) + } + + if requests.Load() != 2 { + t.Errorf("requests = %d; want 2", requests.Load()) + } +} + +func TestGalleryPage_ZeroPageDefaultsToOne(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(fakeGalleryPage(1, false))) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + client := newE2EClient(t, srv) + + page, err := client.GalleryPage(context.Background(), "u", 0) + if err != nil { + t.Fatalf("GalleryPage(0): %v", err) + } + if page.Page != 1 { + t.Errorf("page.Page = %d; want 1 (zero should normalise)", page.Page) + } +} + +func TestScrapsPage_HitsScrapsRoute(t *testing.T) { + var gotPath string + mux := http.NewServeMux() + mux.HandleFunc("/scraps/u/", func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + _, _ = w.Write([]byte(fakeGalleryPage(1, false))) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + client := newE2EClient(t, srv) + + if _, err := client.ScrapsPage(context.Background(), "u", 1); err != nil { + t.Fatalf("ScrapsPage: %v", err) + } + if gotPath != "/scraps/u/" { + t.Errorf("gotPath = %q; want /scraps/u/", gotPath) + } +} + +func TestFavoritesPage_HitsFavoritesRoute(t *testing.T) { + var gotPath string + mux := http.NewServeMux() + mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + _, _ = w.Write([]byte(fakeGalleryPage(1, true))) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + client := newE2EClient(t, srv) + + p, err := client.FavoritesPage(context.Background(), "u", 1) + if err != nil { + t.Fatalf("FavoritesPage: %v", err) + } + if gotPath != "/favorites/u/" { + t.Errorf("gotPath = %q; want /favorites/u/", gotPath) + } + if !p.HasNext { + t.Error("p.HasNext = false; want true") + } +} diff --git a/pagination.go b/pagination.go index b3bf7dc..2788334 100644 --- a/pagination.go +++ b/pagination.go @@ -6,6 +6,24 @@ import ( "github.com/PuerkitoBio/goquery" ) +// ListingPage is one page of a listing endpoint (Gallery / Scraps / +// Favorites). It carries everything an external caller needs to drive +// pagination by hand: the items, the 1-based page number that produced +// them, and whether FA exposed a "next page" link. +// +// External scrapers that want to manage their own loop (resume from a +// checkpoint, run pages in parallel, throttle differently) should call +// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage], +// [Client.FavoritesPage]) and stop when HasNext is false. Callers that +// just want every item in order should keep using the iter.Seq2-shaped +// methods ([Client.Gallery] et al.), which use the same primitive +// internally. +type ListingPage struct { + Items []*Submission + HasNext bool + Page int // 1-based page number this result corresponds to +} + // ListOptions configures the pagination of a simple iterator method like // [Client.Gallery] or [Client.Notes]. Filtered iterators ([Client.Search], // [Client.Browse]) use their own option structs that fold the same fields