fix(favorites): use cursor-based pagination instead of page numbers
FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.
Changes:
- urls.Favorites(name) returns the first-page URL; the new
  urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string (empty = first page) and
  returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains a NextPage string (decimal page number for
  Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
  field that conflated those two notions.
- The Client.Favorites iterator now walks cursors internally; StartPage
  is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
  helper works for both page-number and cursor pagination.
- Added a regression test that fails if the infinite-loop bug reappears.
- Example: examples/favorites_page demonstrates cursor walking.
This commit is contained in:
103
gallery.go
103
gallery.go
@@ -3,6 +3,7 @@ package fa
|
||||
import (
|
||||
"context"
|
||||
"iter"
|
||||
"strconv"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
|
||||
@@ -16,19 +17,50 @@ import (
|
||||
// / CategorizedTags parsed from the figure's data-tags attribute. Call
|
||||
// [Client.GetSubmission] with the ID to load the full record.
|
||||
func (c *Client) Gallery(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
||||
return c.listGallerySection(ctx, name, urls.Gallery, opts, reqOpts)
|
||||
return c.listPagedSection(ctx, name, urls.Gallery, opts, reqOpts)
|
||||
}
|
||||
|
||||
// Scraps iterates the user's scraps folder. Same yield shape as Gallery.
|
||||
func (c *Client) Scraps(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
||||
return c.listGallerySection(ctx, name, urls.Scraps, opts, reqOpts)
|
||||
return c.listPagedSection(ctx, name, urls.Scraps, opts, reqOpts)
|
||||
}
|
||||
|
||||
// Favorites iterates the user's favorited submissions. The yielded
|
||||
// *Submission's Author field reflects the original artist (not the user
|
||||
// whose favorites we are walking).
|
||||
//
|
||||
// Favorites use a fave-ID cursor for pagination, not sequential page
|
||||
// numbers, so [ListOptions.StartPage] is ignored — the walk always
|
||||
// begins at the newest favorite. [ListOptions.MaxPages] still bounds
|
||||
// the crawl.
|
||||
func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
||||
return c.listGallerySection(ctx, name, urls.Favorites, opts, reqOpts)
|
||||
return func(yield func(*Submission, error) bool) {
|
||||
cursor := ""
|
||||
pagesFetched := 0
|
||||
for {
|
||||
if opts.reachedLimit(pagesFetched) {
|
||||
return
|
||||
}
|
||||
lp, err := c.FavoritesPage(ctx, name, cursor, reqOpts...)
|
||||
if err != nil {
|
||||
yield(nil, err)
|
||||
return
|
||||
}
|
||||
pagesFetched++
|
||||
if len(lp.Items) == 0 {
|
||||
return
|
||||
}
|
||||
for _, s := range lp.Items {
|
||||
if !yield(s, nil) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if !lp.HasNext {
|
||||
return
|
||||
}
|
||||
cursor = lp.NextPage
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// GalleryPage fetches a single page of /gallery/{name}/ and returns the
|
||||
@@ -36,25 +68,53 @@ func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, r
|
||||
// 1 for the first page. Use this when driving pagination manually
|
||||
// (resuming from a checkpoint, distributing pages across workers); use
|
||||
// [Client.Gallery] when you just want every item in order.
|
||||
//
|
||||
// On a non-final page the returned [ListingPage].NextPage is the next
|
||||
// page number as a decimal string ("2", "3", …) — pass it back to the
|
||||
// next call after converting it with [strconv.Atoi], since page is an int.
|
||||
func (c *Client) GalleryPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||
return c.fetchListingPage(ctx, name, page, urls.Gallery, reqOpts)
|
||||
return c.fetchNumberedPage(ctx, name, page, urls.Gallery, reqOpts)
|
||||
}
|
||||
|
||||
// ScrapsPage is the single-page counterpart to [Client.Scraps]. See
|
||||
// [Client.GalleryPage] for usage notes.
|
||||
func (c *Client) ScrapsPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||
return c.fetchListingPage(ctx, name, page, urls.Scraps, reqOpts)
|
||||
return c.fetchNumberedPage(ctx, name, page, urls.Scraps, reqOpts)
|
||||
}
|
||||
|
||||
// FavoritesPage is the single-page counterpart to [Client.Favorites]. See
|
||||
// [Client.GalleryPage] for usage notes.
|
||||
func (c *Client) FavoritesPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||
return c.fetchListingPage(ctx, name, page, urls.Favorites, reqOpts)
|
||||
// FavoritesPage fetches a single page of /favorites/{name}/, addressed
|
||||
// by the cursor FA emitted on the previous page (empty string for the
|
||||
// first page). FA paginates favorites with a fave-ID cursor — not a
|
||||
// sequential page number — so the caller must walk forward by passing
|
||||
// the returned [ListingPage].NextPage value into the next call. Passing
|
||||
// a guessed cursor (e.g. "2") makes FA silently return the first page
|
||||
// again, so a loop that keeps guessing cursors will never terminate.
|
||||
func (c *Client) FavoritesPage(ctx context.Context, name string, cursor string, reqOpts ...Option) (*ListingPage, error) {
|
||||
out := &ListingPage{}
|
||||
err := c.fetch(ctx, urls.FavoritesCursor(name, cursor), func(doc *goquery.Document) error {
|
||||
items, nextURL, hasNext := parseListingPage(doc, c.cfg.jsonListings)
|
||||
out.Items = items
|
||||
out.HasNext = hasNext
|
||||
if hasNext {
|
||||
out.NextPage = favoritesCursorFromURL(nextURL)
|
||||
// If the markup was unrecognisable, refuse to claim a next
|
||||
// page rather than re-fetching the first one in a loop.
|
||||
if out.NextPage == "" {
|
||||
out.HasNext = false
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}, reqOpts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// fetchListingPage is the shared per-page primitive used by
|
||||
// GalleryPage / ScrapsPage / FavoritesPage and the iterator engine.
|
||||
func (c *Client) fetchListingPage(
|
||||
// fetchNumberedPage is the shared primitive for page-number-based
|
||||
// listings (Gallery / Scraps). urlFn picks the section-specific URL
|
||||
// builder; the rest of the pagination machinery is identical.
|
||||
func (c *Client) fetchNumberedPage(
|
||||
ctx context.Context,
|
||||
name string,
|
||||
page int,
|
||||
@@ -64,9 +124,14 @@ func (c *Client) fetchListingPage(
|
||||
if page < 1 {
|
||||
page = 1
|
||||
}
|
||||
out := &ListingPage{Page: page}
|
||||
out := &ListingPage{}
|
||||
err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error {
|
||||
out.Items, out.HasNext = parseGalleryPage(doc, c.cfg.jsonListings)
|
||||
items, _, hasNext := parseListingPage(doc, c.cfg.jsonListings)
|
||||
out.Items = items
|
||||
out.HasNext = hasNext
|
||||
if hasNext {
|
||||
out.NextPage = strconv.Itoa(page + 1)
|
||||
}
|
||||
return nil
|
||||
}, reqOpts...)
|
||||
if err != nil {
|
||||
@@ -75,10 +140,10 @@ func (c *Client) fetchListingPage(
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// listGallerySection is the shared engine for Gallery / Scraps / Favorites.
|
||||
// urlFn picks the section-specific URL builder; the rest of the pagination
|
||||
// machinery is identical across all three sections.
|
||||
func (c *Client) listGallerySection(
|
||||
// listPagedSection is the shared engine for the page-number-based
|
||||
// listing iterators (Gallery / Scraps). Favorites has its own loop in
|
||||
// [Client.Favorites] because its pagination is cursor-based.
|
||||
func (c *Client) listPagedSection(
|
||||
ctx context.Context,
|
||||
name string,
|
||||
urlFn func(string, int) string,
|
||||
@@ -92,7 +157,7 @@ func (c *Client) listGallerySection(
|
||||
if opts.reachedLimit(pagesFetched) {
|
||||
return
|
||||
}
|
||||
lp, err := c.fetchListingPage(ctx, name, page, urlFn, reqOpts)
|
||||
lp, err := c.fetchNumberedPage(ctx, name, page, urlFn, reqOpts)
|
||||
if err != nil {
|
||||
yield(nil, err)
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user