fix(favorites): use cursor-based pagination instead of page numbers

FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.

Changes:
- urls.Favorites(name) returns the first-page URL; new
  urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
  Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
  Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
  field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
  is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
  helper works for both page-number and cursor pagination.
- Add a regression test that reproduces the infinite-loop bug (it fails without this fix).
- Example: examples/favorites_page demonstrates cursor walking.
This commit is contained in:
2026-06-02 22:44:14 +02:00
parent 8f4767966a
commit 83487e531a
7 changed files with 343 additions and 80 deletions

View File

@@ -8,20 +8,26 @@ import (
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
// Favorites). It carries everything an external caller needs to drive
// pagination by hand: the items, the 1-based page number that produced
// them, and whether FA exposed a "next page" link.
// pagination by hand: the items, whether FA exposed a "next page" link,
// and an opaque NextPage token to pass back into the next per-page call.
//
// External scrapers that want to manage their own loop (resume from a
// checkpoint, run pages in parallel, throttle differently) should call
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
// just want every item in order should keep using the iter.Seq2-shaped
// methods ([Client.Gallery] et al.), which use the same primitive
// internally.
// methods ([Client.Gallery] et al.), which walk pages internally.
//
// NextPage's contents differ by endpoint — for Gallery / Scraps it is
// the next 1-based page number as a decimal string ("2", "3", …); for
// Favorites it is the fave-ID cursor FA emits on the "Next" form
// (because favorites pagination is cursor-based, not page-number-based).
// Treat the value as opaque: pass whatever you got back to the next
// call without parsing.
type ListingPage struct {
Items []*Submission
HasNext bool
Page int // 1-based page number this result corresponds to
Items []*Submission // the items found on this page
HasNext bool // whether FA exposed a "next page" control on this page
NextPage string // "" when !HasNext; otherwise the opaque token to pass back
}
// ListOptions configures the pagination of a simple iterator method like
@@ -60,17 +66,57 @@ func (o ListOptions) reachedLimit(pagesFetched int) bool {
// FA's beta theme renders pagination as either a Next form button or a
// hyperlink with a recognisable label.
func detectNextPage(doc *goquery.Document) bool {
if doc.Find("form button.button.standard:contains('Next')").Length() > 0 {
return true
// Delegate to nextPageURL: a non-empty action/href means a Next
// control was found on the page.
url, _ := nextPageURL(doc)
return url != ""
}
// nextPageURL returns the action/href that the "Next" pagination control
// would navigate to, along with a flag indicating whether one was found.
// Returns ("", false) on the last page (FA emits no Next form/anchor, or
// emits it inside an HTML comment that doesn't parse as an element).
func nextPageURL(doc *goquery.Document) (string, bool) {
// Form-style control first: look for the first <form> that contains a
// "Next" button and take that form's action attribute.
var action string
doc.Find("form").EachWithBreak(func(_ int, f *goquery.Selection) bool {
if f.Find("button.button.standard:contains('Next')").Length() == 0 {
return true // no Next button in this form; keep scanning
}
action, _ = f.Attr("action")
return false // stop at the first form that has a Next button
})
if action != "" {
return action, true
}
// Otherwise fall back to anchor-style controls: the first matching
// anchor whose lower-cased trimText result contains "next" or "older"
// supplies the href.
hit := false
var href string
doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
text := strings.ToLower(trimText(sel))
if strings.Contains(text, "next") || strings.Contains(text, "older") {
hit = true
href, _ = sel.Attr("href")
return false
}
return true
})
return hit
// A missing or empty href is reported as "no next page".
if href == "" {
return "", false
}
return href, true
}
// favoritesCursorFromURL extracts the fave-ID cursor segment from a
// /favorites/{user}/{cursor}/next URL.
//
// rawURL may be a relative path ("/favorites/u/123/next") or an absolute
// http(s) URL on furaffinity.net. The scheme and host are matched
// case-insensitively and the "www." prefix is optional. Any query string
// or fragment is ignored.
//
// It returns "" if the URL does not match that shape or points at a
// different host (in which case the caller treats the listing as
// exhausted rather than chasing a malformed cursor).
func favoritesCursorFromURL(rawURL string) string {
	// The query and fragment never carry the cursor; drop them before
	// looking at the scheme, host, or path.
	if i := strings.IndexAny(rawURL, "?#"); i >= 0 {
		rawURL = rawURL[:i]
	}

	path := rawURL
	if scheme, rest, ok := strings.Cut(rawURL, "://"); ok {
		// Absolute URL: accept only http(s) on furaffinity.net, with or
		// without "www.", so a differently-cased or www-less link is not
		// mistaken for the end of the listing.
		if !strings.EqualFold(scheme, "http") && !strings.EqualFold(scheme, "https") {
			return ""
		}
		host, hostPath, _ := strings.Cut(rest, "/")
		if strings.TrimPrefix(strings.ToLower(host), "www.") != "furaffinity.net" {
			return ""
		}
		path = hostPath
	}

	parts := strings.Split(strings.Trim(path, "/"), "/")
	// Expect ["favorites", "{user}", "{cursor}", "next"].
	if len(parts) != 4 || parts[0] != "favorites" || parts[3] != "next" {
		return ""
	}
	return parts[2]
}