Files
go-fa-api/pagination.go
SoXX 83487e531a fix(favorites): use cursor-based pagination instead of page numbers
FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.

Changes:
- urls.Favorites(name) returns the first-page URL; new
  urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
  Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
  Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
  field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
  is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
  helper works for both page-number and cursor pagination.
- Added regression test that fails on the infinite-loop bug.
- Example: examples/favorites_page demonstrates cursor walking.
2026-06-02 22:44:14 +02:00

123 lines
4.6 KiB
Go

package fa
import (
"strings"
"github.com/PuerkitoBio/goquery"
)
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
// Favorites). It carries everything an external caller needs to drive
// pagination by hand: the items, whether FA exposed a "next page" link,
// and an opaque NextPage token to pass back into the next per-page call.
//
// External scrapers that want to manage their own loop (resume from a
// checkpoint, run pages in parallel, throttle differently) should call
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
// just want every item in order should keep using the iter.Seq2-shaped
// methods ([Client.Gallery] et al.), which walk pages internally.
//
// NextPage's contents differ by endpoint — for Gallery / Scraps it is
// the next 1-based page number as a decimal string ("2", "3", …); for
// Favorites it is the fave-ID cursor FA emits on the "Next" form
// (because favorites pagination is cursor-based, not page-number-based).
// Treat the value as opaque: pass whatever you got back to the next
// call without parsing.
type ListingPage struct {
	// Items holds the submissions listed on this page.
	Items []*Submission
	// HasNext reports whether FA exposed a "next page" control on this
	// page; callers driving pagination by hand stop when it is false.
	HasNext bool
	// NextPage is "" when !HasNext; otherwise it is the opaque token to
	// pass back into the next per-page call (a decimal page number for
	// Gallery / Scraps, a fave-ID cursor for Favorites).
	NextPage string
}
// ListOptions configures the pagination of a simple iterator method like
// [Client.Gallery] or [Client.Notes]. Filtered iterators ([Client.Search],
// [Client.Browse]) use their own option structs that fold the same fields
// in alongside their filter parameters.
//
// Zero values mean "use the SDK defaults": start at page 1, no upper bound
// on pages. Pass ListOptions{MaxPages: 3} to bound a crawl.
type ListOptions struct {
	// StartPage is the 1-based page to begin iteration on. Zero, 1, or any
	// negative value means the first page. Useful for resuming after a
	// known-good page.
	//
	// StartPage is ignored by [Client.Favorites]: favorites pagination is
	// addressed by a fave-ID cursor rather than by page number.
	StartPage int
	// MaxPages bounds the number of pages the iterator will request before
	// stopping. Zero (the default) or a negative value = unbounded;
	// iteration then stops when FA serves an empty page or omits the
	// "next" link.
	MaxPages int
}
// firstPage resolves StartPage to the page number iteration should begin
// on. Any StartPage below 1 (including the zero value) is treated as a
// request for the first page, so the result is always ≥ 1.
func (o ListOptions) firstPage() int {
	if page := o.StartPage; page >= 1 {
		return page
	}
	return 1
}
// reachedLimit reports whether pagesFetched has hit the MaxPages bound, in
// which case the iterator should stop requesting pages. A MaxPages of zero
// (or below) means "unbounded", so the result is then always false.
func (o ListOptions) reachedLimit(pagesFetched int) bool {
	if o.MaxPages <= 0 {
		// No bound configured: never stop on page count alone.
		return false
	}
	return pagesFetched >= o.MaxPages
}
// detectNextPage reports whether doc advertises a further page. It defers
// to nextPageURL, which looks for either a "Next" form button or a
// pagination hyperlink with a recognisable label, and answers true exactly
// when that lookup produced a non-empty target.
func detectNextPage(doc *goquery.Document) bool {
	target, _ := nextPageURL(doc)
	return len(target) > 0
}
// nextPageURL locates the "Next" pagination control in doc and returns the
// URL it would navigate to, together with a flag saying whether one was
// found.
//
// Two renderings are tried, in this order:
//
//  1. The first <form> that contains a button.button.standard whose text
//     includes "Next"; its action attribute is the result.
//  2. The first anchor matching a.button.standard, a.button-link or
//     a.pagination-next whose lower-cased text contains "next" or "older";
//     its href attribute is the result.
//
// Scanning in each step stops at the first matching element, even if that
// element's attribute is missing or empty. ("", false) is returned when
// neither step yields a non-empty URL — e.g. on the last page, where FA
// emits no Next form/anchor, or emits it inside an HTML comment that does
// not parse as an element.
func nextPageURL(doc *goquery.Document) (string, bool) {
	// Step 1: form-wrapped "Next" button.
	formAction := ""
	doc.Find("form").EachWithBreak(func(_ int, form *goquery.Selection) bool {
		hasNext := form.Find("button.button.standard:contains('Next')").Length() > 0
		if hasNext {
			formAction = form.AttrOr("action", "")
		}
		// Keep iterating only while no form with a Next button has been seen.
		return !hasNext
	})
	if formAction != "" {
		return formAction, true
	}

	// Step 2: plain hyperlink labelled "next" / "older".
	link := ""
	doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, anchor *goquery.Selection) bool {
		label := strings.ToLower(trimText(anchor))
		if !strings.Contains(label, "next") && !strings.Contains(label, "older") {
			return true
		}
		link = anchor.AttrOr("href", "")
		return false
	})
	return link, link != ""
}
// favoritesCursorFromURL extracts the fave-ID cursor segment from a
// /favorites/{user}/{cursor}/next URL.
//
// rawURL may be relative ("/favorites/u/123/next") or absolute with any
// scheme and host ("https://www.furaffinity.net/favorites/u/123/next",
// "https://furaffinity.net/favorites/u/123/next"); a trailing slash, query
// string, or fragment is ignored.
//
// It returns "" if the path does not have exactly the shape
// favorites/{user}/{cursor}/next, in which case the caller treats the
// listing as exhausted rather than chasing a malformed cursor.
func favoritesCursorFromURL(rawURL string) string {
	// Drop query / fragment first: they never carry the cursor, and doing
	// it up front keeps a "://" inside a query value from being mistaken
	// for a scheme separator below.
	if i := strings.IndexAny(rawURL, "?#"); i >= 0 {
		rawURL = rawURL[:i]
	}

	// Reduce an absolute URL to its path. Matching on the generic
	// "scheme://host" shape (rather than on a fixed list of host prefixes)
	// keeps the parser working when FA links use the bare domain, a
	// differently-cased host, or another scheme. The scheme must be the
	// leading component, hence the "no slash before ://" check.
	if scheme, rest, ok := strings.Cut(rawURL, "://"); ok && !strings.Contains(scheme, "/") {
		// Everything up to the first "/" is the host; a URL with no path
		// at all leaves rawURL empty and fails the shape check below.
		_, rawURL, _ = strings.Cut(rest, "/")
	}

	parts := strings.Split(strings.Trim(rawURL, "/"), "/")
	// Expect ["favorites", "{user}", "{cursor}", "next"].
	if len(parts) != 4 || parts[0] != "favorites" || parts[3] != "next" {
		return ""
	}
	return parts[2]
}