fix(favorites): use cursor-based pagination instead of page numbers
FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.
Changes:
- urls.Favorites(name) returns the first-page URL; new
urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
helper works for both page-number and cursor pagination.
- Added regression test that fails on the infinite-loop bug.
- Example: examples/favorites_page demonstrates cursor walking.
This commit is contained in:
@@ -8,20 +8,26 @@ import (
|
||||
|
||||
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
|
||||
// Favorites). It carries everything an external caller needs to drive
|
||||
// pagination by hand: the items, the 1-based page number that produced
|
||||
// them, and whether FA exposed a "next page" link.
|
||||
// pagination by hand: the items, whether FA exposed a "next page" link,
|
||||
// and an opaque NextPage token to pass back into the next per-page call.
|
||||
//
|
||||
// External scrapers that want to manage their own loop (resume from a
|
||||
// checkpoint, run pages in parallel, throttle differently) should call
|
||||
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
|
||||
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
|
||||
// just want every item in order should keep using the iter.Seq2-shaped
|
||||
// methods ([Client.Gallery] et al.), which use the same primitive
|
||||
// internally.
|
||||
// methods ([Client.Gallery] et al.), which walk pages internally.
|
||||
//
|
||||
// NextPage's contents differ by endpoint — for Gallery / Scraps it is
|
||||
// the next 1-based page number as a decimal string ("2", "3", …); for
|
||||
// Favorites it is the fave-ID cursor FA emits on the "Next" form
|
||||
// (because favorites pagination is cursor-based, not page-number-based).
|
||||
// Treat the value as opaque: pass whatever you got back to the next
|
||||
// call without parsing.
|
||||
type ListingPage struct {
|
||||
Items []*Submission
|
||||
HasNext bool
|
||||
Page int // 1-based page number this result corresponds to
|
||||
Items []*Submission
|
||||
HasNext bool
|
||||
NextPage string // "" when !HasNext; otherwise the opaque token to pass back
|
||||
}
|
||||
|
||||
// ListOptions configures the pagination of a simple iterator method like
|
||||
@@ -60,17 +66,57 @@ func (o ListOptions) reachedLimit(pagesFetched int) bool {
|
||||
// FA's beta theme renders pagination as either a Next form button or a
|
||||
// hyperlink with a recognisable label.
|
||||
func detectNextPage(doc *goquery.Document) bool {
|
||||
if doc.Find("form button.button.standard:contains('Next')").Length() > 0 {
|
||||
return true
|
||||
url, _ := nextPageURL(doc)
|
||||
return url != ""
|
||||
}
|
||||
|
||||
// nextPageURL returns the action/href that the "Next" pagination control
|
||||
// would navigate to, along with a flag indicating whether one was found.
|
||||
// Returns ("", false) on the last page (FA emits no Next form/anchor, or
|
||||
// emits it inside an HTML comment that doesn't parse as an element).
|
||||
func nextPageURL(doc *goquery.Document) (string, bool) {
|
||||
var action string
|
||||
doc.Find("form").EachWithBreak(func(_ int, f *goquery.Selection) bool {
|
||||
if f.Find("button.button.standard:contains('Next')").Length() == 0 {
|
||||
return true
|
||||
}
|
||||
action, _ = f.Attr("action")
|
||||
return false
|
||||
})
|
||||
if action != "" {
|
||||
return action, true
|
||||
}
|
||||
hit := false
|
||||
var href string
|
||||
doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
|
||||
text := strings.ToLower(trimText(sel))
|
||||
if strings.Contains(text, "next") || strings.Contains(text, "older") {
|
||||
hit = true
|
||||
href, _ = sel.Attr("href")
|
||||
return false
|
||||
}
|
||||
return true
|
||||
})
|
||||
return hit
|
||||
if href == "" {
|
||||
return "", false
|
||||
}
|
||||
return href, true
|
||||
}
|
||||
|
||||
// favoritesCursorFromURL extracts the fave-ID cursor segment from a
// /favorites/{user}/{cursor}/next URL. Returns "" if the URL does not
// match that shape (in which case the caller treats the listing as
// exhausted rather than chasing a malformed cursor).
//
// rawURL may be relative ("/favorites/u/123/next") or absolute with any
// scheme and host ("https://www.furaffinity.net/favorites/u/123/next").
// Any query string or fragment is ignored.
func favoritesCursorFromURL(rawURL string) string {
	// Drop query / fragment first so nothing inside them can be mistaken
	// for a scheme separator or a path segment.
	if i := strings.IndexAny(rawURL, "?#"); i >= 0 {
		rawURL = rawURL[:i]
	}

	// Absolute URL: skip "scheme://authority" regardless of the host, so
	// a non-www or differently-cased origin is not misread as a malformed
	// cursor. The text before "://" must be slash-free to count as a scheme.
	if i := strings.Index(rawURL, "://"); i > 0 && !strings.Contains(rawURL[:i], "/") {
		afterScheme := rawURL[i+len("://"):]
		slash := strings.IndexByte(afterScheme, '/')
		if slash < 0 {
			return "" // bare origin with no path at all
		}
		rawURL = afterScheme[slash:]
	}

	// Expect ["favorites", "{user}", "{cursor}", "next"].
	parts := strings.Split(strings.Trim(rawURL, "/"), "/")
	if len(parts) != 4 || parts[0] != "favorites" || parts[3] != "next" {
		return ""
	}
	return parts[2]
}
|
||||
|
||||
Reference in New Issue
Block a user