FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.
Changes:
- urls.Favorites(name) returns the first-page URL; new
urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
helper works for both page-number and cursor pagination.
- Added a regression test that fails when the infinite-loop bug is present.
- Example: examples/favorites_page demonstrates cursor walking.
123 lines
4.6 KiB
Go
123 lines
4.6 KiB
Go
package fa
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
|
|
// Favorites). It carries everything an external caller needs to drive
|
|
// pagination by hand: the items, whether FA exposed a "next page" link,
|
|
// and an opaque NextPage token to pass back into the next per-page call.
|
|
//
|
|
// External scrapers that want to manage their own loop (resume from a
|
|
// checkpoint, run pages in parallel, throttle differently) should call
|
|
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
|
|
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
|
|
// just want every item in order should keep using the iter.Seq2-shaped
|
|
// methods ([Client.Gallery] et al.), which walk pages internally.
|
|
//
|
|
// NextPage's contents differ by endpoint — for Gallery / Scraps it is
|
|
// the next 1-based page number as a decimal string ("2", "3", …); for
|
|
// Favorites it is the fave-ID cursor FA emits on the "Next" form
|
|
// (because favorites pagination is cursor-based, not page-number-based).
|
|
// Treat the value as opaque: pass whatever you got back to the next
|
|
// call without parsing.
|
|
type ListingPage struct {
|
|
Items []*Submission
|
|
HasNext bool
|
|
NextPage string // "" when !HasNext; otherwise the opaque token to pass back
|
|
}
|
|
|
|
// ListOptions controls pagination for the simple iterator methods such as
// [Client.Gallery] or [Client.Notes]. The filtered iterators
// ([Client.Search], [Client.Browse]) have their own option structs, which
// carry these same fields next to their filter parameters.
//
// The zero value selects the SDK defaults: begin at page 1 and keep going
// with no cap on the number of pages. To bound a crawl, set MaxPages,
// e.g. ListOptions{MaxPages: 3}.
type ListOptions struct {
	// StartPage is the 1-based page iteration begins on; 0 and 1 both
	// mean the first page. Handy for resuming after a known-good page.
	// The cursor-paginated [Client.Favorites] iterator ignores it.
	StartPage int

	// MaxPages caps how many pages the iterator requests before it
	// stops. Zero (the default) means no cap: iteration ends when FA
	// serves an empty page or omits the "next" link.
	MaxPages int
}

// firstPage resolves StartPage to the page iteration actually begins on,
// which is never less than 1.
func (o ListOptions) firstPage() int {
	if o.StartPage >= 1 {
		return o.StartPage
	}
	return 1
}

// reachedLimit tells the iterator to stop once pagesFetched has hit the
// MaxPages cap. With MaxPages at 0 (unbounded) it is always false.
func (o ListOptions) reachedLimit(pagesFetched int) bool {
	if o.MaxPages <= 0 {
		return false
	}
	return pagesFetched >= o.MaxPages
}
|
|
|
|
// detectNextPage returns true if doc shows there is a next page available.
|
|
// FA's beta theme renders pagination as either a Next form button or a
|
|
// hyperlink with a recognisable label.
|
|
func detectNextPage(doc *goquery.Document) bool {
|
|
url, _ := nextPageURL(doc)
|
|
return url != ""
|
|
}
|
|
|
|
// nextPageURL returns the action/href that the "Next" pagination control
|
|
// would navigate to, along with a flag indicating whether one was found.
|
|
// Returns ("", false) on the last page (FA emits no Next form/anchor, or
|
|
// emits it inside an HTML comment that doesn't parse as an element).
|
|
func nextPageURL(doc *goquery.Document) (string, bool) {
|
|
var action string
|
|
doc.Find("form").EachWithBreak(func(_ int, f *goquery.Selection) bool {
|
|
if f.Find("button.button.standard:contains('Next')").Length() == 0 {
|
|
return true
|
|
}
|
|
action, _ = f.Attr("action")
|
|
return false
|
|
})
|
|
if action != "" {
|
|
return action, true
|
|
}
|
|
var href string
|
|
doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
|
|
text := strings.ToLower(trimText(sel))
|
|
if strings.Contains(text, "next") || strings.Contains(text, "older") {
|
|
href, _ = sel.Attr("href")
|
|
return false
|
|
}
|
|
return true
|
|
})
|
|
if href == "" {
|
|
return "", false
|
|
}
|
|
return href, true
|
|
}
|
|
|
|
// favoritesCursorFromURL extracts the fave-ID cursor segment from a
// /favorites/{user}/{cursor}/next URL. It returns "" if the URL does not
// match that shape, in which case the caller treats the listing as
// exhausted rather than chasing a malformed cursor.
//
// rawURL may be a site-relative path ("/favorites/u/123/next") or an
// absolute URL with any scheme and host
// ("https://furaffinity.net/favorites/u/123/next"); only the path is
// inspected. Surrounding whitespace, a query string, and a fragment are
// ignored.
func favoritesCursorFromURL(rawURL string) string {
	path := strings.TrimSpace(rawURL)

	// Cut the query / fragment first so nothing inside them can be
	// mistaken for a scheme separator or a path segment.
	if i := strings.IndexAny(path, "?#"); i >= 0 {
		path = path[:i]
	}

	// An absolute URL is "scheme://host/path". A "/" ahead of the "://"
	// means the separator sits inside a relative path, so leave it alone.
	if scheme, rest, ok := strings.Cut(path, "://"); ok && !strings.Contains(scheme, "/") {
		// Discard the host; a URL with no path at all leaves path empty.
		_, path, _ = strings.Cut(rest, "/")
	}

	// Expect ["favorites", "{user}", "{cursor}", "next"].
	parts := strings.Split(strings.Trim(path, "/"), "/")
	if len(parts) != 4 || parts[0] != "favorites" || parts[3] != "next" {
		return ""
	}
	return parts[2]
}
|