GalleryPage / ScrapsPage / FavoritesPage return a ListingPage struct carrying the page items, the 1-based page number, and a HasNext flag that mirrors FA's "next page" link. This lets external scrapers drive their own pagination loop (checkpoint resume, parallel workers, custom throttling) without re-implementing the page-walking code. The existing iter.Seq2-shaped methods now share the same per-page primitive internally so behaviour stays in lock-step.
77 lines
2.7 KiB
Go
77 lines
2.7 KiB
Go
package fa
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
|
|
// Favorites). It carries everything an external caller needs to drive
|
|
// pagination by hand: the items, the 1-based page number that produced
|
|
// them, and whether FA exposed a "next page" link.
|
|
//
|
|
// External scrapers that want to manage their own loop (resume from a
|
|
// checkpoint, run pages in parallel, throttle differently) should call
|
|
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
|
|
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
|
|
// just want every item in order should keep using the iter.Seq2-shaped
|
|
// methods ([Client.Gallery] et al.), which use the same primitive
|
|
// internally.
|
|
type ListingPage struct {
|
|
Items []*Submission
|
|
HasNext bool
|
|
Page int // 1-based page number this result corresponds to
|
|
}
|
|
|
|
// ListOptions configures the pagination of a simple iterator method like
// [Client.Gallery] or [Client.Notes]. Filtered iterators ([Client.Search],
// [Client.Browse]) use their own option structs that fold the same fields
// in alongside their filter parameters.
//
// Zero values mean "use the SDK defaults": start at page 1, no upper bound
// on pages. Pass ListOptions{MaxPages: 3} to bound a crawl to three pages.
type ListOptions struct {
	// StartPage is the 1-based page to begin iteration on. Any value below
	// 1 (including the zero value) means the first page. Useful for
	// resuming after a known-good page.
	StartPage int

	// MaxPages bounds the number of pages the iterator will request before
	// stopping. Zero (the default) or a negative value = unbounded;
	// iteration then stops when FA serves an empty page or omits the
	// "next" link.
	MaxPages int
}

// firstPage returns the effective starting page, which is always ≥ 1:
// StartPage when it is at least 1, otherwise 1.
func (o ListOptions) firstPage() int {
	if o.StartPage < 1 {
		return 1
	}
	return o.StartPage
}

// reachedLimit reports whether the iterator has fetched MaxPages pages and
// should stop. pagesFetched is the number of pages requested so far.
// Always false when MaxPages is 0 or negative (unbounded).
func (o ListOptions) reachedLimit(pagesFetched int) bool {
	return o.MaxPages > 0 && pagesFetched >= o.MaxPages
}
|
|
|
|
// detectNextPage returns true if doc shows there is a next page available.
|
|
// FA's beta theme renders pagination as either a Next form button or a
|
|
// hyperlink with a recognisable label.
|
|
func detectNextPage(doc *goquery.Document) bool {
|
|
if doc.Find("form button.button.standard:contains('Next')").Length() > 0 {
|
|
return true
|
|
}
|
|
hit := false
|
|
doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
|
|
text := strings.ToLower(trimText(sel))
|
|
if strings.Contains(text, "next") || strings.Contains(text, "older") {
|
|
hit = true
|
|
return false
|
|
}
|
|
return true
|
|
})
|
|
return hit
|
|
}
|