package fa

import (
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// ListingPage holds a single page from a listing endpoint (Gallery /
// Scraps / Favorites) together with what a caller needs to paginate
// manually: the page's items, whether FA exposed a "next page" link, and
// an opaque NextPage token to hand to the following per-page call.
//
// Scrapers that run their own loop (resuming from a checkpoint, fetching
// pages in parallel, custom throttling) should use the per-page methods
// ([Client.GalleryPage], [Client.ScrapsPage], [Client.FavoritesPage]) and
// stop once HasNext is false. Callers that only want every item in order
// should stay on the iter.Seq2-shaped methods ([Client.Gallery] et al.),
// which walk the pages internally.
//
// What NextPage contains depends on the endpoint. For Gallery / Scraps it
// is the next 1-based page number as a decimal string ("2", "3", …). For
// Favorites it is the fave-ID cursor FA emits on the "Next" form, because
// favorites pagination is cursor-based rather than page-number-based.
// Treat the value as opaque: pass it back unchanged, without parsing.
type ListingPage struct {
	Items   []*Submission
	HasNext bool
	// NextPage is "" when !HasNext; otherwise it is the opaque token to
	// pass back.
	NextPage string
}

// ListOptions controls pagination for a simple iterator method such as
// [Client.Gallery] or [Client.Notes]. Filtered iterators ([Client.Search],
// [Client.Browse]) have their own option structs that carry the same
// fields alongside their filter parameters.
//
// The zero value selects the SDK defaults: begin at page 1 with no upper
// bound on pages. Pass ListOptions{MaxPages: 3} to bound a crawl.
type ListOptions struct {
	// StartPage is the 1-based page iteration begins on. Zero or 1 means
	// the first page. Handy for resuming after a known-good page.
	StartPage int

	// MaxPages caps how many pages the iterator requests before it stops.
	// Zero (the default) means unbounded; iteration then ends when FA
	// serves an empty page or omits the "next" link.
	MaxPages int
}

// firstPage returns the page iteration should start on, never less than 1.
func (o ListOptions) firstPage() int {
	if o.StartPage >= 1 {
		return o.StartPage
	}
	return 1
}

// reachedLimit reports whether pagesFetched has hit the MaxPages bound, in
// which case the iterator should stop. An unbounded iterator (MaxPages of
// 0) never reaches the limit.
func (o ListOptions) reachedLimit(pagesFetched int) bool {
	if o.MaxPages <= 0 {
		return false
	}
	return pagesFetched >= o.MaxPages
}

// detectNextPage reports whether doc advertises a further page. FA's beta
// theme renders pagination either as a Next form button or as a hyperlink
// with a recognisable label.
func detectNextPage(doc *goquery.Document) bool {
	// nextPageURL reports found exactly when the URL is non-empty.
	_, found := nextPageURL(doc)
	return found
}

// nextPageURL returns the action/href the "Next" pagination control would
// navigate to, plus a flag saying whether one was found. On the last page
// it returns ("", false): FA emits no Next form/anchor there, or emits it
// inside an HTML comment that does not parse as an element.
func nextPageURL(doc *goquery.Document) (string, bool) {
	// Form-based pagination: stop at the first form holding a Next button.
	var formAction string
	doc.Find("form").EachWithBreak(func(_ int, form *goquery.Selection) bool {
		hasNext := form.Find("button.button.standard:contains('Next')").Length() > 0
		if hasNext {
			// A missing attribute yields "", which falls through to the
			// anchor scan below.
			formAction, _ = form.Attr("action")
		}
		return !hasNext
	})
	if formAction != "" {
		return formAction, true
	}

	// Anchor-based pagination: stop at the first link labelled "next" or
	// "older" (case-insensitive).
	var link string
	doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, anchor *goquery.Selection) bool {
		label := strings.ToLower(trimText(anchor))
		if !strings.Contains(label, "next") && !strings.Contains(label, "older") {
			return true
		}
		link, _ = anchor.Attr("href")
		return false
	})
	return link, link != ""
}

// favoritesCursorFromURL extracts the fave-ID cursor segment from a
// /favorites/{user}/{cursor}/next URL.
// Returns "" when the URL does not have that shape; the caller then
// treats the listing as exhausted instead of chasing a malformed cursor.
func favoritesCursorFromURL(rawURL string) string {
	// The path may arrive relative ("/favorites/u/123/next") or absolute,
	// so drop a leading FA origin before looking at the segments.
	path := rawURL
	for _, origin := range []string{
		"https://www.furaffinity.net",
		"http://www.furaffinity.net",
	} {
		path = strings.TrimPrefix(path, origin)
	}

	// Discard the query string and fragment, whichever starts first.
	if end := strings.IndexAny(path, "?#"); end != -1 {
		path = path[:end]
	}

	// A valid path splits into ["favorites", "{user}", "{cursor}", "next"].
	segments := strings.Split(strings.Trim(path, "/"), "/")
	if len(segments) == 4 && segments[0] == "favorites" && segments[3] == "next" {
		return segments[2]
	}
	return ""
}