From 83487e531aabbb53725b620195610a316c01a58a Mon Sep 17 00:00:00 2001 From: SoXX Date: Tue, 2 Jun 2026 22:44:14 +0200 Subject: [PATCH] fix(favorites): use cursor-based pagination instead of page numbers FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID of the last item on the previous page (e.g. /favorites/{user}/1951234825/next), not by sequential integers. The previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA interpreted that as a malformed cursor and silently returned page 1, which caused the Favorites iterator to loop forever and the new FavoritesPage to report HasNext=true on every call. Changes: - urls.Favorites(name) returns the first-page URL; new urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs. - FavoritesPage now takes a cursor string; empty = first page. Returns ListingPage.NextPage as the opaque fave-ID for the next call. - ListingPage gains NextPage string (decimal page number for Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int field that conflated those two notions. - Client.Favorites iterator now walks cursors internally; StartPage is ignored for favorites (documented). - detectNextPage / nextPageURL now parse the form action so the same helper works for both page-number and cursor pagination. - Added regression test that fails on the infinite-loop bug. - Example: examples/favorites_page demonstrates cursor walking. 
--- examples/favorites_page/main.go | 79 +++++++++++++++++++ fixtures_refresh_test.go | 2 +- gallery.go | 103 ++++++++++++++++++++----- gallery_page_test.go | 133 ++++++++++++++++++++++---------- gallery_parser.go | 13 +++- internal/urls/routes.go | 23 +++++- pagination.go | 70 ++++++++++++++--- 7 files changed, 343 insertions(+), 80 deletions(-) create mode 100644 examples/favorites_page/main.go diff --git a/examples/favorites_page/main.go b/examples/favorites_page/main.go new file mode 100644 index 0000000..f2e7e24 --- /dev/null +++ b/examples/favorites_page/main.go @@ -0,0 +1,79 @@ +// favorites_page exercises the per-page favorites listing API +// ([Client.FavoritesPage]) against the live FA site so a caller can see +// exactly what fields come back: HasNext, NextPage, len(Items), and a +// sample of the tag data lifted from each figure's data-tags attribute. +// +// Favorites pagination is cursor-based: each page returns an opaque +// NextPage token that addresses the next page. Pass it back in on the +// next call; treat empty as end-of-pagination. 
+// +// Required environment variables: +// +// FA_A — the `a` session cookie +// FA_B — the `b` session cookie +// CF_CLEARANCE — (optional) cf_clearance cookie if Cloudflare challenges +// FA_UA — (optional) User-Agent matching CF_CLEARANCE +// +// Usage: +// +// go run ./examples/favorites_page <user> [maxPages] +package main + +import ( + "context" + "fmt" + "log" + "os" + "strconv" + + fa "git.anthrove.art/public/go-fa-api" +) + +func main() { + if len(os.Args) < 2 { + log.Fatalf("usage: %s <user> [maxPages]", os.Args[0]) + } + user := os.Args[1] + maxPages := 0 + if len(os.Args) >= 3 { + if n, err := strconv.Atoi(os.Args[2]); err == nil && n > 0 { + maxPages = n + } + } + + opts := []fa.Option{ + fa.WithCookies(fa.Cookies{A: os.Getenv("FA_A"), B: os.Getenv("FA_B")}), + } + if cf := os.Getenv("CF_CLEARANCE"); cf != "" { + opts = append(opts, fa.WithCloudflare(fa.CFCookies{Clearance: cf})) + } + if ua := os.Getenv("FA_UA"); ua != "" { + opts = append(opts, fa.WithUserAgent(ua)) + } + client := fa.New(opts...) 
+ + cursor := "" + pageNum := 0 + for { + pageNum++ + lp, err := client.FavoritesPage(context.Background(), user, cursor) + if err != nil { + log.Fatalf("FavoritesPage(cursor=%q): %v", cursor, err) + } + fmt.Printf("=== page %d cursor=%q items=%d HasNext=%v NextPage=%q ===\n", + pageNum, cursor, len(lp.Items), lp.HasNext, lp.NextPage) + for i, sub := range lp.Items { + fmt.Printf(" [%d] id=%d rating=%s author=%s title=%q\n", + i, sub.ID, sub.Rating, sub.Author.Name, sub.Title) + } + if !lp.HasNext { + fmt.Printf("\nreached end of pagination after %d page(s)\n", pageNum) + return + } + if maxPages > 0 && pageNum >= maxPages { + fmt.Printf("\nstopped at maxPages=%d (HasNext was still true; next cursor=%q)\n", maxPages, lp.NextPage) + return + } + cursor = lp.NextPage + } +} diff --git a/fixtures_refresh_test.go b/fixtures_refresh_test.go index fe725de..a936205 100644 --- a/fixtures_refresh_test.go +++ b/fixtures_refresh_test.go @@ -161,7 +161,7 @@ func TestRefreshFixtures(t *testing.T) { }, { name: "favorites_page1.html", - url: urls.Favorites(favoritesUser, 1), + url: urls.Favorites(favoritesUser), requires: []string{favoritesUser}, notes: "favorites per-item Author should be the original artist", }, diff --git a/gallery.go b/gallery.go index ce9c45b..e72d884 100644 --- a/gallery.go +++ b/gallery.go @@ -3,6 +3,7 @@ package fa import ( "context" "iter" + "strconv" "github.com/PuerkitoBio/goquery" @@ -16,19 +17,50 @@ import ( // / CategorizedTags parsed from the figure's data-tags attribute. Call // [Client.GetSubmission] with the ID to load the full record. func (c *Client) Gallery(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] { - return c.listGallerySection(ctx, name, urls.Gallery, opts, reqOpts) + return c.listPagedSection(ctx, name, urls.Gallery, opts, reqOpts) } // Scraps iterates the user's scraps folder. Same yield shape as Gallery. 
func (c *Client) Scraps(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] { - return c.listGallerySection(ctx, name, urls.Scraps, opts, reqOpts) + return c.listPagedSection(ctx, name, urls.Scraps, opts, reqOpts) } // Favorites iterates the user's favorited submissions. The yielded // *Submission's Author field reflects the original artist (not the user // whose favorites we are walking). +// +// Favorites use a fave-ID cursor for pagination, not sequential page +// numbers, so [ListOptions.StartPage] is ignored — the walk always +// begins at the newest favorite. [ListOptions.MaxPages] still bounds +// the crawl. func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] { - return c.listGallerySection(ctx, name, urls.Favorites, opts, reqOpts) + return func(yield func(*Submission, error) bool) { + cursor := "" + pagesFetched := 0 + for { + if opts.reachedLimit(pagesFetched) { + return + } + lp, err := c.FavoritesPage(ctx, name, cursor, reqOpts...) + if err != nil { + yield(nil, err) + return + } + pagesFetched++ + if len(lp.Items) == 0 { + return + } + for _, s := range lp.Items { + if !yield(s, nil) { + return + } + } + if !lp.HasNext { + return + } + cursor = lp.NextPage + } + } } // GalleryPage fetches a single page of /gallery/{name}/ and returns the @@ -36,25 +68,53 @@ func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, r // 1 for the first page. Use this when driving pagination manually // (resuming from a checkpoint, distributing pages across workers); use // [Client.Gallery] when you just want every item in order. +// +// On a non-final page the returned [ListingPage].NextPage is the next +// page number as a decimal string ("2", "3", …) — pass it back to the +// next call after [strconv.Atoi], or treat it as opaque. 
func (c *Client) GalleryPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) { - return c.fetchListingPage(ctx, name, page, urls.Gallery, reqOpts) + return c.fetchNumberedPage(ctx, name, page, urls.Gallery, reqOpts) } // ScrapsPage is the single-page counterpart to [Client.Scraps]. See // [Client.GalleryPage] for usage notes. func (c *Client) ScrapsPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) { - return c.fetchListingPage(ctx, name, page, urls.Scraps, reqOpts) + return c.fetchNumberedPage(ctx, name, page, urls.Scraps, reqOpts) } -// FavoritesPage is the single-page counterpart to [Client.Favorites]. See -// [Client.GalleryPage] for usage notes. -func (c *Client) FavoritesPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) { - return c.fetchListingPage(ctx, name, page, urls.Favorites, reqOpts) +// FavoritesPage fetches a single page of /favorites/{name}/, addressed +// by the cursor FA emitted on the previous page (empty string for the +// first page). FA paginates favorites with a fave-ID cursor — not a +// sequential page number — so the caller must walk forward by passing +// the returned [ListingPage].NextPage value into the next call. Passing +// a guessed cursor (e.g. "2") makes FA silently return the first page +// and the loop will not terminate. +func (c *Client) FavoritesPage(ctx context.Context, name string, cursor string, reqOpts ...Option) (*ListingPage, error) { + out := &ListingPage{} + err := c.fetch(ctx, urls.FavoritesCursor(name, cursor), func(doc *goquery.Document) error { + items, nextURL, hasNext := parseListingPage(doc, c.cfg.jsonListings) + out.Items = items + out.HasNext = hasNext + if hasNext { + out.NextPage = favoritesCursorFromURL(nextURL) + // If the markup was unrecognisable, refuse to claim a next + // page rather than re-fetching the first one in a loop. 
+ if out.NextPage == "" { + out.HasNext = false + } + } + return nil + }, reqOpts...) + if err != nil { + return nil, err + } + return out, nil } -// fetchListingPage is the shared per-page primitive used by -// GalleryPage / ScrapsPage / FavoritesPage and the iterator engine. -func (c *Client) fetchListingPage( +// fetchNumberedPage is the shared primitive for page-number-based +// listings (Gallery / Scraps). urlFn picks the section-specific URL +// builder; the rest of the pagination machinery is identical. +func (c *Client) fetchNumberedPage( ctx context.Context, name string, page int, @@ -64,9 +124,14 @@ func (c *Client) fetchListingPage( if page < 1 { page = 1 } - out := &ListingPage{Page: page} + out := &ListingPage{} err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error { - out.Items, out.HasNext = parseGalleryPage(doc, c.cfg.jsonListings) + items, _, hasNext := parseListingPage(doc, c.cfg.jsonListings) + out.Items = items + out.HasNext = hasNext + if hasNext { + out.NextPage = strconv.Itoa(page + 1) + } return nil }, reqOpts...) if err != nil { @@ -75,10 +140,10 @@ func (c *Client) fetchListingPage( return out, nil } -// listGallerySection is the shared engine for Gallery / Scraps / Favorites. -// urlFn picks the section-specific URL builder; the rest of the pagination -// machinery is identical across all three sections. -func (c *Client) listGallerySection( +// listPagedSection is the shared engine for the page-number-based +// listing iterators (Gallery / Scraps). Favorites has its own loop in +// [Client.Favorites] because its pagination is cursor-based. 
+func (c *Client) listPagedSection( ctx context.Context, name string, urlFn func(string, int) string, @@ -92,7 +157,7 @@ func (c *Client) listGallerySection( if opts.reachedLimit(pagesFetched) { return } - lp, err := c.fetchListingPage(ctx, name, page, urlFn, reqOpts) + lp, err := c.fetchNumberedPage(ctx, name, page, urlFn, reqOpts) if err != nil { yield(nil, err) return diff --git a/gallery_page_test.go b/gallery_page_test.go index b549de1..e69fb9a 100644 --- a/gallery_page_test.go +++ b/gallery_page_test.go @@ -6,14 +6,15 @@ import ( "net/http" "net/http/httptest" "strings" + "sync" "sync/atomic" "testing" ) // fakeGalleryPage builds a minimal gallery-page response with two figures. -// hasNext controls whether the "Next" anchor is included so detectNextPage -// flips. -func fakeGalleryPage(startID int, hasNext bool) string { +// nextHref is the next-page URL emitted in the Next form; empty means no +// Next button (last page). +func fakeGalleryPage(startID int, nextHref string) string { var b strings.Builder b.WriteString(``) for i := 0; i < 2; i++ { @@ -29,8 +30,8 @@ func fakeGalleryPage(startID int, hasNext bool) string { `, id, id, id, id, id) } - if hasNext { - b.WriteString(`Next`) + if nextHref != "" { + fmt.Fprintf(&b, `
`, nextHref) } b.WriteString(``) return b.String() @@ -41,11 +42,11 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) { mux := http.NewServeMux() mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) { requests.Add(1) - _, _ = w.Write([]byte(fakeGalleryPage(1000, true))) + _, _ = w.Write([]byte(fakeGalleryPage(1000, "/gallery/u/2/"))) }) mux.HandleFunc("/gallery/u/2/", func(w http.ResponseWriter, _ *http.Request) { requests.Add(1) - _, _ = w.Write([]byte(fakeGalleryPage(2000, false))) + _, _ = w.Write([]byte(fakeGalleryPage(2000, ""))) }) srv := httptest.NewServer(mux) defer srv.Close() @@ -55,19 +56,18 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) { if err != nil { t.Fatalf("GalleryPage(1): %v", err) } - if first.Page != 1 { - t.Errorf("first.Page = %d; want 1", first.Page) - } if !first.HasNext { t.Error("first.HasNext = false; want true") } + if first.NextPage != "2" { + t.Errorf("first.NextPage = %q; want \"2\"", first.NextPage) + } if len(first.Items) != 2 { t.Fatalf("first.Items len = %d; want 2", len(first.Items)) } if first.Items[0].ID != 1000 { t.Errorf("first.Items[0].ID = %d; want 1000", first.Items[0].ID) } - // data-tags routed through to the page method too. 
if len(first.Items[0].Tags) == 0 || len(first.Items[0].CategorizedTags.Species) == 0 { t.Errorf("first.Items[0]: tags not populated from data-tags: %+v", first.Items[0]) } @@ -79,8 +79,8 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) { if last.HasNext { t.Error("last.HasNext = true; want false (last page)") } - if last.Page != 2 { - t.Errorf("last.Page = %d; want 2", last.Page) + if last.NextPage != "" { + t.Errorf("last.NextPage = %q; want empty", last.NextPage) } if requests.Load() != 2 { @@ -88,30 +88,12 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) { } } -func TestGalleryPage_ZeroPageDefaultsToOne(t *testing.T) { - mux := http.NewServeMux() - mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) { - _, _ = w.Write([]byte(fakeGalleryPage(1, false))) - }) - srv := httptest.NewServer(mux) - defer srv.Close() - client := newE2EClient(t, srv) - - page, err := client.GalleryPage(context.Background(), "u", 0) - if err != nil { - t.Fatalf("GalleryPage(0): %v", err) - } - if page.Page != 1 { - t.Errorf("page.Page = %d; want 1 (zero should normalise)", page.Page) - } -} - func TestScrapsPage_HitsScrapsRoute(t *testing.T) { var gotPath string mux := http.NewServeMux() mux.HandleFunc("/scraps/u/", func(w http.ResponseWriter, r *http.Request) { gotPath = r.URL.Path - _, _ = w.Write([]byte(fakeGalleryPage(1, false))) + _, _ = w.Write([]byte(fakeGalleryPage(1, ""))) }) srv := httptest.NewServer(mux) defer srv.Close() @@ -125,25 +107,92 @@ func TestScrapsPage_HitsScrapsRoute(t *testing.T) { } } -func TestFavoritesPage_HitsFavoritesRoute(t *testing.T) { - var gotPath string +func TestFavoritesPage_CursorChain(t *testing.T) { + var requests []string + var mu sync.Mutex + record := func(p string) { + mu.Lock() + requests = append(requests, p) + mu.Unlock() + } mux := http.NewServeMux() mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, r *http.Request) { - gotPath = r.URL.Path - _, _ = w.Write([]byte(fakeGalleryPage(1, true))) + 
record(r.URL.Path) + _, _ = w.Write([]byte(fakeGalleryPage(1000, "/favorites/u/9999/next"))) + }) + mux.HandleFunc("/favorites/u/9999/next", func(w http.ResponseWriter, r *http.Request) { + record(r.URL.Path) + _, _ = w.Write([]byte(fakeGalleryPage(2000, ""))) }) srv := httptest.NewServer(mux) defer srv.Close() client := newE2EClient(t, srv) - p, err := client.FavoritesPage(context.Background(), "u", 1) + first, err := client.FavoritesPage(context.Background(), "u", "") if err != nil { - t.Fatalf("FavoritesPage: %v", err) + t.Fatalf("FavoritesPage(first): %v", err) } - if gotPath != "/favorites/u/" { - t.Errorf("gotPath = %q; want /favorites/u/", gotPath) + if !first.HasNext { + t.Fatal("first.HasNext = false; want true") } - if !p.HasNext { - t.Error("p.HasNext = false; want true") + if first.NextPage != "9999" { + t.Errorf("first.NextPage = %q; want \"9999\" (cursor)", first.NextPage) + } + + last, err := client.FavoritesPage(context.Background(), "u", first.NextPage) + if err != nil { + t.Fatalf("FavoritesPage(cursor): %v", err) + } + if last.HasNext { + t.Error("last.HasNext = true; want false") + } + if last.NextPage != "" { + t.Errorf("last.NextPage = %q; want empty", last.NextPage) + } + + want := []string{"/favorites/u/", "/favorites/u/9999/next"} + mu.Lock() + defer mu.Unlock() + if len(requests) != len(want) { + t.Fatalf("requests = %v; want %v", requests, want) + } + for i, w := range want { + if requests[i] != w { + t.Errorf("requests[%d] = %q; want %q", i, requests[i], w) + } + } +} + +// TestFavorites_IteratorTerminates guards against the cursor-loop +// regression that brought us here: with sequential page numbers, the +// Favorites iterator never terminated because FA fell back to page 1 +// for every fake-numbered cursor. 
+func TestFavorites_IteratorTerminates(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(fakeGalleryPage(1, "/favorites/u/42/next"))) + }) + mux.HandleFunc("/favorites/u/42/next", func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(fakeGalleryPage(3, ""))) + }) + srv := httptest.NewServer(mux) + defer srv.Close() + client := newE2EClient(t, srv) + + count := 0 + for sub, err := range client.Favorites(context.Background(), "u", ListOptions{}) { + if err != nil { + t.Fatalf("Favorites: %v", err) + } + if sub == nil { + t.Fatal("nil sub") + } + count++ + if count > 10 { + t.Fatalf("iterator did not terminate; count > 10") + } + } + if count != 4 { + t.Errorf("count = %d; want 4 (2 per page * 2 pages)", count) } } diff --git a/gallery_parser.go b/gallery_parser.go index 037ceca..113e3ce 100644 --- a/gallery_parser.go +++ b/gallery_parser.go @@ -19,6 +19,15 @@ import ( // pure HTML the same behaviour as before [WithExperimentalJSONListings] // existed. func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, hasNext bool) { + items, _, hasNext = parseListingPage(doc, useJSON) + return items, hasNext +} + +// parseListingPage parses one page of a listing endpoint and also returns +// the raw next-page URL FA emits in its "Next" pagination form. Callers +// that need to chain across cursor-based pages (Favorites) consume the +// URL; callers that don't (Gallery / Scraps) can ignore it. 
+func parseListingPage(doc *goquery.Document, useJSON bool) (items []*Submission, nextURL string, hasNext bool) { var jsonData listingJSONMap if useJSON { jsonData = readListingJSON(doc) @@ -28,8 +37,8 @@ func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, items = append(items, s) } }) - hasNext = detectNextPage(doc) - return items, hasNext + nextURL, hasNext = nextPageURL(doc) + return items, nextURL, hasNext } // parseGalleryFigure lifts a single submission preview from a diff --git a/internal/urls/routes.go b/internal/urls/routes.go index 4f07280..76c4847 100644 --- a/internal/urls/routes.go +++ b/internal/urls/routes.go @@ -36,10 +36,25 @@ func Scraps(name string, page int) string { return Host + "/scraps/" + safeName(name) + "/" + pageSegment(page) } -// Favorites returns the URL for a user's favorites page. FA uses a numeric -// page parameter; the first page is 1. -func Favorites(name string, page int) string { - return Host + "/favorites/" + safeName(name) + "/" + pageSegment(page) +// Favorites returns the URL for the first page of a user's favorites. +// FA paginates favorites with a fave-ID cursor (see [FavoritesCursor]), +// not sequential page numbers — passing /favorites/{user}/{N}/ with a +// small integer N silently falls back to the first page. Use this for +// the first page only; follow the cursor returned in [ListingPage].NextPage +// for subsequent pages. +func Favorites(name string) string { + return Host + "/favorites/" + safeName(name) + "/" +} + +// FavoritesCursor returns the URL for a follow-up favorites page, +// addressed by the fave-ID cursor FA emits on the previous page's "Next" +// form (e.g. /favorites/{user}/1951234825/next). The cursor is opaque +// to the SDK — pass through whatever [ListingPage].NextPage gave you. 
+func FavoritesCursor(name, cursor string) string { + if cursor == "" { + return Favorites(name) + } + return Host + "/favorites/" + safeName(name) + "/" + cursor + "/next" } // Journal returns the URL for a single journal entry. diff --git a/pagination.go b/pagination.go index 2788334..cd5f5c2 100644 --- a/pagination.go +++ b/pagination.go @@ -8,20 +8,26 @@ import ( // ListingPage is one page of a listing endpoint (Gallery / Scraps / // Favorites). It carries everything an external caller needs to drive -// pagination by hand: the items, the 1-based page number that produced -// them, and whether FA exposed a "next page" link. +// pagination by hand: the items, whether FA exposed a "next page" link, +// and an opaque NextPage token to pass back into the next per-page call. // // External scrapers that want to manage their own loop (resume from a // checkpoint, run pages in parallel, throttle differently) should call // the per-page methods ([Client.GalleryPage], [Client.ScrapsPage], // [Client.FavoritesPage]) and stop when HasNext is false. Callers that // just want every item in order should keep using the iter.Seq2-shaped -// methods ([Client.Gallery] et al.), which use the same primitive -// internally. +// methods ([Client.Gallery] et al.), which walk pages internally. +// +// NextPage's contents differ by endpoint — for Gallery / Scraps it is +// the next 1-based page number as a decimal string ("2", "3", …); for +// Favorites it is the fave-ID cursor FA emits on the "Next" form +// (because favorites pagination is cursor-based, not page-number-based). +// Pass a Favorites cursor back to [Client.FavoritesPage] unchanged; for +// Gallery / Scraps, convert it with strconv.Atoi for the int page argument. 
type ListingPage struct { - Items []*Submission - HasNext bool - Page int // 1-based page number this result corresponds to + Items []*Submission + HasNext bool + NextPage string // "" when !HasNext; otherwise the opaque token to pass back } // ListOptions configures the pagination of a simple iterator method like @@ -60,17 +66,57 @@ func (o ListOptions) reachedLimit(pagesFetched int) bool { // FA's beta theme renders pagination as either a Next form button or a // hyperlink with a recognisable label. func detectNextPage(doc *goquery.Document) bool { - if doc.Find("form button.button.standard:contains('Next')").Length() > 0 { - return true + url, _ := nextPageURL(doc) + return url != "" +} + +// nextPageURL returns the action/href that the "Next" pagination control +// would navigate to, along with a flag indicating whether one was found. +// Returns ("", false) on the last page (FA emits no Next form/anchor, or +// emits it inside an HTML comment that doesn't parse as an element). +func nextPageURL(doc *goquery.Document) (string, bool) { + var action string + doc.Find("form").EachWithBreak(func(_ int, f *goquery.Selection) bool { + if f.Find("button.button.standard:contains('Next')").Length() == 0 { + return true + } + action, _ = f.Attr("action") + return false + }) + if action != "" { + return action, true } - hit := false + var href string doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool { text := strings.ToLower(trimText(sel)) if strings.Contains(text, "next") || strings.Contains(text, "older") { - hit = true + href, _ = sel.Attr("href") return false } return true }) - return hit + if href == "" { + return "", false + } + return href, true +} + +// favoritesCursorFromURL extracts the fave-ID cursor segment from a +// /favorites/{user}/{cursor}/next URL. 
Returns "" if the URL does not +// match that shape (in which case the caller treats the listing as +// exhausted rather than chasing a malformed cursor). +func favoritesCursorFromURL(rawURL string) string { + // Strip query / fragment, then split. Favorites paths can be relative + // ("/favorites/u/123/next") or absolute — handle both. + rawURL = strings.TrimPrefix(rawURL, "https://www.furaffinity.net") + rawURL = strings.TrimPrefix(rawURL, "http://www.furaffinity.net") + if i := strings.IndexAny(rawURL, "?#"); i >= 0 { + rawURL = rawURL[:i] + } + parts := strings.Split(strings.Trim(rawURL, "/"), "/") + // Expect ["favorites", "{user}", "{cursor}", "next"]. + if len(parts) != 4 || parts[0] != "favorites" || parts[3] != "next" { + return "" + } + return parts[2] }