fix(favorites): use cursor-based pagination instead of page numbers
FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.
Changes:
- urls.Favorites(name) returns the first-page URL; new
urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
helper works for both page-number and cursor pagination.
- Added a regression test that fails if the infinite-loop bug is reintroduced.
- Example: examples/favorites_page demonstrates cursor walking.
This commit is contained in:
79
examples/favorites_page/main.go
Normal file
79
examples/favorites_page/main.go
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
// favorites_page exercises the per-page favorites listing API
|
||||||
|
// ([Client.FavoritesPage]) against the live FA site so a caller can see
|
||||||
|
// exactly what fields come back: HasNext, NextPage, len(Items), and a
|
||||||
|
// sample of the tag data lifted from each figure's data-tags attribute.
|
||||||
|
//
|
||||||
|
// Favorites pagination is cursor-based: each page returns an opaque
|
||||||
|
// NextPage token that addresses the next page. Pass it back in on the
|
||||||
|
// next call; treat empty as end-of-pagination.
|
||||||
|
//
|
||||||
|
// Required environment variables:
|
||||||
|
//
|
||||||
|
// FA_A — the `a` session cookie
|
||||||
|
// FA_B — the `b` session cookie
|
||||||
|
// CF_CLEARANCE — (optional) cf_clearance cookie if Cloudflare challenges
|
||||||
|
// FA_UA — (optional) User-Agent matching CF_CLEARANCE
|
||||||
|
//
|
||||||
|
// Usage:
|
||||||
|
//
|
||||||
|
// go run ./examples/favorites_page <username> [maxPages]
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
|
fa "git.anthrove.art/public/go-fa-api"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if len(os.Args) < 2 {
|
||||||
|
log.Fatalf("usage: %s <username> [maxPages]", os.Args[0])
|
||||||
|
}
|
||||||
|
user := os.Args[1]
|
||||||
|
maxPages := 0
|
||||||
|
if len(os.Args) >= 3 {
|
||||||
|
if n, err := strconv.Atoi(os.Args[2]); err == nil && n > 0 {
|
||||||
|
maxPages = n
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := []fa.Option{
|
||||||
|
fa.WithCookies(fa.Cookies{A: os.Getenv("FA_A"), B: os.Getenv("FA_B")}),
|
||||||
|
}
|
||||||
|
if cf := os.Getenv("CF_CLEARANCE"); cf != "" {
|
||||||
|
opts = append(opts, fa.WithCloudflare(fa.CFCookies{Clearance: cf}))
|
||||||
|
}
|
||||||
|
if ua := os.Getenv("FA_UA"); ua != "" {
|
||||||
|
opts = append(opts, fa.WithUserAgent(ua))
|
||||||
|
}
|
||||||
|
client := fa.New(opts...)
|
||||||
|
|
||||||
|
cursor := ""
|
||||||
|
pageNum := 0
|
||||||
|
for {
|
||||||
|
pageNum++
|
||||||
|
lp, err := client.FavoritesPage(context.Background(), user, cursor)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("FavoritesPage(cursor=%q): %v", cursor, err)
|
||||||
|
}
|
||||||
|
fmt.Printf("=== page %d cursor=%q items=%d HasNext=%v NextPage=%q ===\n",
|
||||||
|
pageNum, cursor, len(lp.Items), lp.HasNext, lp.NextPage)
|
||||||
|
for i, sub := range lp.Items {
|
||||||
|
fmt.Printf(" [%d] id=%d rating=%s author=%s title=%q\n",
|
||||||
|
i, sub.ID, sub.Rating, sub.Author.Name, sub.Title)
|
||||||
|
}
|
||||||
|
if !lp.HasNext {
|
||||||
|
fmt.Printf("\nreached end of pagination after %d page(s)\n", pageNum)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if maxPages > 0 && pageNum >= maxPages {
|
||||||
|
fmt.Printf("\nstopped at maxPages=%d (HasNext was still true; next cursor=%q)\n", maxPages, lp.NextPage)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cursor = lp.NextPage
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -161,7 +161,7 @@ func TestRefreshFixtures(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "favorites_page1.html",
|
name: "favorites_page1.html",
|
||||||
url: urls.Favorites(favoritesUser, 1),
|
url: urls.Favorites(favoritesUser),
|
||||||
requires: []string{favoritesUser},
|
requires: []string{favoritesUser},
|
||||||
notes: "favorites per-item Author should be the original artist",
|
notes: "favorites per-item Author should be the original artist",
|
||||||
},
|
},
|
||||||
|
|||||||
103
gallery.go
103
gallery.go
@@ -3,6 +3,7 @@ package fa
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"iter"
|
"iter"
|
||||||
|
"strconv"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
|
||||||
@@ -16,19 +17,50 @@ import (
|
|||||||
// / CategorizedTags parsed from the figure's data-tags attribute. Call
|
// / CategorizedTags parsed from the figure's data-tags attribute. Call
|
||||||
// [Client.GetSubmission] with the ID to load the full record.
|
// [Client.GetSubmission] with the ID to load the full record.
|
||||||
func (c *Client) Gallery(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
func (c *Client) Gallery(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
||||||
return c.listGallerySection(ctx, name, urls.Gallery, opts, reqOpts)
|
return c.listPagedSection(ctx, name, urls.Gallery, opts, reqOpts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scraps iterates the user's scraps folder. Same yield shape as Gallery.
|
// Scraps iterates the user's scraps folder. Same yield shape as Gallery.
|
||||||
func (c *Client) Scraps(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
func (c *Client) Scraps(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
||||||
return c.listGallerySection(ctx, name, urls.Scraps, opts, reqOpts)
|
return c.listPagedSection(ctx, name, urls.Scraps, opts, reqOpts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Favorites iterates the user's favorited submissions. The yielded
|
// Favorites iterates the user's favorited submissions. The yielded
|
||||||
// *Submission's Author field reflects the original artist (not the user
|
// *Submission's Author field reflects the original artist (not the user
|
||||||
// whose favorites we are walking).
|
// whose favorites we are walking).
|
||||||
|
//
|
||||||
|
// Favorites use a fave-ID cursor for pagination, not sequential page
|
||||||
|
// numbers, so [ListOptions.StartPage] is ignored — the walk always
|
||||||
|
// begins at the newest favorite. [ListOptions.MaxPages] still bounds
|
||||||
|
// the crawl.
|
||||||
func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
||||||
return c.listGallerySection(ctx, name, urls.Favorites, opts, reqOpts)
|
return func(yield func(*Submission, error) bool) {
|
||||||
|
cursor := ""
|
||||||
|
pagesFetched := 0
|
||||||
|
for {
|
||||||
|
if opts.reachedLimit(pagesFetched) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
lp, err := c.FavoritesPage(ctx, name, cursor, reqOpts...)
|
||||||
|
if err != nil {
|
||||||
|
yield(nil, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
pagesFetched++
|
||||||
|
if len(lp.Items) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, s := range lp.Items {
|
||||||
|
if !yield(s, nil) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !lp.HasNext {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
cursor = lp.NextPage
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// GalleryPage fetches a single page of /gallery/{name}/ and returns the
|
// GalleryPage fetches a single page of /gallery/{name}/ and returns the
|
||||||
@@ -36,25 +68,53 @@ func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, r
|
|||||||
// 1 for the first page. Use this when driving pagination manually
|
// 1 for the first page. Use this when driving pagination manually
|
||||||
// (resuming from a checkpoint, distributing pages across workers); use
|
// (resuming from a checkpoint, distributing pages across workers); use
|
||||||
// [Client.Gallery] when you just want every item in order.
|
// [Client.Gallery] when you just want every item in order.
|
||||||
|
//
|
||||||
|
// On a non-final page the returned [ListingPage].NextPage is the next
|
||||||
|
// page number as a decimal string ("2", "3", …) — convert it with
|
||||||
|
// [strconv.Atoi] before passing it as page to the next call.
|
||||||
func (c *Client) GalleryPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
func (c *Client) GalleryPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||||
return c.fetchListingPage(ctx, name, page, urls.Gallery, reqOpts)
|
return c.fetchNumberedPage(ctx, name, page, urls.Gallery, reqOpts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ScrapsPage is the single-page counterpart to [Client.Scraps]. See
|
// ScrapsPage is the single-page counterpart to [Client.Scraps]. See
|
||||||
// [Client.GalleryPage] for usage notes.
|
// [Client.GalleryPage] for usage notes.
|
||||||
func (c *Client) ScrapsPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
func (c *Client) ScrapsPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||||
return c.fetchListingPage(ctx, name, page, urls.Scraps, reqOpts)
|
return c.fetchNumberedPage(ctx, name, page, urls.Scraps, reqOpts)
|
||||||
}
|
}
|
||||||
|
|
||||||
// FavoritesPage is the single-page counterpart to [Client.Favorites]. See
|
// FavoritesPage fetches a single page of /favorites/{name}/, addressed
|
||||||
// [Client.GalleryPage] for usage notes.
|
// by the cursor FA emitted on the previous page (empty string for the
|
||||||
func (c *Client) FavoritesPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
// first page). FA paginates favorites with a fave-ID cursor — not a
|
||||||
return c.fetchListingPage(ctx, name, page, urls.Favorites, reqOpts)
|
// sequential page number — so the caller must walk forward by passing
|
||||||
|
// the returned [ListingPage].NextPage value into the next call. Passing
|
||||||
|
// a guessed cursor (e.g. "2") makes FA silently return the first page
|
||||||
|
// and the loop will not terminate.
|
||||||
|
func (c *Client) FavoritesPage(ctx context.Context, name string, cursor string, reqOpts ...Option) (*ListingPage, error) {
|
||||||
|
out := &ListingPage{}
|
||||||
|
err := c.fetch(ctx, urls.FavoritesCursor(name, cursor), func(doc *goquery.Document) error {
|
||||||
|
items, nextURL, hasNext := parseListingPage(doc, c.cfg.jsonListings)
|
||||||
|
out.Items = items
|
||||||
|
out.HasNext = hasNext
|
||||||
|
if hasNext {
|
||||||
|
out.NextPage = favoritesCursorFromURL(nextURL)
|
||||||
|
// If the markup was unrecognisable, refuse to claim a next
|
||||||
|
// page rather than re-fetching the first one in a loop.
|
||||||
|
if out.NextPage == "" {
|
||||||
|
out.HasNext = false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}, reqOpts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// fetchListingPage is the shared per-page primitive used by
|
// fetchNumberedPage is the shared primitive for page-number-based
|
||||||
// GalleryPage / ScrapsPage / FavoritesPage and the iterator engine.
|
// listings (Gallery / Scraps). urlFn picks the section-specific URL
|
||||||
func (c *Client) fetchListingPage(
|
// builder; the rest of the pagination machinery is identical.
|
||||||
|
func (c *Client) fetchNumberedPage(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
name string,
|
name string,
|
||||||
page int,
|
page int,
|
||||||
@@ -64,9 +124,14 @@ func (c *Client) fetchListingPage(
|
|||||||
if page < 1 {
|
if page < 1 {
|
||||||
page = 1
|
page = 1
|
||||||
}
|
}
|
||||||
out := &ListingPage{Page: page}
|
out := &ListingPage{}
|
||||||
err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error {
|
err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error {
|
||||||
out.Items, out.HasNext = parseGalleryPage(doc, c.cfg.jsonListings)
|
items, _, hasNext := parseListingPage(doc, c.cfg.jsonListings)
|
||||||
|
out.Items = items
|
||||||
|
out.HasNext = hasNext
|
||||||
|
if hasNext {
|
||||||
|
out.NextPage = strconv.Itoa(page + 1)
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}, reqOpts...)
|
}, reqOpts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -75,10 +140,10 @@ func (c *Client) fetchListingPage(
|
|||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// listGallerySection is the shared engine for Gallery / Scraps / Favorites.
|
// listPagedSection is the shared engine for the page-number-based
|
||||||
// urlFn picks the section-specific URL builder; the rest of the pagination
|
// listing iterators (Gallery / Scraps). Favorites has its own loop in
|
||||||
// machinery is identical across all three sections.
|
// [Client.Favorites] because its pagination is cursor-based.
|
||||||
func (c *Client) listGallerySection(
|
func (c *Client) listPagedSection(
|
||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
name string,
|
name string,
|
||||||
urlFn func(string, int) string,
|
urlFn func(string, int) string,
|
||||||
@@ -92,7 +157,7 @@ func (c *Client) listGallerySection(
|
|||||||
if opts.reachedLimit(pagesFetched) {
|
if opts.reachedLimit(pagesFetched) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
lp, err := c.fetchListingPage(ctx, name, page, urlFn, reqOpts)
|
lp, err := c.fetchNumberedPage(ctx, name, page, urlFn, reqOpts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
yield(nil, err)
|
yield(nil, err)
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -6,14 +6,15 @@ import (
|
|||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
// fakeGalleryPage builds a minimal gallery-page response with two figures.
|
// fakeGalleryPage builds a minimal gallery-page response with two figures.
|
||||||
// hasNext controls whether the "Next" anchor is included so detectNextPage
|
// nextHref is the next-page URL emitted in the Next form; empty means no
|
||||||
// flips.
|
// Next button (last page).
|
||||||
func fakeGalleryPage(startID int, hasNext bool) string {
|
func fakeGalleryPage(startID int, nextHref string) string {
|
||||||
var b strings.Builder
|
var b strings.Builder
|
||||||
b.WriteString(`<html><body>`)
|
b.WriteString(`<html><body>`)
|
||||||
for i := 0; i < 2; i++ {
|
for i := 0; i < 2; i++ {
|
||||||
@@ -29,8 +30,8 @@ func fakeGalleryPage(startID int, hasNext bool) string {
|
|||||||
</figcaption>
|
</figcaption>
|
||||||
</figure>`, id, id, id, id, id)
|
</figure>`, id, id, id, id, id)
|
||||||
}
|
}
|
||||||
if hasNext {
|
if nextHref != "" {
|
||||||
b.WriteString(`<a class="button standard" href="/gallery/u/2/">Next</a>`)
|
fmt.Fprintf(&b, `<form action=%q method="get"><button class="button standard" type="submit">Next</button></form>`, nextHref)
|
||||||
}
|
}
|
||||||
b.WriteString(`</body></html>`)
|
b.WriteString(`</body></html>`)
|
||||||
return b.String()
|
return b.String()
|
||||||
@@ -41,11 +42,11 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) {
|
|||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
requests.Add(1)
|
requests.Add(1)
|
||||||
_, _ = w.Write([]byte(fakeGalleryPage(1000, true)))
|
_, _ = w.Write([]byte(fakeGalleryPage(1000, "/gallery/u/2/")))
|
||||||
})
|
})
|
||||||
mux.HandleFunc("/gallery/u/2/", func(w http.ResponseWriter, _ *http.Request) {
|
mux.HandleFunc("/gallery/u/2/", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
requests.Add(1)
|
requests.Add(1)
|
||||||
_, _ = w.Write([]byte(fakeGalleryPage(2000, false)))
|
_, _ = w.Write([]byte(fakeGalleryPage(2000, "")))
|
||||||
})
|
})
|
||||||
srv := httptest.NewServer(mux)
|
srv := httptest.NewServer(mux)
|
||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
@@ -55,19 +56,18 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("GalleryPage(1): %v", err)
|
t.Fatalf("GalleryPage(1): %v", err)
|
||||||
}
|
}
|
||||||
if first.Page != 1 {
|
|
||||||
t.Errorf("first.Page = %d; want 1", first.Page)
|
|
||||||
}
|
|
||||||
if !first.HasNext {
|
if !first.HasNext {
|
||||||
t.Error("first.HasNext = false; want true")
|
t.Error("first.HasNext = false; want true")
|
||||||
}
|
}
|
||||||
|
if first.NextPage != "2" {
|
||||||
|
t.Errorf("first.NextPage = %q; want \"2\"", first.NextPage)
|
||||||
|
}
|
||||||
if len(first.Items) != 2 {
|
if len(first.Items) != 2 {
|
||||||
t.Fatalf("first.Items len = %d; want 2", len(first.Items))
|
t.Fatalf("first.Items len = %d; want 2", len(first.Items))
|
||||||
}
|
}
|
||||||
if first.Items[0].ID != 1000 {
|
if first.Items[0].ID != 1000 {
|
||||||
t.Errorf("first.Items[0].ID = %d; want 1000", first.Items[0].ID)
|
t.Errorf("first.Items[0].ID = %d; want 1000", first.Items[0].ID)
|
||||||
}
|
}
|
||||||
// data-tags routed through to the page method too.
|
|
||||||
if len(first.Items[0].Tags) == 0 || len(first.Items[0].CategorizedTags.Species) == 0 {
|
if len(first.Items[0].Tags) == 0 || len(first.Items[0].CategorizedTags.Species) == 0 {
|
||||||
t.Errorf("first.Items[0]: tags not populated from data-tags: %+v", first.Items[0])
|
t.Errorf("first.Items[0]: tags not populated from data-tags: %+v", first.Items[0])
|
||||||
}
|
}
|
||||||
@@ -79,8 +79,8 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) {
|
|||||||
if last.HasNext {
|
if last.HasNext {
|
||||||
t.Error("last.HasNext = true; want false (last page)")
|
t.Error("last.HasNext = true; want false (last page)")
|
||||||
}
|
}
|
||||||
if last.Page != 2 {
|
if last.NextPage != "" {
|
||||||
t.Errorf("last.Page = %d; want 2", last.Page)
|
t.Errorf("last.NextPage = %q; want empty", last.NextPage)
|
||||||
}
|
}
|
||||||
|
|
||||||
if requests.Load() != 2 {
|
if requests.Load() != 2 {
|
||||||
@@ -88,30 +88,12 @@ func TestGalleryPage_HasNextPropagates(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGalleryPage_ZeroPageDefaultsToOne(t *testing.T) {
|
|
||||||
mux := http.NewServeMux()
|
|
||||||
mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) {
|
|
||||||
_, _ = w.Write([]byte(fakeGalleryPage(1, false)))
|
|
||||||
})
|
|
||||||
srv := httptest.NewServer(mux)
|
|
||||||
defer srv.Close()
|
|
||||||
client := newE2EClient(t, srv)
|
|
||||||
|
|
||||||
page, err := client.GalleryPage(context.Background(), "u", 0)
|
|
||||||
if err != nil {
|
|
||||||
t.Fatalf("GalleryPage(0): %v", err)
|
|
||||||
}
|
|
||||||
if page.Page != 1 {
|
|
||||||
t.Errorf("page.Page = %d; want 1 (zero should normalise)", page.Page)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestScrapsPage_HitsScrapsRoute(t *testing.T) {
|
func TestScrapsPage_HitsScrapsRoute(t *testing.T) {
|
||||||
var gotPath string
|
var gotPath string
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.HandleFunc("/scraps/u/", func(w http.ResponseWriter, r *http.Request) {
|
mux.HandleFunc("/scraps/u/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
gotPath = r.URL.Path
|
gotPath = r.URL.Path
|
||||||
_, _ = w.Write([]byte(fakeGalleryPage(1, false)))
|
_, _ = w.Write([]byte(fakeGalleryPage(1, "")))
|
||||||
})
|
})
|
||||||
srv := httptest.NewServer(mux)
|
srv := httptest.NewServer(mux)
|
||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
@@ -125,25 +107,92 @@ func TestScrapsPage_HitsScrapsRoute(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFavoritesPage_HitsFavoritesRoute(t *testing.T) {
|
func TestFavoritesPage_CursorChain(t *testing.T) {
|
||||||
var gotPath string
|
var requests []string
|
||||||
|
var mu sync.Mutex
|
||||||
|
record := func(p string) {
|
||||||
|
mu.Lock()
|
||||||
|
requests = append(requests, p)
|
||||||
|
mu.Unlock()
|
||||||
|
}
|
||||||
mux := http.NewServeMux()
|
mux := http.NewServeMux()
|
||||||
mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, r *http.Request) {
|
mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
gotPath = r.URL.Path
|
record(r.URL.Path)
|
||||||
_, _ = w.Write([]byte(fakeGalleryPage(1, true)))
|
_, _ = w.Write([]byte(fakeGalleryPage(1000, "/favorites/u/9999/next")))
|
||||||
|
})
|
||||||
|
mux.HandleFunc("/favorites/u/9999/next", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
record(r.URL.Path)
|
||||||
|
_, _ = w.Write([]byte(fakeGalleryPage(2000, "")))
|
||||||
})
|
})
|
||||||
srv := httptest.NewServer(mux)
|
srv := httptest.NewServer(mux)
|
||||||
defer srv.Close()
|
defer srv.Close()
|
||||||
client := newE2EClient(t, srv)
|
client := newE2EClient(t, srv)
|
||||||
|
|
||||||
p, err := client.FavoritesPage(context.Background(), "u", 1)
|
first, err := client.FavoritesPage(context.Background(), "u", "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("FavoritesPage: %v", err)
|
t.Fatalf("FavoritesPage(first): %v", err)
|
||||||
}
|
}
|
||||||
if gotPath != "/favorites/u/" {
|
if !first.HasNext {
|
||||||
t.Errorf("gotPath = %q; want /favorites/u/", gotPath)
|
t.Fatal("first.HasNext = false; want true")
|
||||||
}
|
}
|
||||||
if !p.HasNext {
|
if first.NextPage != "9999" {
|
||||||
t.Error("p.HasNext = false; want true")
|
t.Errorf("first.NextPage = %q; want \"9999\" (cursor)", first.NextPage)
|
||||||
|
}
|
||||||
|
|
||||||
|
last, err := client.FavoritesPage(context.Background(), "u", first.NextPage)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("FavoritesPage(cursor): %v", err)
|
||||||
|
}
|
||||||
|
if last.HasNext {
|
||||||
|
t.Error("last.HasNext = true; want false")
|
||||||
|
}
|
||||||
|
if last.NextPage != "" {
|
||||||
|
t.Errorf("last.NextPage = %q; want empty", last.NextPage)
|
||||||
|
}
|
||||||
|
|
||||||
|
want := []string{"/favorites/u/", "/favorites/u/9999/next"}
|
||||||
|
mu.Lock()
|
||||||
|
defer mu.Unlock()
|
||||||
|
if len(requests) != len(want) {
|
||||||
|
t.Fatalf("requests = %v; want %v", requests, want)
|
||||||
|
}
|
||||||
|
for i, w := range want {
|
||||||
|
if requests[i] != w {
|
||||||
|
t.Errorf("requests[%d] = %q; want %q", i, requests[i], w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestFavorites_IteratorTerminates guards against the cursor-loop
|
||||||
|
// regression that brought us here: with sequential page numbers, the
|
||||||
|
// Favorites iterator never terminated because FA fell back to page 1
|
||||||
|
// for every fake-numbered cursor.
|
||||||
|
func TestFavorites_IteratorTerminates(t *testing.T) {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
_, _ = w.Write([]byte(fakeGalleryPage(1, "/favorites/u/42/next")))
|
||||||
|
})
|
||||||
|
mux.HandleFunc("/favorites/u/42/next", func(w http.ResponseWriter, _ *http.Request) {
|
||||||
|
_, _ = w.Write([]byte(fakeGalleryPage(3, "")))
|
||||||
|
})
|
||||||
|
srv := httptest.NewServer(mux)
|
||||||
|
defer srv.Close()
|
||||||
|
client := newE2EClient(t, srv)
|
||||||
|
|
||||||
|
count := 0
|
||||||
|
for sub, err := range client.Favorites(context.Background(), "u", ListOptions{}) {
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Favorites: %v", err)
|
||||||
|
}
|
||||||
|
if sub == nil {
|
||||||
|
t.Fatal("nil sub")
|
||||||
|
}
|
||||||
|
count++
|
||||||
|
if count > 10 {
|
||||||
|
t.Fatalf("iterator did not terminate; count > 10")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if count != 4 {
|
||||||
|
t.Errorf("count = %d; want 4 (2 per page * 2 pages)", count)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,6 +19,15 @@ import (
|
|||||||
// pure HTML the same behaviour as before [WithExperimentalJSONListings]
|
// pure HTML the same behaviour as before [WithExperimentalJSONListings]
|
||||||
// existed.
|
// existed.
|
||||||
func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, hasNext bool) {
|
func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, hasNext bool) {
|
||||||
|
items, _, hasNext = parseListingPage(doc, useJSON)
|
||||||
|
return items, hasNext
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseListingPage parses one page of a listing endpoint and also returns
|
||||||
|
// the raw next-page URL FA emits in its "Next" pagination form. Callers
|
||||||
|
// that need to chain across cursor-based pages (Favorites) consume the
|
||||||
|
// URL; callers that don't (Gallery / Scraps) can ignore it.
|
||||||
|
func parseListingPage(doc *goquery.Document, useJSON bool) (items []*Submission, nextURL string, hasNext bool) {
|
||||||
var jsonData listingJSONMap
|
var jsonData listingJSONMap
|
||||||
if useJSON {
|
if useJSON {
|
||||||
jsonData = readListingJSON(doc)
|
jsonData = readListingJSON(doc)
|
||||||
@@ -28,8 +37,8 @@ func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission,
|
|||||||
items = append(items, s)
|
items = append(items, s)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
hasNext = detectNextPage(doc)
|
nextURL, hasNext = nextPageURL(doc)
|
||||||
return items, hasNext
|
return items, nextURL, hasNext
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseGalleryFigure lifts a single submission preview from a
|
// parseGalleryFigure lifts a single submission preview from a
|
||||||
|
|||||||
@@ -36,10 +36,25 @@ func Scraps(name string, page int) string {
|
|||||||
return Host + "/scraps/" + safeName(name) + "/" + pageSegment(page)
|
return Host + "/scraps/" + safeName(name) + "/" + pageSegment(page)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Favorites returns the URL for a user's favorites page. FA uses a numeric
|
// Favorites returns the URL for the first page of a user's favorites.
|
||||||
// page parameter; the first page is 1.
|
// FA paginates favorites with a fave-ID cursor (see [FavoritesCursor]),
|
||||||
func Favorites(name string, page int) string {
|
// not sequential page numbers — passing /favorites/{user}/{N}/ with a
|
||||||
return Host + "/favorites/" + safeName(name) + "/" + pageSegment(page)
|
// small integer N silently falls back to the first page. Use this for
|
||||||
|
// the first page only; follow the cursor returned in [ListingPage].NextPage
|
||||||
|
// for subsequent pages.
|
||||||
|
func Favorites(name string) string {
|
||||||
|
return Host + "/favorites/" + safeName(name) + "/"
|
||||||
|
}
|
||||||
|
|
||||||
|
// FavoritesCursor returns the URL for a follow-up favorites page,
|
||||||
|
// addressed by the fave-ID cursor FA emits on the previous page's "Next"
|
||||||
|
// form (e.g. /favorites/{user}/1951234825/next). The cursor is opaque
|
||||||
|
// to the SDK — pass through whatever [ListingPage].NextPage gave you.
|
||||||
|
func FavoritesCursor(name, cursor string) string {
|
||||||
|
if cursor == "" {
|
||||||
|
return Favorites(name)
|
||||||
|
}
|
||||||
|
return Host + "/favorites/" + safeName(name) + "/" + cursor + "/next"
|
||||||
}
|
}
|
||||||
|
|
||||||
// Journal returns the URL for a single journal entry.
|
// Journal returns the URL for a single journal entry.
|
||||||
|
|||||||
@@ -8,20 +8,26 @@ import (
|
|||||||
|
|
||||||
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
|
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
|
||||||
// Favorites). It carries everything an external caller needs to drive
|
// Favorites). It carries everything an external caller needs to drive
|
||||||
// pagination by hand: the items, the 1-based page number that produced
|
// pagination by hand: the items, whether FA exposed a "next page" link,
|
||||||
// them, and whether FA exposed a "next page" link.
|
// and an opaque NextPage token to pass back into the next per-page call.
|
||||||
//
|
//
|
||||||
// External scrapers that want to manage their own loop (resume from a
|
// External scrapers that want to manage their own loop (resume from a
|
||||||
// checkpoint, run pages in parallel, throttle differently) should call
|
// checkpoint, run pages in parallel, throttle differently) should call
|
||||||
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
|
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
|
||||||
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
|
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
|
||||||
// just want every item in order should keep using the iter.Seq2-shaped
|
// just want every item in order should keep using the iter.Seq2-shaped
|
||||||
// methods ([Client.Gallery] et al.), which use the same primitive
|
// methods ([Client.Gallery] et al.), which walk pages internally.
|
||||||
// internally.
|
//
|
||||||
|
// NextPage's contents differ by endpoint — for Gallery / Scraps it is
|
||||||
|
// the next 1-based page number as a decimal string ("2", "3", …); for
|
||||||
|
// Favorites it is the fave-ID cursor FA emits on the "Next" form
|
||||||
|
// (because favorites pagination is cursor-based, not page-number-based).
|
||||||
|
// For Favorites, pass the value back unchanged. For Gallery / Scraps,
|
||||||
|
// convert it with strconv.Atoi first: those per-page methods take an int.
|
||||||
type ListingPage struct {
|
type ListingPage struct {
|
||||||
Items []*Submission
|
Items []*Submission
|
||||||
HasNext bool
|
HasNext bool
|
||||||
Page int // 1-based page number this result corresponds to
|
NextPage string // "" when !HasNext; otherwise the opaque token to pass back
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListOptions configures the pagination of a simple iterator method like
|
// ListOptions configures the pagination of a simple iterator method like
|
||||||
@@ -60,17 +66,57 @@ func (o ListOptions) reachedLimit(pagesFetched int) bool {
|
|||||||
// FA's beta theme renders pagination as either a Next form button or a
|
// FA's beta theme renders pagination as either a Next form button or a
|
||||||
// hyperlink with a recognisable label.
|
// hyperlink with a recognisable label.
|
||||||
func detectNextPage(doc *goquery.Document) bool {
|
func detectNextPage(doc *goquery.Document) bool {
|
||||||
if doc.Find("form button.button.standard:contains('Next')").Length() > 0 {
|
url, _ := nextPageURL(doc)
|
||||||
return true
|
return url != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// nextPageURL returns the action/href that the "Next" pagination control
|
||||||
|
// would navigate to, along with a flag indicating whether one was found.
|
||||||
|
// Returns ("", false) on the last page (FA emits no Next form/anchor, or
|
||||||
|
// emits it inside an HTML comment that doesn't parse as an element).
|
||||||
|
func nextPageURL(doc *goquery.Document) (string, bool) {
|
||||||
|
var action string
|
||||||
|
doc.Find("form").EachWithBreak(func(_ int, f *goquery.Selection) bool {
|
||||||
|
if f.Find("button.button.standard:contains('Next')").Length() == 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
action, _ = f.Attr("action")
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
if action != "" {
|
||||||
|
return action, true
|
||||||
}
|
}
|
||||||
hit := false
|
var href string
|
||||||
doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
|
doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
|
||||||
text := strings.ToLower(trimText(sel))
|
text := strings.ToLower(trimText(sel))
|
||||||
if strings.Contains(text, "next") || strings.Contains(text, "older") {
|
if strings.Contains(text, "next") || strings.Contains(text, "older") {
|
||||||
hit = true
|
href, _ = sel.Attr("href")
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return true
|
return true
|
||||||
})
|
})
|
||||||
return hit
|
if href == "" {
|
||||||
|
return "", false
|
||||||
|
}
|
||||||
|
return href, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// favoritesCursorFromURL extracts the fave-ID cursor segment from a
// /favorites/{user}/{cursor}/next URL. Returns "" if the URL does not
// match that shape (in which case the caller treats the listing as
// exhausted rather than chasing a malformed cursor).
//
// rawURL may be a relative path ("/favorites/u/123/next"), an absolute
// URL on any scheme and host ("https://host/favorites/u/123/next"), or a
// protocol-relative URL ("//host/favorites/u/123/next"). Any query
// string or fragment is ignored.
func favoritesCursorFromURL(rawURL string) string {
	// Drop the query / fragment first so nothing in it can be mistaken
	// for a scheme separator or a path segment.
	if i := strings.IndexAny(rawURL, "?#"); i >= 0 {
		rawURL = rawURL[:i]
	}

	// Reduce absolute and protocol-relative URLs to their path. The host
	// is deliberately not compared against a fixed name, so links on the
	// bare domain or on a test server still resolve.
	path := rawURL
	if scheme, rest, ok := strings.Cut(rawURL, "://"); ok && scheme != "" && !strings.Contains(scheme, "/") {
		_, path, _ = strings.Cut(rest, "/")
	} else if strings.HasPrefix(rawURL, "//") {
		_, path, _ = strings.Cut(rawURL[2:], "/")
	}

	parts := strings.Split(strings.Trim(path, "/"), "/")
	// Expect ["favorites", "{user}", "{cursor}", "next"].
	if len(parts) != 4 || parts[0] != "favorites" || parts[3] != "next" {
		return ""
	}
	return parts[2]
}
|
||||||
|
|||||||
Reference in New Issue
Block a user