feat(listing): add per-page methods with HasNext flag
GalleryPage / ScrapsPage / FavoritesPage return a ListingPage struct carrying the page items, the 1-based page number, and a HasNext flag that mirrors FA's "next page" link. This lets external scrapers drive their own pagination loop (checkpoint resume, parallel workers, custom throttling) without re-implementing the page-walking code. The existing iter.Seq2-shaped methods now share the same per-page primitive internally so behaviour stays in lock-step.
This commit is contained in:
62
gallery.go
62
gallery.go
@@ -12,7 +12,8 @@ import (
|
||||
// Gallery iterates the submissions in a user's main gallery, newest first.
|
||||
//
|
||||
// Each yielded *Submission carries only the fields visible on the listing
|
||||
// page: ID, Title, Author (for favorites), ThumbURL, and Rating. Call
|
||||
// page: ID, Title, Author (for favorites), ThumbURL, Rating, and the Tags
|
||||
// / CategorizedTags parsed from the figure's data-tags attribute. Call
|
||||
// [Client.GetSubmission] with the ID to load the full record.
|
||||
func (c *Client) Gallery(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
|
||||
return c.listGallerySection(ctx, name, urls.Gallery, opts, reqOpts)
|
||||
@@ -30,6 +31,50 @@ func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, r
|
||||
return c.listGallerySection(ctx, name, urls.Favorites, opts, reqOpts)
|
||||
}
|
||||
|
||||
// GalleryPage fetches a single page of /gallery/{name}/ and returns the
|
||||
// items along with whether more pages exist. Pages are 1-based; pass 0 or
|
||||
// 1 for the first page. Use this when driving pagination manually
|
||||
// (resuming from a checkpoint, distributing pages across workers); use
|
||||
// [Client.Gallery] when you just want every item in order.
|
||||
func (c *Client) GalleryPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||
return c.fetchListingPage(ctx, name, page, urls.Gallery, reqOpts)
|
||||
}
|
||||
|
||||
// ScrapsPage is the single-page counterpart to [Client.Scraps]. See
|
||||
// [Client.GalleryPage] for usage notes.
|
||||
func (c *Client) ScrapsPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||
return c.fetchListingPage(ctx, name, page, urls.Scraps, reqOpts)
|
||||
}
|
||||
|
||||
// FavoritesPage is the single-page counterpart to [Client.Favorites]. See
|
||||
// [Client.GalleryPage] for usage notes.
|
||||
func (c *Client) FavoritesPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
|
||||
return c.fetchListingPage(ctx, name, page, urls.Favorites, reqOpts)
|
||||
}
|
||||
|
||||
// fetchListingPage is the shared per-page primitive used by
|
||||
// GalleryPage / ScrapsPage / FavoritesPage and the iterator engine.
|
||||
func (c *Client) fetchListingPage(
|
||||
ctx context.Context,
|
||||
name string,
|
||||
page int,
|
||||
urlFn func(string, int) string,
|
||||
reqOpts []Option,
|
||||
) (*ListingPage, error) {
|
||||
if page < 1 {
|
||||
page = 1
|
||||
}
|
||||
out := &ListingPage{Page: page}
|
||||
err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error {
|
||||
out.Items, out.HasNext = parseGalleryPage(doc, c.cfg.jsonListings)
|
||||
return nil
|
||||
}, reqOpts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// listGallerySection is the shared engine for Gallery / Scraps / Favorites.
|
||||
// urlFn picks the section-specific URL builder; the rest of the pagination
|
||||
// machinery is identical across all three sections.
|
||||
@@ -47,28 +92,21 @@ func (c *Client) listGallerySection(
|
||||
if opts.reachedLimit(pagesFetched) {
|
||||
return
|
||||
}
|
||||
var (
|
||||
items []*Submission
|
||||
hasNext bool
|
||||
)
|
||||
err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error {
|
||||
items, hasNext = parseGalleryPage(doc, c.cfg.jsonListings)
|
||||
return nil
|
||||
}, reqOpts...)
|
||||
lp, err := c.fetchListingPage(ctx, name, page, urlFn, reqOpts)
|
||||
if err != nil {
|
||||
yield(nil, err)
|
||||
return
|
||||
}
|
||||
pagesFetched++
|
||||
if len(items) == 0 {
|
||||
if len(lp.Items) == 0 {
|
||||
return
|
||||
}
|
||||
for _, s := range items {
|
||||
for _, s := range lp.Items {
|
||||
if !yield(s, nil) {
|
||||
return
|
||||
}
|
||||
}
|
||||
if !hasNext {
|
||||
if !lp.HasNext {
|
||||
return
|
||||
}
|
||||
page++
|
||||
|
||||
149
gallery_page_test.go
Normal file
149
gallery_page_test.go
Normal file
@@ -0,0 +1,149 @@
|
||||
package fa
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// fakeGalleryPage renders a minimal gallery-page HTML document holding two
// figures whose submission IDs are startID and startID+1. When hasNext is
// true a "Next" anchor is appended after the figures, so that next-page
// detection (detectNextPage) flips for the response.
func fakeGalleryPage(startID int, hasNext bool) string {
	// One listing figure; every %d verb receives the submission ID.
	const figureTmpl = `
<figure id="sid-%d" class="t-image r-general">
<a href="/view/%d/" title="Sub %d">
<img data-tags="u_someartist c_artwork_digital t_all s_wolf wolf" src="//d.example/t/%d.png"/>
</a>
<figcaption>
<p>Sub %d</p>
<a href="/user/someartist/">someartist</a>
</figcaption>
</figure>`
	const nextLink = `<a class="button standard" href="/gallery/u/2/">Next</a>`

	var page strings.Builder
	page.WriteString(`<html><body>`)
	for _, id := range []int{startID, startID + 1} {
		fmt.Fprintf(&page, figureTmpl, id, id, id, id, id)
	}
	if hasNext {
		page.WriteString(nextLink)
	}
	page.WriteString(`</body></html>`)
	return page.String()
}
|
||||
|
||||
func TestGalleryPage_HasNextPropagates(t *testing.T) {
|
||||
var requests atomic.Int32
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) {
|
||||
requests.Add(1)
|
||||
_, _ = w.Write([]byte(fakeGalleryPage(1000, true)))
|
||||
})
|
||||
mux.HandleFunc("/gallery/u/2/", func(w http.ResponseWriter, _ *http.Request) {
|
||||
requests.Add(1)
|
||||
_, _ = w.Write([]byte(fakeGalleryPage(2000, false)))
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
client := newE2EClient(t, srv)
|
||||
|
||||
first, err := client.GalleryPage(context.Background(), "u", 1)
|
||||
if err != nil {
|
||||
t.Fatalf("GalleryPage(1): %v", err)
|
||||
}
|
||||
if first.Page != 1 {
|
||||
t.Errorf("first.Page = %d; want 1", first.Page)
|
||||
}
|
||||
if !first.HasNext {
|
||||
t.Error("first.HasNext = false; want true")
|
||||
}
|
||||
if len(first.Items) != 2 {
|
||||
t.Fatalf("first.Items len = %d; want 2", len(first.Items))
|
||||
}
|
||||
if first.Items[0].ID != 1000 {
|
||||
t.Errorf("first.Items[0].ID = %d; want 1000", first.Items[0].ID)
|
||||
}
|
||||
// data-tags routed through to the page method too.
|
||||
if len(first.Items[0].Tags) == 0 || len(first.Items[0].CategorizedTags.Species) == 0 {
|
||||
t.Errorf("first.Items[0]: tags not populated from data-tags: %+v", first.Items[0])
|
||||
}
|
||||
|
||||
last, err := client.GalleryPage(context.Background(), "u", 2)
|
||||
if err != nil {
|
||||
t.Fatalf("GalleryPage(2): %v", err)
|
||||
}
|
||||
if last.HasNext {
|
||||
t.Error("last.HasNext = true; want false (last page)")
|
||||
}
|
||||
if last.Page != 2 {
|
||||
t.Errorf("last.Page = %d; want 2", last.Page)
|
||||
}
|
||||
|
||||
if requests.Load() != 2 {
|
||||
t.Errorf("requests = %d; want 2", requests.Load())
|
||||
}
|
||||
}
|
||||
|
||||
func TestGalleryPage_ZeroPageDefaultsToOne(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) {
|
||||
_, _ = w.Write([]byte(fakeGalleryPage(1, false)))
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
client := newE2EClient(t, srv)
|
||||
|
||||
page, err := client.GalleryPage(context.Background(), "u", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("GalleryPage(0): %v", err)
|
||||
}
|
||||
if page.Page != 1 {
|
||||
t.Errorf("page.Page = %d; want 1 (zero should normalise)", page.Page)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScrapsPage_HitsScrapsRoute(t *testing.T) {
|
||||
var gotPath string
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/scraps/u/", func(w http.ResponseWriter, r *http.Request) {
|
||||
gotPath = r.URL.Path
|
||||
_, _ = w.Write([]byte(fakeGalleryPage(1, false)))
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
client := newE2EClient(t, srv)
|
||||
|
||||
if _, err := client.ScrapsPage(context.Background(), "u", 1); err != nil {
|
||||
t.Fatalf("ScrapsPage: %v", err)
|
||||
}
|
||||
if gotPath != "/scraps/u/" {
|
||||
t.Errorf("gotPath = %q; want /scraps/u/", gotPath)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFavoritesPage_HitsFavoritesRoute(t *testing.T) {
|
||||
var gotPath string
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, r *http.Request) {
|
||||
gotPath = r.URL.Path
|
||||
_, _ = w.Write([]byte(fakeGalleryPage(1, true)))
|
||||
})
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
client := newE2EClient(t, srv)
|
||||
|
||||
p, err := client.FavoritesPage(context.Background(), "u", 1)
|
||||
if err != nil {
|
||||
t.Fatalf("FavoritesPage: %v", err)
|
||||
}
|
||||
if gotPath != "/favorites/u/" {
|
||||
t.Errorf("gotPath = %q; want /favorites/u/", gotPath)
|
||||
}
|
||||
if !p.HasNext {
|
||||
t.Error("p.HasNext = false; want true")
|
||||
}
|
||||
}
|
||||
@@ -6,6 +6,24 @@ import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
|
||||
// Favorites). It carries everything an external caller needs to drive
|
||||
// pagination by hand: the items, the 1-based page number that produced
|
||||
// them, and whether FA exposed a "next page" link.
|
||||
//
|
||||
// External scrapers that want to manage their own loop (resume from a
|
||||
// checkpoint, run pages in parallel, throttle differently) should call
|
||||
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
|
||||
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
|
||||
// just want every item in order should keep using the iter.Seq2-shaped
|
||||
// methods ([Client.Gallery] et al.), which use the same primitive
|
||||
// internally.
|
||||
type ListingPage struct {
|
||||
Items []*Submission
|
||||
HasNext bool
|
||||
Page int // 1-based page number this result corresponds to
|
||||
}
|
||||
|
||||
// ListOptions configures the pagination of a simple iterator method like
|
||||
// [Client.Gallery] or [Client.Notes]. Filtered iterators ([Client.Search],
|
||||
// [Client.Browse]) use their own option structs that fold the same fields
|
||||
|
||||
Reference in New Issue
Block a user