4 Commits
v0.0.3 ... main

Author SHA1 Message Date
95193fb66d fix(notes): take last numeric segment of href, not the first
The notes listing renders each thread link as
/msg/pms/{folder}/{noteID}/#message. extractIntFromHref returned the
first numeric segment it found, which was always the folder index (1
for the inbox), so every NotePreview.ID came out as 1 and any
follow-up GetNote(np.ID) call failed with "this message has either
been deleted or is not yours".

Surfaced by an end-to-end smoke run against the live site. Limited
to the notes parser; the other extractIntFromHref callers
(/view/{id}/, /journal/{id}/) only ever have a single numeric segment
so they are unaffected.
2026-06-02 22:52:50 +02:00
83487e531a fix(favorites): use cursor-based pagination instead of page numbers
FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.

Changes:
- urls.Favorites(name) returns the first-page URL; new
  urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
  Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
  Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
  field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
  is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
  helper works for both page-number and cursor pagination.
- Added regression test that fails on the infinite-loop bug.
- Example: examples/favorites_page demonstrates cursor walking.
2026-06-02 22:44:14 +02:00
8f4767966a feat(listing): add per-page methods with HasNext flag
GalleryPage / ScrapsPage / FavoritesPage return a ListingPage struct
carrying the page items, the 1-based page number, and a HasNext flag
that mirrors FA's "next page" link. This lets external scrapers drive
their own pagination loop (checkpoint resume, parallel workers,
custom throttling) without re-implementing the page-walking code.

The existing iter.Seq2-shaped methods now share the same per-page
primitive internally so behaviour stays in lock-step.
2026-06-02 22:28:49 +02:00
a2fc1b7e32 feat(listing): populate Tags and CategorizedTags from figure data-tags
FA's beta listing pages emit each submission's tag list on the
figure's <img data-tags="..."> attribute, mixing prefixed system tags
(s_/c_/a_/u_/t_) with the unprefixed keyword list. Reading it during
gallery-page parse lets callers classify favorites/gallery/scraps/
browse/search/inbox items at scrape time, avoiding a /view/{id}
round-trip per submission.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 21:53:56 +02:00
10 changed files with 653 additions and 36 deletions

View File

@@ -0,0 +1,79 @@
// favorites_page exercises the per-page favorites listing API
// ([Client.FavoritesPage]) against the live FA site so a caller can see
// exactly what fields come back: HasNext, NextPage, len(Items), and a
// sample of the tag data lifted from each figure's data-tags attribute.
//
// Favorites pagination is cursor-based: each page returns an opaque
// NextPage token that addresses the next page. Pass it back in on the
// next call; treat empty as end-of-pagination.
//
// Required environment variables:
//
// FA_A — the `a` session cookie
// FA_B — the `b` session cookie
// CF_CLEARANCE — (optional) cf_clearance cookie if Cloudflare challenges
// FA_UA — (optional) User-Agent matching CF_CLEARANCE
//
// Usage:
//
// go run ./examples/favorites_page <username> [maxPages]
package main
import (
"context"
"fmt"
"log"
"os"
"strconv"
fa "git.anthrove.art/public/go-fa-api"
)
// main pages through <username>'s favorites with Client.FavoritesPage,
// printing one header line per page and one line per submission. It stops
// when FA reports no further page, or once the optional [maxPages]
// argument has been reached.
func main() {
	args := os.Args
	if len(args) < 2 {
		log.Fatalf("usage: %s <username> [maxPages]", args[0])
	}
	user := args[1]

	// maxPages == 0 means "no limit". A missing, non-numeric, or
	// non-positive second argument leaves it at 0.
	var maxPages int
	if len(args) >= 3 {
		parsed, convErr := strconv.Atoi(args[2])
		if convErr == nil && parsed > 0 {
			maxPages = parsed
		}
	}

	// Session cookies are mandatory; the Cloudflare cookie and the
	// User-Agent override are only applied when their variables are set.
	session := fa.Cookies{A: os.Getenv("FA_A"), B: os.Getenv("FA_B")}
	opts := []fa.Option{fa.WithCookies(session)}
	if clearance := os.Getenv("CF_CLEARANCE"); clearance != "" {
		opts = append(opts, fa.WithCloudflare(fa.CFCookies{Clearance: clearance}))
	}
	if agent := os.Getenv("FA_UA"); agent != "" {
		opts = append(opts, fa.WithUserAgent(agent))
	}
	client := fa.New(opts...)

	ctx := context.Background()
	cursor := "" // the empty cursor addresses the first page
	for pageNum := 1; ; pageNum++ {
		lp, err := client.FavoritesPage(ctx, user, cursor)
		if err != nil {
			log.Fatalf("FavoritesPage(cursor=%q): %v", cursor, err)
		}

		fmt.Printf("=== page %d cursor=%q items=%d HasNext=%v NextPage=%q ===\n",
			pageNum, cursor, len(lp.Items), lp.HasNext, lp.NextPage)
		for idx, item := range lp.Items {
			fmt.Printf(" [%d] id=%d rating=%s author=%s title=%q\n",
				idx, item.ID, item.Rating, item.Author.Name, item.Title)
		}

		switch {
		case !lp.HasNext:
			fmt.Printf("\nreached end of pagination after %d page(s)\n", pageNum)
			return
		case maxPages > 0 && pageNum >= maxPages:
			fmt.Printf("\nstopped at maxPages=%d (HasNext was still true; next cursor=%q)\n", maxPages, lp.NextPage)
			return
		}

		// Feed the token FA handed back into the next request.
		cursor = lp.NextPage
	}
}

View File

@@ -161,7 +161,7 @@ func TestRefreshFixtures(t *testing.T) {
},
{
name: "favorites_page1.html",
url: urls.Favorites(favoritesUser, 1),
url: urls.Favorites(favoritesUser),
requires: []string{favoritesUser},
notes: "favorites per-item Author should be the original artist",
},

View File

@@ -3,6 +3,7 @@ package fa
import (
"context"
"iter"
"strconv"
"github.com/PuerkitoBio/goquery"
@@ -12,28 +13,137 @@ import (
// Gallery iterates the submissions in a user's main gallery, newest first.
//
// Each yielded *Submission carries only the fields visible on the listing
// page: ID, Title, Author (for favorites), ThumbURL, and Rating. Call
// page: ID, Title, Author (for favorites), ThumbURL, Rating, and the Tags
// / CategorizedTags parsed from the figure's data-tags attribute. Call
// [Client.GetSubmission] with the ID to load the full record.
func (c *Client) Gallery(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
return c.listGallerySection(ctx, name, urls.Gallery, opts, reqOpts)
return c.listPagedSection(ctx, name, urls.Gallery, opts, reqOpts)
}
// Scraps iterates the user's scraps folder. Same yield shape as Gallery.
func (c *Client) Scraps(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
return c.listGallerySection(ctx, name, urls.Scraps, opts, reqOpts)
return c.listPagedSection(ctx, name, urls.Scraps, opts, reqOpts)
}
// Favorites iterates the user's favorited submissions. The yielded
// *Submission's Author field reflects the original artist (not the user
// whose favorites we are walking).
//
// Favorites use a fave-ID cursor for pagination, not sequential page
// numbers, so [ListOptions.StartPage] is ignored — the walk always
// begins at the newest favorite. [ListOptions.MaxPages] still bounds
// the crawl.
func (c *Client) Favorites(ctx context.Context, name string, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] {
return c.listGallerySection(ctx, name, urls.Favorites, opts, reqOpts)
return func(yield func(*Submission, error) bool) {
cursor := ""
pagesFetched := 0
for {
if opts.reachedLimit(pagesFetched) {
return
}
lp, err := c.FavoritesPage(ctx, name, cursor, reqOpts...)
if err != nil {
yield(nil, err)
return
}
pagesFetched++
if len(lp.Items) == 0 {
return
}
for _, s := range lp.Items {
if !yield(s, nil) {
return
}
}
if !lp.HasNext {
return
}
cursor = lp.NextPage
}
}
}
// listGallerySection is the shared engine for Gallery / Scraps / Favorites.
// urlFn picks the section-specific URL builder; the rest of the pagination
// machinery is identical across all three sections.
func (c *Client) listGallerySection(
// GalleryPage loads exactly one page of /gallery/{name}/ and reports
// whether FA offers a further page. Page numbers are 1-based; 0 and 1 both
// address the first page. Reach for this method when you run your own
// pagination loop (checkpoint resume, spreading pages over workers);
// [Client.Gallery] remains the simpler choice for "give me everything in
// order".
//
// When the page is not the last one, [ListingPage].NextPage holds the
// following page number in decimal ("2", "3", …). Convert it with
// [strconv.Atoi] for the next call, or carry it around as an opaque token.
func (c *Client) GalleryPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
	// Only the URL builder is gallery-specific; the numbered-page
	// primitive handles fetching and parsing.
	buildURL := urls.Gallery
	return c.fetchNumberedPage(ctx, name, page, buildURL, reqOpts)
}
// ScrapsPage loads exactly one page of the user's scraps folder; it is
// the per-page sibling of [Client.Scraps]. Page numbering and the meaning
// of the returned NextPage are the same as for [Client.GalleryPage].
func (c *Client) ScrapsPage(ctx context.Context, name string, page int, reqOpts ...Option) (*ListingPage, error) {
	// Only the URL builder is scraps-specific; the numbered-page
	// primitive handles fetching and parsing.
	buildURL := urls.Scraps
	return c.fetchNumberedPage(ctx, name, page, buildURL, reqOpts)
}
// FavoritesPage loads exactly one page of /favorites/{name}/. The page is
// selected by cursor: pass "" for the first page and, from then on, the
// [ListingPage].NextPage value returned by the previous call. FA addresses
// favorites pages by fave-ID, not by a running page number, so an invented
// cursor such as "2" makes FA answer with the first page again and a
// caller's loop would never finish.
func (c *Client) FavoritesPage(ctx context.Context, name string, cursor string, reqOpts ...Option) (*ListingPage, error) {
	var page ListingPage
	parse := func(doc *goquery.Document) error {
		var next string
		page.Items, next, page.HasNext = parseListingPage(doc, c.cfg.jsonListings)
		if !page.HasNext {
			return nil
		}
		// A Next control whose target is not a recognisable favorites
		// cursor is treated as "no next page": better to stop early than
		// to keep re-fetching the first page.
		page.NextPage = favoritesCursorFromURL(next)
		page.HasNext = page.NextPage != ""
		return nil
	}

	if err := c.fetch(ctx, urls.FavoritesCursor(name, cursor), parse, reqOpts...); err != nil {
		return nil, err
	}
	return &page, nil
}
// fetchNumberedPage fetches and parses one page of a listing that is
// addressed by page number (Gallery / Scraps). urlFn supplies the
// section-specific URL for (name, page); everything else is shared.
//
// Page values below 1 are treated as page 1. On a page that has a
// successor, NextPage is set to page+1 rendered in decimal.
func (c *Client) fetchNumberedPage(
	ctx context.Context,
	name string,
	page int,
	urlFn func(string, int) string,
	reqOpts []Option,
) (*ListingPage, error) {
	if page < 1 {
		page = 1
	}

	var result ListingPage
	parse := func(doc *goquery.Document) error {
		// The raw next-page URL is irrelevant here: the successor of a
		// numbered page is always page+1.
		result.Items, _, result.HasNext = parseListingPage(doc, c.cfg.jsonListings)
		if result.HasNext {
			result.NextPage = strconv.Itoa(page + 1)
		}
		return nil
	}

	if err := c.fetch(ctx, urlFn(name, page), parse, reqOpts...); err != nil {
		return nil, err
	}
	return &result, nil
}
// listPagedSection is the shared engine for the page-number-based
// listing iterators (Gallery / Scraps). Favorites has its own loop in
// [Client.Favorites] because its pagination is cursor-based.
func (c *Client) listPagedSection(
ctx context.Context,
name string,
urlFn func(string, int) string,
@@ -47,28 +157,21 @@ func (c *Client) listGallerySection(
if opts.reachedLimit(pagesFetched) {
return
}
var (
items []*Submission
hasNext bool
)
err := c.fetch(ctx, urlFn(name, page), func(doc *goquery.Document) error {
items, hasNext = parseGalleryPage(doc, c.cfg.jsonListings)
return nil
}, reqOpts...)
lp, err := c.fetchNumberedPage(ctx, name, page, urlFn, reqOpts)
if err != nil {
yield(nil, err)
return
}
pagesFetched++
if len(items) == 0 {
if len(lp.Items) == 0 {
return
}
for _, s := range items {
for _, s := range lp.Items {
if !yield(s, nil) {
return
}
}
if !hasNext {
if !lp.HasNext {
return
}
page++

198
gallery_page_test.go Normal file
View File

@@ -0,0 +1,198 @@
package fa
import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"strings"
"sync"
"sync/atomic"
"testing"
)
// fakeGalleryPage builds a minimal gallery-page response with two figures.
// nextHref is the next-page URL emitted in the Next form; empty means no
// Next button (last page).
func fakeGalleryPage(startID int, nextHref string) string {
var b strings.Builder
b.WriteString(`<html><body>`)
for i := 0; i < 2; i++ {
id := startID + i
fmt.Fprintf(&b, `
<figure id="sid-%d" class="t-image r-general">
<a href="/view/%d/" title="Sub %d">
<img data-tags="u_someartist c_artwork_digital t_all s_wolf wolf" src="//d.example/t/%d.png"/>
</a>
<figcaption>
<p>Sub %d</p>
<a href="/user/someartist/">someartist</a>
</figcaption>
</figure>`, id, id, id, id, id)
}
if nextHref != "" {
fmt.Fprintf(&b, `<form action=%q method="get"><button class="button standard" type="submit">Next</button></form>`, nextHref)
}
b.WriteString(`</body></html>`)
return b.String()
}
// TestGalleryPage_HasNextPropagates serves a two-page gallery from an
// httptest server and checks that GalleryPage reports HasNext/NextPage on
// the page that carries a Next form and leaves both unset on the page
// that does not.
func TestGalleryPage_HasNextPropagates(t *testing.T) {
// Counts handler invocations so the test can assert the total number of
// HTTP requests made by the two GalleryPage calls.
var requests atomic.Int32
mux := http.NewServeMux()
// First page: items 1000/1001 plus a Next form pointing at page 2.
mux.HandleFunc("/gallery/u/", func(w http.ResponseWriter, _ *http.Request) {
requests.Add(1)
_, _ = w.Write([]byte(fakeGalleryPage(1000, "/gallery/u/2/")))
})
// Second page: items 2000/2001 and no Next form, i.e. the last page.
mux.HandleFunc("/gallery/u/2/", func(w http.ResponseWriter, _ *http.Request) {
requests.Add(1)
_, _ = w.Write([]byte(fakeGalleryPage(2000, "")))
})
srv := httptest.NewServer(mux)
defer srv.Close()
client := newE2EClient(t, srv)
first, err := client.GalleryPage(context.Background(), "u", 1)
if err != nil {
t.Fatalf("GalleryPage(1): %v", err)
}
if !first.HasNext {
t.Error("first.HasNext = false; want true")
}
// For numbered listings NextPage is the next page number in decimal.
if first.NextPage != "2" {
t.Errorf("first.NextPage = %q; want \"2\"", first.NextPage)
}
if len(first.Items) != 2 {
t.Fatalf("first.Items len = %d; want 2", len(first.Items))
}
if first.Items[0].ID != 1000 {
t.Errorf("first.Items[0].ID = %d; want 1000", first.Items[0].ID)
}
// The fake figure carries data-tags with both a plain keyword and an
// s_ token, so Tags and CategorizedTags.Species must both be non-empty.
if len(first.Items[0].Tags) == 0 || len(first.Items[0].CategorizedTags.Species) == 0 {
t.Errorf("first.Items[0]: tags not populated from data-tags: %+v", first.Items[0])
}
last, err := client.GalleryPage(context.Background(), "u", 2)
if err != nil {
t.Fatalf("GalleryPage(2): %v", err)
}
if last.HasNext {
t.Error("last.HasNext = true; want false (last page)")
}
if last.NextPage != "" {
t.Errorf("last.NextPage = %q; want empty", last.NextPage)
}
// One request per GalleryPage call; no hidden extra fetches.
if requests.Load() != 2 {
t.Errorf("requests = %d; want 2", requests.Load())
}
}
// TestScrapsPage_HitsScrapsRoute checks that asking ScrapsPage for page 1
// requests the /scraps/{user}/ route on the server.
func TestScrapsPage_HitsScrapsRoute(t *testing.T) {
	const wantPath = "/scraps/u/"

	// seen records the path of the request the handler received.
	var seen string
	routes := http.NewServeMux()
	routes.HandleFunc(wantPath, func(w http.ResponseWriter, r *http.Request) {
		seen = r.URL.Path
		_, _ = w.Write([]byte(fakeGalleryPage(1, "")))
	})
	srv := httptest.NewServer(routes)
	defer srv.Close()

	client := newE2EClient(t, srv)
	_, err := client.ScrapsPage(context.Background(), "u", 1)
	if err != nil {
		t.Fatalf("ScrapsPage: %v", err)
	}
	if seen != wantPath {
		t.Errorf("gotPath = %q; want /scraps/u/", seen)
	}
}
// TestFavoritesPage_CursorChain walks a two-page favorites listing by
// hand: it fetches the first page with an empty cursor, feeds the returned
// NextPage cursor into a second FavoritesPage call, and verifies both the
// page metadata and the exact sequence of request paths the server saw.
func TestFavoritesPage_CursorChain(t *testing.T) {
// requests logs every path served, in order; mu guards it because the
// handlers append to it while the test body reads it at the end.
var requests []string
var mu sync.Mutex
record := func(p string) {
mu.Lock()
requests = append(requests, p)
mu.Unlock()
}
mux := http.NewServeMux()
// First page: its Next form points at the cursor URL for fave-ID 9999.
mux.HandleFunc("/favorites/u/", func(w http.ResponseWriter, r *http.Request) {
record(r.URL.Path)
_, _ = w.Write([]byte(fakeGalleryPage(1000, "/favorites/u/9999/next")))
})
// Cursor page: no Next form, so it is the last page.
mux.HandleFunc("/favorites/u/9999/next", func(w http.ResponseWriter, r *http.Request) {
record(r.URL.Path)
_, _ = w.Write([]byte(fakeGalleryPage(2000, "")))
})
srv := httptest.NewServer(mux)
defer srv.Close()
client := newE2EClient(t, srv)
first, err := client.FavoritesPage(context.Background(), "u", "")
if err != nil {
t.Fatalf("FavoritesPage(first): %v", err)
}
// Fatal rather than Error: without a next page the second call below
// would be made with an empty cursor and the rest of the test is moot.
if !first.HasNext {
t.Fatal("first.HasNext = false; want true")
}
// NextPage must be the cursor segment only, not the whole Next URL.
if first.NextPage != "9999" {
t.Errorf("first.NextPage = %q; want \"9999\" (cursor)", first.NextPage)
}
last, err := client.FavoritesPage(context.Background(), "u", first.NextPage)
if err != nil {
t.Fatalf("FavoritesPage(cursor): %v", err)
}
if last.HasNext {
t.Error("last.HasNext = true; want false")
}
if last.NextPage != "" {
t.Errorf("last.NextPage = %q; want empty", last.NextPage)
}
// Exactly two requests, first page then cursor page, in that order.
want := []string{"/favorites/u/", "/favorites/u/9999/next"}
mu.Lock()
defer mu.Unlock()
if len(requests) != len(want) {
t.Fatalf("requests = %v; want %v", requests, want)
}
for i, w := range want {
if requests[i] != w {
t.Errorf("requests[%d] = %q; want %q", i, requests[i], w)
}
}
}
// TestFavorites_IteratorTerminates is the regression test for the
// favorites infinite loop: when the iterator addressed pages by sequential
// number, FA answered every made-up cursor with page 1 and the walk never
// ended. Here a two-page cursor chain must yield exactly four items and
// then stop.
func TestFavorites_IteratorTerminates(t *testing.T) {
	// serve builds a handler that answers with a fake listing page whose
	// first item is startID and whose Next form (if any) targets next.
	serve := func(startID int, next string) http.HandlerFunc {
		return func(w http.ResponseWriter, _ *http.Request) {
			_, _ = w.Write([]byte(fakeGalleryPage(startID, next)))
		}
	}
	routes := http.NewServeMux()
	routes.HandleFunc("/favorites/u/", serve(1, "/favorites/u/42/next"))
	routes.HandleFunc("/favorites/u/42/next", serve(3, ""))
	srv := httptest.NewServer(routes)
	defer srv.Close()

	client := newE2EClient(t, srv)
	seen := 0
	for sub, err := range client.Favorites(context.Background(), "u", ListOptions{}) {
		switch {
		case err != nil:
			t.Fatalf("Favorites: %v", err)
		case sub == nil:
			t.Fatal("nil sub")
		}
		seen++
		// Hard stop well above the expected four items so a regression
		// fails fast instead of spinning forever.
		if seen > 10 {
			t.Fatalf("iterator did not terminate; count > 10")
		}
	}
	if seen != 4 {
		t.Errorf("count = %d; want 4 (2 per page * 2 pages)", seen)
	}
}

View File

@@ -19,6 +19,15 @@ import (
// pure HTML the same behaviour as before [WithExperimentalJSONListings]
// existed.
func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, hasNext bool) {
	// Thin adapter over parseListingPage for callers that only care
	// whether a next page exists, not where it lives.
	parsed, _, more := parseListingPage(doc, useJSON)
	return parsed, more
}
// parseListingPage parses one page of a listing endpoint and also returns
// the raw next-page URL FA emits in its "Next" pagination form. Callers
// that need to chain across cursor-based pages (Favorites) consume the
// URL; callers that don't (Gallery / Scraps) can ignore it.
func parseListingPage(doc *goquery.Document, useJSON bool) (items []*Submission, nextURL string, hasNext bool) {
var jsonData listingJSONMap
if useJSON {
jsonData = readListingJSON(doc)
@@ -28,8 +37,8 @@ func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission,
items = append(items, s)
}
})
hasNext = detectNextPage(doc)
return items, hasNext
nextURL, hasNext = nextPageURL(doc)
return items, nextURL, hasNext
}
// parseGalleryFigure lifts a single submission preview from a
@@ -85,6 +94,15 @@ func parseGalleryFigure(sel *goquery.Selection, jsonData listingJSONMap) *Submis
}
}
// data-tags on the figure's <img> carries both the unprefixed keyword
// list and the prefixed system tags (s_/c_/a_/u_/t_). Splitting it lets
// callers classify listing items without an extra /view/ fetch.
if img := sel.Find("img[data-tags]").First(); img.Length() > 0 {
if raw, ok := img.Attr("data-tags"); ok {
applyListingDataTags(s, raw)
}
}
// JSON enrichment is the preferred source for the fields it carries.
if jsonData != nil {
if entry, ok := jsonData[id]; ok {
@@ -105,3 +123,35 @@ func parseGalleryFigure(sel *goquery.Selection, jsonData listingJSONMap) *Submis
return s
}
// applyListingDataTags distributes the whitespace-separated tokens of a
// listing <img>'s data-tags attribute over the Submission. A token shaped
// like "<letter>_<name>" with a known letter goes to CategorizedTags with
// the prefix removed:
//
//	s_ → Species, c_ → Characters, a_ / u_ → Artists, t_ → Types
//
// Every other token, including ones with an unknown prefix letter, is
// appended to Tags unchanged.
//
// The mapping follows the /view/ parser in submission_parser.go so both
// code paths categorise alike, apart from what this flat attribute cannot
// express (multi-word tags, the a_ vs u_ distinction).
func applyListingDataTags(s *Submission, raw string) {
	for _, token := range strings.Fields(raw) {
		// A categorised token is exactly one byte, an underscore, and a
		// non-empty name.
		prefix, name, found := strings.Cut(token, "_")
		if !found || len(prefix) != 1 || name == "" {
			s.Tags = append(s.Tags, token)
			continue
		}
		switch prefix {
		case "s":
			s.CategorizedTags.Species = append(s.CategorizedTags.Species, name)
		case "c":
			s.CategorizedTags.Characters = append(s.CategorizedTags.Characters, name)
		case "a", "u":
			s.CategorizedTags.Artists = append(s.CategorizedTags.Artists, name)
		case "t":
			s.CategorizedTags.Types = append(s.CategorizedTags.Types, name)
		default:
			// Unknown prefix letter: keep the whole token as a keyword.
			s.Tags = append(s.Tags, token)
		}
	}
}

View File

@@ -62,6 +62,99 @@ func TestParseGalleryPage_Synthetic(t *testing.T) {
}
}
// TestParseGalleryFigure_DataTags parses a synthetic listing with three
// figures (mixed prefixed and plain tokens, no data-tags attribute at
// all, and plain keywords only) and checks how each one populates Tags
// and CategorizedTags.
func TestParseGalleryFigure_DataTags(t *testing.T) {
const html = `<html><body>
<figure id="sid-2001" class="t-image r-general">
<a href="/view/2001/" title="Mixed Tags">
<img data-tags="u_someartist c_artwork_digital t_all s_wolf wolf solo digital landscape" src="//d.example/thumb/2001.png"/>
</a>
</figure>
<figure id="sid-2002" class="t-image r-general">
<a href="/view/2002/" title="No Tags">
<img src="//d.example/thumb/2002.png"/>
</a>
</figure>
<figure id="sid-2003" class="t-image r-general">
<a href="/view/2003/" title="Only Keywords">
<img data-tags="wolf solo" src="//d.example/thumb/2003.png"/>
</a>
</figure>
</body></html>`
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
t.Fatalf("setup: %v", err)
}
// useJSON=false: exercise the pure-HTML path only.
items, _ := parseGalleryPage(doc, false)
if len(items) != 3 {
t.Fatalf("items = %d; want 3", len(items))
}
// Mixed prefixed + unprefixed.
mixed := items[0]
// Only the unprefixed tokens stay in Tags, in attribute order.
wantTags := []string{"wolf", "solo", "digital", "landscape"}
if !equalStrings(mixed.Tags, wantTags) {
t.Errorf("items[0].Tags = %v; want %v", mixed.Tags, wantTags)
}
if !equalStrings(mixed.CategorizedTags.Species, []string{"wolf"}) {
t.Errorf("items[0].Species = %v", mixed.CategorizedTags.Species)
}
// Only the two-byte "c_" prefix is stripped; the later underscore in
// "artwork_digital" is part of the name.
if !equalStrings(mixed.CategorizedTags.Characters, []string{"artwork_digital"}) {
t.Errorf("items[0].Characters = %v", mixed.CategorizedTags.Characters)
}
if !equalStrings(mixed.CategorizedTags.Types, []string{"all"}) {
t.Errorf("items[0].Types = %v", mixed.CategorizedTags.Types)
}
// u_ tokens are routed to Artists.
if !equalStrings(mixed.CategorizedTags.Artists, []string{"someartist"}) {
t.Errorf("items[0].Artists = %v", mixed.CategorizedTags.Artists)
}
// Missing data-tags: both slices stay nil.
if items[1].Tags != nil {
t.Errorf("items[1].Tags = %v; want nil", items[1].Tags)
}
if items[1].CategorizedTags.Species != nil ||
items[1].CategorizedTags.Characters != nil ||
items[1].CategorizedTags.Artists != nil ||
items[1].CategorizedTags.Types != nil {
t.Errorf("items[1].CategorizedTags = %+v; want zero", items[1].CategorizedTags)
}
// Unprefixed-only: everything lands in Tags.
if !equalStrings(items[2].Tags, []string{"wolf", "solo"}) {
t.Errorf("items[2].Tags = %v", items[2].Tags)
}
if items[2].CategorizedTags.Species != nil {
t.Errorf("items[2].Species = %v; want nil", items[2].CategorizedTags.Species)
}
}
// TestParseGalleryPage_RealFixtureTags is a smoke test against the
// recorded gallery_page1.html fixture: at least one parsed item must carry
// Tags and at least one must carry CategorizedTags.Species, both taken
// from data-tags.
func TestParseGalleryPage_RealFixtureTags(t *testing.T) {
	fixture := loadFixture(t, "gallery_page1.html")
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(fixture))
	if err != nil {
		t.Fatalf("read doc: %v", err)
	}

	items, _ := parseGalleryPage(doc, false)
	if len(items) == 0 {
		t.Fatal("real fixture: no items parsed")
	}

	// Only presence matters here, not how many items are tagged.
	anyTags, anySpecies := false, false
	for _, item := range items {
		anyTags = anyTags || len(item.Tags) > 0
		anySpecies = anySpecies || len(item.CategorizedTags.Species) > 0
	}
	if !anyTags {
		t.Error("no items got Tags populated from data-tags")
	}
	if !anySpecies {
		t.Error("no items got CategorizedTags.Species populated from data-tags")
	}
}
func TestParseGalleryPage_RealFixture(t *testing.T) {
raw := loadFixture(t, "gallery_page1.html")
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))

View File

@@ -36,10 +36,25 @@ func Scraps(name string, page int) string {
return Host + "/scraps/" + safeName(name) + "/" + pageSegment(page)
}
// Favorites returns the URL for a user's favorites page. FA uses a numeric
// page parameter; the first page is 1.
func Favorites(name string, page int) string {
return Host + "/favorites/" + safeName(name) + "/" + pageSegment(page)
// Favorites returns the URL for the first page of a user's favorites.
// FA paginates favorites with a fave-ID cursor (see [FavoritesCursor]),
// not sequential page numbers — passing /favorites/{user}/{N}/ with a
// small integer N silently falls back to the first page. Use this for
// the first page only; follow the cursor returned in [ListingPage].NextPage
// for subsequent pages.
func Favorites(name string) string {
return Host + "/favorites/" + safeName(name) + "/"
}
// FavoritesCursor builds the URL of a favorites page addressed by cursor,
// i.e. /favorites/{user}/{cursor}/next, where cursor is the fave-ID FA put
// into the previous page's "Next" form (for example 1951234825). An empty
// cursor yields the first-page URL. The SDK does not interpret the cursor:
// hand over exactly what [ListingPage].NextPage returned.
func FavoritesCursor(name, cursor string) string {
	// The first-page URL already ends in "/", so a cursor page is that
	// URL with "{cursor}/next" appended.
	firstPage := Favorites(name)
	if cursor == "" {
		return firstPage
	}
	return firstPage + cursor + "/next"
}
// Journal returns the URL for a single journal entry.

View File

@@ -52,11 +52,17 @@ func parseNoteListItem(item *goquery.Selection) *NotePreview {
}
// Note ID lives in the href: /msg/pms/{folder}/{id}/#message. Strip the
// fragment first so extractIntFromHref picks the trailing numeric path.
// fragment first, then take the *last* numeric segment — the folder
// number (e.g. 1) appears before the note ID and would otherwise win
// the "first numeric segment" race in extractIntFromHref.
if i := strings.Index(href, "#"); i != -1 {
href = href[:i]
}
np.ID = NoteID(extractIntFromHref(href))
for _, seg := range strings.Split(href, "/") {
if n, err := parseID[NoteID](seg); err == nil && n != 0 {
np.ID = n
}
}
// Read/unread: classes on the subject link.
if class, _ := subjectLink.Attr("class"); strings.Contains(class, "note-unread") || strings.Contains(class, "unread") && !strings.Contains(class, "note-read") {

View File

@@ -6,6 +6,30 @@ import (
"github.com/PuerkitoBio/goquery"
)
// ListingPage is one page of a listing endpoint (Gallery / Scraps /
// Favorites). It carries everything an external caller needs to drive
// pagination by hand: the items, whether FA exposed a "next page" link,
// and an opaque NextPage token to pass back into the next per-page call.
//
// External scrapers that want to manage their own loop (resume from a
// checkpoint, run pages in parallel, throttle differently) should call
// the per-page methods ([Client.GalleryPage], [Client.ScrapsPage],
// [Client.FavoritesPage]) and stop when HasNext is false. Callers that
// just want every item in order should keep using the iter.Seq2-shaped
// methods ([Client.Gallery] et al.), which walk pages internally.
//
// NextPage's contents differ by endpoint — for Gallery / Scraps it is
// the next 1-based page number as a decimal string ("2", "3", …); for
// Favorites it is the fave-ID cursor FA emits on the "Next" form
// (because favorites pagination is cursor-based, not page-number-based).
// Treat the value as opaque: pass whatever you got back to the next
// call without parsing.
type ListingPage struct {
// Items holds the submission previews parsed from this page, in the
// order the parser produced them.
Items []*Submission
// HasNext reports whether a usable "next page" control was found on
// this page; stop paginating when it is false.
HasNext bool
NextPage string // "" when !HasNext; otherwise the opaque token to pass back
}
// ListOptions configures the pagination of a simple iterator method like
// [Client.Gallery] or [Client.Notes]. Filtered iterators ([Client.Search],
// [Client.Browse]) use their own option structs that fold the same fields
@@ -42,17 +66,57 @@ func (o ListOptions) reachedLimit(pagesFetched int) bool {
// FA's beta theme renders pagination as either a Next form button or a
// hyperlink with a recognisable label.
func detectNextPage(doc *goquery.Document) bool {
if doc.Find("form button.button.standard:contains('Next')").Length() > 0 {
return true
url, _ := nextPageURL(doc)
return url != ""
}
// nextPageURL returns the action/href that the "Next" pagination control
// would navigate to, along with a flag indicating whether one was found.
// Returns ("", false) on the last page (FA emits no Next form/anchor, or
// emits it inside an HTML comment that doesn't parse as an element).
func nextPageURL(doc *goquery.Document) (string, bool) {
var action string
doc.Find("form").EachWithBreak(func(_ int, f *goquery.Selection) bool {
if f.Find("button.button.standard:contains('Next')").Length() == 0 {
return true
}
action, _ = f.Attr("action")
return false
})
if action != "" {
return action, true
}
hit := false
var href string
doc.Find("a.button.standard, a.button-link, a.pagination-next").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
text := strings.ToLower(trimText(sel))
if strings.Contains(text, "next") || strings.Contains(text, "older") {
hit = true
href, _ = sel.Attr("href")
return false
}
return true
})
return hit
if href == "" {
return "", false
}
return href, true
}
// favoritesCursorFromURL extracts the fave-ID cursor segment from a
// /favorites/{user}/{cursor}/next URL. Returns "" if the URL does not
// match that shape (in which case the caller treats the listing as
// exhausted rather than chasing a malformed cursor).
//
// rawURL may be a relative path ("/favorites/u/123/next") or an absolute
// URL on any host ("https://host/favorites/u/123/next"); a query string
// or fragment is ignored. The host is deliberately not compared against
// a fixed value, so mirrors, the bare domain, and test servers all work.
func favoritesCursorFromURL(rawURL string) string {
	// Drop query and fragment first so a "://" inside either of them can
	// never be mistaken for the scheme separator below.
	if i := strings.IndexAny(rawURL, "?#"); i >= 0 {
		rawURL = rawURL[:i]
	}

	// Reduce an absolute URL to its path. A "://" only marks a scheme
	// when no "/" precedes it; otherwise it is part of a relative path.
	if i := strings.Index(rawURL, "://"); i > 0 && !strings.Contains(rawURL[:i], "/") {
		afterScheme := rawURL[i+len("://"):]
		slash := strings.IndexByte(afterScheme, '/')
		if slash < 0 {
			// Host only, no path: cannot be a favorites cursor URL.
			return ""
		}
		rawURL = afterScheme[slash:]
	}

	parts := strings.Split(strings.Trim(rawURL, "/"), "/")
	// Expect ["favorites", "{user}", "{cursor}", "next"].
	if len(parts) != 4 || parts[0] != "favorites" || parts[3] != "next" {
		return ""
	}
	return parts[2]
}

View File

@@ -35,10 +35,19 @@ type Submission struct {
Gender Gender
Description string // raw HTML; sanitise before rendering to a browser
DescriptionText string // plaintext convenience
Tags []string
// Tags holds the user-supplied keyword tags. On /view/-path Submissions
// these come from div.submission-tags anchors. On listing-path
// Submissions (Gallery/Scraps/Favorites/Browse/Search/SubmissionInbox)
// they come from the figure's data-tags attribute, which carries the
// same keywords FA renders on /view/ for that submission.
Tags []string
// CategorizedTags groups FA's prefixed system tags by category.
// FA emits these as tag-block entries inside div.submission-tags with
// prefixes s_ (species), c_ (character), a_/u_ (artist), and t_ (type).
// On /view/-path Submissions FA emits these as tag-block entries inside
// div.submission-tags with prefixes s_ (species), c_ (character),
// a_/u_ (artist), and t_ (type). On listing-path Submissions the same
// prefixed tokens are parsed out of the figure's data-tags attribute;
// the a_ vs u_ distinction is lost there because FA collapses both into
// u_ in that flat list.
CategorizedTags CategorizedTags
FileURL string // absolute CDN URL; pass to Download
ThumbURL string