FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.
Changes:
- urls.Favorites(name) returns the first-page URL; new
urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
helper works for both page-number and cursor pagination.
- Added regression test that fails on the infinite-loop bug.
- Example: examples/favorites_page demonstrates cursor walking.
331 lines
10 KiB
Go
331 lines
10 KiB
Go
//go:build fixtures
|
|
|
|
// This file is compiled only when the `fixtures` build tag is set:
|
|
//
|
|
// go test -tags=fixtures -run TestRefreshFixtures ./...
|
|
//
|
|
// It hits live FurAffinity with the cookies in your environment and snapshots
|
|
// the response body of each curated page into testdata/html/. The regular
|
|
// parser tests read from those snapshots, so this is how we keep the parser
|
|
// in sync with the live site without baking sample data into the repo.
|
|
//
|
|
// Each fixture is its own subtest. A failure on one (network blip, dead
|
|
// target, fresh CF challenge) does not abort the rest.
|
|
//
|
|
// # Required environment variables
|
|
//
|
|
// FA_A — `a` session cookie
|
|
// FA_B — `b` session cookie
|
|
// CF_CLEARANCE — cf_clearance cookie from the same browser session
|
|
// FA_UA — User-Agent string that produced CF_CLEARANCE
|
|
//
|
|
// # Per-fixture targets
|
|
//
|
|
// All of these have defaults that fall back to FA_TEST_USER (your own login
|
|
// name) where possible. Set them explicitly to capture data from somewhere
|
|
// other than your own profile.
|
|
//
|
|
// FA_TEST_USER base username (yours)
|
|
// FA_TEST_SUB_ID image submission ID (default: 12345678)
|
|
// FA_TEST_SUB_STORY_ID non-image submission ID (story/music/PDF)
|
|
// FA_TEST_GALLERY_USER gallery owner (default: FA_TEST_USER)
|
|
// FA_TEST_GALLERY_LAST_PAGE page index near/at the end of that gallery
|
|
// FA_TEST_SCRAPS_USER scraps owner (default: FA_TEST_GALLERY_USER)
|
|
// FA_TEST_FAVORITES_USER favorites owner (default: FA_TEST_USER)
|
|
// FA_TEST_JOURNALS_USER journals listing owner (default: FA_TEST_USER)
|
|
// FA_TEST_JOURNAL_ID single journal ID
|
|
// FA_TEST_USER_WITH_SHOUTS profile that has visible shouts
|
|
// FA_TEST_USER_WITH_BANNER profile that has a custom site banner uploaded
|
|
// FA_TEST_NOTE_ID single note (PM) ID (M2 prep)
|
|
// FA_TEST_SEARCH_QUERY search keyword (M4 prep)
|
|
// FA_TEST_NONEXISTENT_SUB_ID ID guaranteed to 404 (default: 9999999999)
|
|
|
|
package fa
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/gocolly/colly/v2"
|
|
|
|
"git.anthrove.art/public/go-fa-api/internal/urls"
|
|
)
|
|
|
|
// fixtureTarget describes a single HTML snapshot that TestRefreshFixtures
// may capture. A target whose prerequisites are missing is skipped rather
// than reported as a failure.
type fixtureTarget struct {
	// name is both the subtest name and the file name written under
	// fixturesDir.
	name string

	// url is the page whose raw response body is saved.
	url string

	// requires holds already-resolved input values (not env-var names).
	// If any entry is blank or exactly "0", the target is skipped.
	requires []string

	// notes is a short human-readable description; it is echoed in the
	// skip message and in the success log line.
	notes string
}
|
|
|
|
func TestRefreshFixtures(t *testing.T) {
|
|
a := os.Getenv("FA_A")
|
|
b := os.Getenv("FA_B")
|
|
if a == "" || b == "" {
|
|
t.Skip("FA_A / FA_B not set; cannot refresh fixtures")
|
|
}
|
|
cf := os.Getenv("CF_CLEARANCE")
|
|
ua := os.Getenv("FA_UA")
|
|
if cf == "" || ua == "" {
|
|
t.Log("warning: CF_CLEARANCE or FA_UA not set; refresh likely to hit a Cloudflare challenge")
|
|
}
|
|
|
|
if err := os.MkdirAll(fixturesDir, 0o755); err != nil {
|
|
t.Fatalf("mkdir %s: %v", fixturesDir, err)
|
|
}
|
|
|
|
client := New(
|
|
WithCookies(Cookies{A: a, B: b}),
|
|
WithCloudflare(CFCookies{Clearance: cf}),
|
|
WithUserAgent(ua),
|
|
)
|
|
|
|
// Resolve targets every fixture is gated on the relevant env-derived
|
|
// values being non-empty so an incomplete env still gets you the
|
|
// fixtures you can capture.
|
|
user := os.Getenv("FA_TEST_USER")
|
|
galleryUser := envOr("FA_TEST_GALLERY_USER", user)
|
|
scrapsUser := envOr("FA_TEST_SCRAPS_USER", galleryUser)
|
|
favoritesUser := envOr("FA_TEST_FAVORITES_USER", user)
|
|
journalsUser := envOr("FA_TEST_JOURNALS_USER", user)
|
|
shoutsUser := os.Getenv("FA_TEST_USER_WITH_SHOUTS")
|
|
bannerUser := os.Getenv("FA_TEST_USER_WITH_BANNER")
|
|
searchQuery := os.Getenv("FA_TEST_SEARCH_QUERY")
|
|
|
|
subID := atoi64Default(os.Getenv("FA_TEST_SUB_ID"), 12345678)
|
|
storyID := atoi64Default(os.Getenv("FA_TEST_SUB_STORY_ID"), 0)
|
|
journalID := atoi64Default(os.Getenv("FA_TEST_JOURNAL_ID"), 0)
|
|
noteID := atoi64Default(os.Getenv("FA_TEST_NOTE_ID"), 0)
|
|
galleryLastPage := atoiDefault(os.Getenv("FA_TEST_GALLERY_LAST_PAGE"), 0)
|
|
missingSubID := atoi64Default(os.Getenv("FA_TEST_NONEXISTENT_SUB_ID"), 9999999999)
|
|
|
|
targets := []fixtureTarget{
|
|
// ---- M1: read API verifiable today --------------------------------
|
|
{
|
|
name: "submission.html",
|
|
url: urls.Submission(subID),
|
|
requires: []string{strconv.FormatInt(subID, 10)},
|
|
notes: "image submission used by parseSubmission tests + comments parser",
|
|
},
|
|
{
|
|
name: "submission_story.html",
|
|
url: urls.Submission(storyID),
|
|
requires: []string{strconv.FormatInt(storyID, 10)},
|
|
notes: "non-image submission (story/music/PDF) exercises FileURL fallback to Download button",
|
|
},
|
|
{
|
|
name: "user.html",
|
|
url: urls.User(user),
|
|
requires: []string{user},
|
|
notes: "user profile used by parseUser tests",
|
|
},
|
|
{
|
|
name: "user_with_shouts.html",
|
|
url: urls.User(shoutsUser),
|
|
requires: []string{shoutsUser},
|
|
notes: "profile that exposes shouts used to validate shouts parser",
|
|
},
|
|
{
|
|
name: "user_with_banner.html",
|
|
url: urls.User(bannerUser),
|
|
requires: []string{bannerUser},
|
|
notes: "profile that has a custom uploaded site banner used to validate SiteBanner.IsCustom",
|
|
},
|
|
{
|
|
name: "gallery_page1.html",
|
|
url: urls.Gallery(galleryUser, 1),
|
|
requires: []string{galleryUser},
|
|
notes: "first gallery page figure[id^=sid-] iteration",
|
|
},
|
|
{
|
|
name: "gallery_page_last.html",
|
|
url: urls.Gallery(galleryUser, galleryLastPage),
|
|
requires: []string{galleryUser, strconv.Itoa(galleryLastPage)},
|
|
notes: "last gallery page verifies detectNextPage returns false at the end",
|
|
},
|
|
{
|
|
name: "scraps_page1.html",
|
|
url: urls.Scraps(scrapsUser, 1),
|
|
requires: []string{scrapsUser},
|
|
notes: "scraps listing same parser as gallery; sanity-check shape",
|
|
},
|
|
{
|
|
name: "favorites_page1.html",
|
|
url: urls.Favorites(favoritesUser),
|
|
requires: []string{favoritesUser},
|
|
notes: "favorites per-item Author should be the original artist",
|
|
},
|
|
{
|
|
name: "journals_listing_page1.html",
|
|
url: urls.UserJournals(journalsUser, 1),
|
|
requires: []string{journalsUser},
|
|
notes: "journals listing used by UserJournals iterator",
|
|
},
|
|
{
|
|
name: "journal.html",
|
|
url: urls.Journal(journalID),
|
|
requires: []string{strconv.FormatInt(journalID, 10)},
|
|
notes: "single journal entry parseJournal target",
|
|
},
|
|
{
|
|
name: "comments_submission.html",
|
|
url: urls.Submission(subID),
|
|
requires: []string{strconv.FormatInt(subID, 10)},
|
|
notes: "submission page captured a second time for comment-parser fixture (comments are inline)",
|
|
},
|
|
{
|
|
name: "comments_journal.html",
|
|
url: urls.Journal(journalID),
|
|
requires: []string{strconv.FormatInt(journalID, 10)},
|
|
notes: "journal page captured for journal comments parsing",
|
|
},
|
|
{
|
|
name: "system_message_not_found.html",
|
|
url: urls.Submission(missingSubID),
|
|
requires: []string{strconv.FormatInt(missingSubID, 10)},
|
|
notes: "captures FA's system-message page for ErrNotFound classifier validation",
|
|
},
|
|
|
|
// ---- M2: inbox/notes (parsers not yet written; captures for prep) -
|
|
{
|
|
name: "msg_submissions.html",
|
|
url: urls.MsgSubmissions(),
|
|
requires: []string{a},
|
|
notes: "M2 prep: new-submission inbox (auth required)",
|
|
},
|
|
{
|
|
name: "msg_others.html",
|
|
url: urls.MsgOthers(),
|
|
requires: []string{a},
|
|
notes: "M2 prep: watch/journal/comment/fav notifications",
|
|
},
|
|
{
|
|
name: "msg_pms.html",
|
|
url: urls.MsgPMs(),
|
|
requires: []string{a},
|
|
notes: "M2 prep: private-message inbox",
|
|
},
|
|
{
|
|
name: "note_view.html",
|
|
url: urls.ViewMessage(noteID),
|
|
requires: []string{strconv.FormatInt(noteID, 10)},
|
|
notes: "M2 prep: single note view (needs FA_TEST_NOTE_ID)",
|
|
},
|
|
|
|
// ---- M4: search/browse (parsers not yet written; captures for prep)
|
|
{
|
|
name: "search_results.html",
|
|
url: urls.Search(searchQuery, 1),
|
|
requires: []string{searchQuery},
|
|
notes: "M4 prep: search results page",
|
|
},
|
|
{
|
|
name: "browse.html",
|
|
url: urls.Browse(1),
|
|
requires: []string{a},
|
|
notes: "M4 prep: /browse/ page",
|
|
},
|
|
}
|
|
|
|
for _, tg := range targets {
|
|
t.Run(tg.name, func(t *testing.T) {
|
|
for _, r := range tg.requires {
|
|
if strings.TrimSpace(r) == "" || r == "0" {
|
|
t.Skipf("required input not set; skipping (%s)", tg.notes)
|
|
return
|
|
}
|
|
}
|
|
raw, err := fetchRaw(t.Context(), client, tg.url)
|
|
if err != nil {
|
|
t.Fatalf("fetch %s (%s): %v", tg.name, tg.url, err)
|
|
}
|
|
if doc, derr := goquery.NewDocumentFromReader(bytes.NewReader(raw)); derr == nil {
|
|
if title := strings.TrimSpace(doc.Find("title").First().Text()); title == "Just a moment..." {
|
|
t.Fatalf("got Cloudflare challenge page; refresh CF_CLEARANCE / FA_UA")
|
|
}
|
|
}
|
|
out := filepath.Join(fixturesDir, tg.name)
|
|
if err := os.WriteFile(out, raw, 0o644); err != nil {
|
|
t.Fatalf("write %s: %v", out, err)
|
|
}
|
|
t.Logf("wrote %s (%d bytes) %s", out, len(raw), tg.notes)
|
|
})
|
|
}
|
|
}
|
|
|
|
// fetchRaw fetches the URL through the same Colly+transport pipeline the SDK
|
|
// uses for parsed calls, but hands back the raw response body instead of
|
|
// running a parser. Lives in the test build so we don't expose a public
|
|
// raw-fetch API just for fixture refreshing.
|
|
func fetchRaw(ctx context.Context, c *Client, rawURL string) ([]byte, error) {
|
|
clone := c.collector.Clone()
|
|
clone.SetClient(c.http)
|
|
clone.SetCookieJar(c.jar)
|
|
clone.Context = ctx
|
|
|
|
var body []byte
|
|
var respErr error
|
|
|
|
clone.OnResponse(func(r *colly.Response) {
|
|
// Copy: r.Body is reused by Colly across responses.
|
|
body = append(body[:0], r.Body...)
|
|
})
|
|
clone.OnError(func(r *colly.Response, err error) {
|
|
respErr = err
|
|
})
|
|
if err := clone.Visit(rawURL); err != nil {
|
|
if respErr != nil {
|
|
return nil, respErr
|
|
}
|
|
return nil, err
|
|
}
|
|
if respErr != nil {
|
|
return nil, respErr
|
|
}
|
|
if len(body) == 0 {
|
|
return nil, errors.New("fetchRaw: empty body")
|
|
}
|
|
return body, nil
|
|
}
|
|
|
|
// envOr returns the value of the environment variable named key, or fallback
// when that variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
|
|
|
// atoi64Default parses s as a base-10 int64 and returns fallback when s is
// empty, not a valid integer, or out of range for int64.
//
// Leading and trailing whitespace is ignored, so a value with a stray space
// or trailing newline (common with .env files and shell quoting) is still
// honoured instead of silently falling back to the default.
func atoi64Default(s string, fallback int64) int64 {
	// ParseInt rejects the empty string, so no separate empty check is needed.
	n, err := strconv.ParseInt(strings.TrimSpace(s), 10, 64)
	if err != nil {
		return fallback
	}
	return n
}
|
|
|
|
// atoiDefault parses s as a base-10 int and returns fallback when s is empty,
// not a valid integer, or out of range for int.
//
// Leading and trailing whitespace is ignored, so a value with a stray space
// or trailing newline (common with .env files and shell quoting) is still
// honoured instead of silently falling back to the default.
func atoiDefault(s string, fallback int) int {
	// Atoi rejects the empty string, so no separate empty check is needed.
	n, err := strconv.Atoi(strings.TrimSpace(s))
	if err != nil {
		return fallback
	}
	return n
}
|