Files
go-fa-api/fixtures_refresh_test.go
2026-05-25 22:27:18 +02:00

331 lines
10 KiB
Go

//go:build fixtures
// This file is compiled only when the `fixtures` build tag is set:
//
// go test -tags=fixtures -run TestRefreshFixtures ./...
//
// It hits live FurAffinity with the cookies in your environment and snapshots
// the response body of each curated page into testdata/html/. The regular
// parser tests read from those snapshots, so this is how we keep the parser
// in sync with the live site without baking sample data into the repo.
//
// Each fixture is its own subtest. A failure on one (network blip, dead
// target, fresh CF challenge) does not abort the rest.
//
// # Required environment variables
//
// FA_A — `a` session cookie
// FA_B — `b` session cookie
// CF_CLEARANCE — cf_clearance cookie from the same browser session
// FA_UA — User-Agent string that produced CF_CLEARANCE
//
// # Per-fixture targets
//
// All of these have defaults that fall back to FA_TEST_USER (your own login
// name) where possible. Set them explicitly to capture data from somewhere
// other than your own profile.
//
// FA_TEST_USER base username (yours)
// FA_TEST_SUB_ID image submission ID (default: 12345678)
// FA_TEST_SUB_STORY_ID non-image submission ID (story/music/PDF)
// FA_TEST_GALLERY_USER gallery owner (default: FA_TEST_USER)
// FA_TEST_GALLERY_LAST_PAGE page index near/at the end of that gallery
// FA_TEST_SCRAPS_USER scraps owner (default: FA_TEST_GALLERY_USER)
// FA_TEST_FAVORITES_USER favorites owner (default: FA_TEST_USER)
// FA_TEST_JOURNALS_USER journals listing owner (default: FA_TEST_USER)
// FA_TEST_JOURNAL_ID single journal ID
// FA_TEST_USER_WITH_SHOUTS profile that has visible shouts
// FA_TEST_USER_WITH_BANNER profile that has a custom site banner uploaded
// FA_TEST_NOTE_ID single note (PM) ID (M2 prep)
// FA_TEST_SEARCH_QUERY search keyword (M4 prep)
// FA_TEST_NONEXISTENT_SUB_ID ID guaranteed to 404 (default: 9999999999)
package fa
import (
"bytes"
"context"
"errors"
"os"
"path/filepath"
"strconv"
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
"github.com/gocolly/colly/v2"
"git.anthrove.art/public/go-fa-api/internal/urls"
)
// fixtureTarget describes a single page to snapshot during a fixture refresh.
//
// name is the output file name, joined onto the fixtures directory. url is
// the page that gets fetched. requires holds already-resolved input values
// (not env-var names): when any of them is blank or "0" the fixture is
// skipped rather than failed. notes is a free-form description that is
// echoed in the skip and success log lines.
type fixtureTarget struct {
	name, url string
	requires  []string // resolved inputs; every one must be non-empty and not "0"
	notes     string
}
// TestRefreshFixtures re-captures every curated page from the live site and
// writes each response body into fixturesDir, one subtest per fixture.
//
// The whole test is skipped unless both FA_A and FA_B are set; a missing
// CF_CLEARANCE or FA_UA only logs a warning. An individual fixture is
// skipped when any of its resolved inputs is blank or "0". A fetch error, a
// Cloudflare challenge page, or a write error fails that subtest only, so
// the remaining fixtures are still attempted.
func TestRefreshFixtures(t *testing.T) {
	sessionA, sessionB := os.Getenv("FA_A"), os.Getenv("FA_B")
	if sessionA == "" || sessionB == "" {
		t.Skip("FA_A / FA_B not set; cannot refresh fixtures")
	}

	clearance, userAgent := os.Getenv("CF_CLEARANCE"), os.Getenv("FA_UA")
	if clearance == "" || userAgent == "" {
		t.Log("warning: CF_CLEARANCE or FA_UA not set; refresh likely to hit a Cloudflare challenge")
	}

	if err := os.MkdirAll(fixturesDir, 0o755); err != nil {
		t.Fatalf("mkdir %s: %v", fixturesDir, err)
	}

	client := New(
		WithCookies(Cookies{A: sessionA, B: sessionB}),
		WithCloudflare(CFCookies{Clearance: clearance}),
		WithUserAgent(userAgent),
	)

	// Resolve targets. Every fixture is gated on its env-derived inputs
	// being non-empty, so an incomplete environment still yields whichever
	// fixtures can be captured.
	var (
		user          = os.Getenv("FA_TEST_USER")
		galleryUser   = envOr("FA_TEST_GALLERY_USER", user)
		scrapsUser    = envOr("FA_TEST_SCRAPS_USER", galleryUser)
		favoritesUser = envOr("FA_TEST_FAVORITES_USER", user)
		journalsUser  = envOr("FA_TEST_JOURNALS_USER", user)
		shoutsUser    = os.Getenv("FA_TEST_USER_WITH_SHOUTS")
		bannerUser    = os.Getenv("FA_TEST_USER_WITH_BANNER")
		searchQuery   = os.Getenv("FA_TEST_SEARCH_QUERY")

		subID           = atoi64Default(os.Getenv("FA_TEST_SUB_ID"), 12345678)
		storyID         = atoi64Default(os.Getenv("FA_TEST_SUB_STORY_ID"), 0)
		journalID       = atoi64Default(os.Getenv("FA_TEST_JOURNAL_ID"), 0)
		noteID          = atoi64Default(os.Getenv("FA_TEST_NOTE_ID"), 0)
		galleryLastPage = atoiDefault(os.Getenv("FA_TEST_GALLERY_LAST_PAGE"), 0)
		missingSubID    = atoi64Default(os.Getenv("FA_TEST_NONEXISTENT_SUB_ID"), 9999999999)
	)

	// Numeric inputs are gated through their decimal text form; an unset ID
	// resolves to 0 and therefore renders as "0", which the gate rejects.
	var (
		subIDText        = strconv.FormatInt(subID, 10)
		storyIDText      = strconv.FormatInt(storyID, 10)
		journalIDText    = strconv.FormatInt(journalID, 10)
		noteIDText       = strconv.FormatInt(noteID, 10)
		missingSubIDText = strconv.FormatInt(missingSubID, 10)
		lastPageText     = strconv.Itoa(galleryLastPage)
	)

	var targets []fixtureTarget
	add := func(name, url, notes string, requires ...string) {
		targets = append(targets, fixtureTarget{
			name:     name,
			url:      url,
			requires: requires,
			notes:    notes,
		})
	}

	// ---- M1: read API verifiable today ------------------------------------
	add("submission.html", urls.Submission(subID),
		"image submission used by parseSubmission tests + comments parser",
		subIDText)
	add("submission_story.html", urls.Submission(storyID),
		"non-image submission (story/music/PDF) exercises FileURL fallback to Download button",
		storyIDText)
	add("user.html", urls.User(user),
		"user profile used by parseUser tests",
		user)
	add("user_with_shouts.html", urls.User(shoutsUser),
		"profile that exposes shouts used to validate shouts parser",
		shoutsUser)
	add("user_with_banner.html", urls.User(bannerUser),
		"profile that has a custom uploaded site banner used to validate SiteBanner.IsCustom",
		bannerUser)
	add("gallery_page1.html", urls.Gallery(galleryUser, 1),
		"first gallery page figure[id^=sid-] iteration",
		galleryUser)
	add("gallery_page_last.html", urls.Gallery(galleryUser, galleryLastPage),
		"last gallery page verifies detectNextPage returns false at the end",
		galleryUser, lastPageText)
	add("scraps_page1.html", urls.Scraps(scrapsUser, 1),
		"scraps listing same parser as gallery; sanity-check shape",
		scrapsUser)
	add("favorites_page1.html", urls.Favorites(favoritesUser, 1),
		"favorites per-item Author should be the original artist",
		favoritesUser)
	add("journals_listing_page1.html", urls.UserJournals(journalsUser, 1),
		"journals listing used by UserJournals iterator",
		journalsUser)
	add("journal.html", urls.Journal(journalID),
		"single journal entry parseJournal target",
		journalIDText)
	add("comments_submission.html", urls.Submission(subID),
		"submission page captured a second time for comment-parser fixture (comments are inline)",
		subIDText)
	add("comments_journal.html", urls.Journal(journalID),
		"journal page captured for journal comments parsing",
		journalIDText)
	add("system_message_not_found.html", urls.Submission(missingSubID),
		"captures FA's system-message page for ErrNotFound classifier validation",
		missingSubIDText)

	// ---- M2: inbox/notes (parsers not yet written; captures for prep) -----
	add("msg_submissions.html", urls.MsgSubmissions(),
		"M2 prep: new-submission inbox (auth required)",
		sessionA)
	add("msg_others.html", urls.MsgOthers(),
		"M2 prep: watch/journal/comment/fav notifications",
		sessionA)
	add("msg_pms.html", urls.MsgPMs(),
		"M2 prep: private-message inbox",
		sessionA)
	add("note_view.html", urls.ViewMessage(noteID),
		"M2 prep: single note view (needs FA_TEST_NOTE_ID)",
		noteIDText)

	// ---- M4: search/browse (parsers not yet written; captures for prep) ---
	add("search_results.html", urls.Search(searchQuery, 1),
		"M4 prep: search results page",
		searchQuery)
	add("browse.html", urls.Browse(1),
		"M4 prep: /browse/ page",
		sessionA)

	for _, target := range targets {
		t.Run(target.name, func(t *testing.T) {
			// Skipf stops the subtest, so no explicit return is needed.
			for _, input := range target.requires {
				if input == "0" || strings.TrimSpace(input) == "" {
					t.Skipf("required input not set; skipping (%s)", target.notes)
				}
			}

			page, err := fetchRaw(t.Context(), client, target.url)
			if err != nil {
				t.Fatalf("fetch %s (%s): %v", target.name, target.url, err)
			}

			// Refuse to overwrite a fixture with a Cloudflare interstitial.
			// A body that fails to parse is written out as-is.
			doc, parseErr := goquery.NewDocumentFromReader(bytes.NewReader(page))
			if parseErr == nil && strings.TrimSpace(doc.Find("title").First().Text()) == "Just a moment..." {
				t.Fatalf("got Cloudflare challenge page; refresh CF_CLEARANCE / FA_UA")
			}

			dest := filepath.Join(fixturesDir, target.name)
			if err := os.WriteFile(dest, page, 0o644); err != nil {
				t.Fatalf("write %s: %v", dest, err)
			}
			t.Logf("wrote %s (%d bytes) %s", dest, len(page), target.notes)
		})
	}
}
// fetchRaw retrieves rawURL through a clone of the client's collector, wired
// to the client's HTTP client and cookie jar, and returns the raw response
// body instead of running a parser. It lives in the test build so no public
// raw-fetch API has to exist just for fixture refreshing.
//
// Error precedence: an error reported through the collector's OnError
// callback wins, then the error returned by Visit, and finally an empty body
// is reported as "fetchRaw: empty body".
func fetchRaw(ctx context.Context, c *Client, rawURL string) ([]byte, error) {
	col := c.collector.Clone()
	col.SetClient(c.http)
	col.SetCookieJar(c.jar)
	col.Context = ctx

	var (
		payload  []byte
		fetchErr error
	)
	col.OnResponse(func(resp *colly.Response) {
		// Keep a private copy instead of aliasing the response's buffer.
		payload = append(payload[:0], resp.Body...)
	})
	col.OnError(func(_ *colly.Response, err error) {
		fetchErr = err
	})

	visitErr := col.Visit(rawURL)
	switch {
	case fetchErr != nil:
		return nil, fetchErr
	case visitErr != nil:
		return nil, visitErr
	case len(payload) == 0:
		return nil, errors.New("fetchRaw: empty body")
	}
	return payload, nil
}
// envOr returns the value of the environment variable key, or fallback when
// that variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
// atoi64Default converts s to an int64 using base 10. Whenever the
// conversion fails (including for the empty string or an out-of-range
// value) it yields fallback instead.
func atoi64Default(s string, fallback int64) int64 {
	// ParseInt already rejects "", so no separate empty check is needed.
	if parsed, err := strconv.ParseInt(s, 10, 64); err == nil {
		return parsed
	}
	return fallback
}
// atoiDefault converts s to an int. Whenever the conversion fails (including
// for the empty string or an out-of-range value) it yields fallback instead.
func atoiDefault(s string, fallback int) int {
	// Atoi already rejects "", so no separate empty check is needed.
	if parsed, err := strconv.Atoi(s); err == nil {
		return parsed
	}
	return fallback
}