//go:build fixtures

// This file is compiled only when the `fixtures` build tag is set:
//
//	go test -tags=fixtures -run TestRefreshFixtures ./...
//
// It hits live FurAffinity with the cookies in your environment and snapshots
// the response body of each curated page into testdata/html/. The regular
// parser tests read from those snapshots, so this is how we keep the parser
// in sync with the live site without baking sample data into the repo.
//
// Each fixture is its own subtest. A failure on one (network blip, dead
// target, fresh CF challenge) does not abort the rest.
//
// # Required environment variables
//
//	FA_A          — `a` session cookie
//	FA_B          — `b` session cookie
//	CF_CLEARANCE  — cf_clearance cookie from the same browser session
//	FA_UA         — User-Agent string that produced CF_CLEARANCE
//
// # Per-fixture targets
//
// All of these have defaults that fall back to FA_TEST_USER (your own login
// name) where possible. Set them explicitly to capture data from somewhere
// other than your own profile.
//
//	FA_TEST_USER                 base username (yours)
//	FA_TEST_SUB_ID               image submission ID (default: 12345678)
//	FA_TEST_SUB_STORY_ID         non-image submission ID (story/music/PDF)
//	FA_TEST_GALLERY_USER         gallery owner (default: FA_TEST_USER)
//	FA_TEST_GALLERY_LAST_PAGE    page index near/at the end of that gallery
//	FA_TEST_SCRAPS_USER          scraps owner (default: FA_TEST_GALLERY_USER)
//	FA_TEST_FAVORITES_USER       favorites owner (default: FA_TEST_USER)
//	FA_TEST_JOURNALS_USER        journals listing owner (default: FA_TEST_USER)
//	FA_TEST_JOURNAL_ID           single journal ID
//	FA_TEST_USER_WITH_SHOUTS     profile that has visible shouts
//	FA_TEST_USER_WITH_BANNER     profile that has a custom site banner uploaded
//	FA_TEST_NOTE_ID              single note (PM) ID (M2 prep)
//	FA_TEST_SEARCH_QUERY         search keyword (M4 prep)
//	FA_TEST_NONEXISTENT_SUB_ID   ID guaranteed to 404 (default: 9999999999)
package fa

import (
	"bytes"
	"context"
	"errors"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/PuerkitoBio/goquery"
	"github.com/gocolly/colly/v2"

	"git.anthrove.art/public/go-fa-api/internal/urls"
)

// fixtureTarget defines one HTML file to capture. requires lists env-var
// names that must be set (after defaults are resolved) for this fixture to
// be attempted; targets with missing prerequisites are skipped, not failed.
type fixtureTarget struct { name string url string requires []string // already-resolved values to check non-empty notes string } func TestRefreshFixtures(t *testing.T) { a := os.Getenv("FA_A") b := os.Getenv("FA_B") if a == "" || b == "" { t.Skip("FA_A / FA_B not set; cannot refresh fixtures") } cf := os.Getenv("CF_CLEARANCE") ua := os.Getenv("FA_UA") if cf == "" || ua == "" { t.Log("warning: CF_CLEARANCE or FA_UA not set; refresh likely to hit a Cloudflare challenge") } if err := os.MkdirAll(fixturesDir, 0o755); err != nil { t.Fatalf("mkdir %s: %v", fixturesDir, err) } client := New( WithCookies(Cookies{A: a, B: b}), WithCloudflare(CFCookies{Clearance: cf}), WithUserAgent(ua), ) // Resolve targets every fixture is gated on the relevant env-derived // values being non-empty so an incomplete env still gets you the // fixtures you can capture. user := os.Getenv("FA_TEST_USER") galleryUser := envOr("FA_TEST_GALLERY_USER", user) scrapsUser := envOr("FA_TEST_SCRAPS_USER", galleryUser) favoritesUser := envOr("FA_TEST_FAVORITES_USER", user) journalsUser := envOr("FA_TEST_JOURNALS_USER", user) shoutsUser := os.Getenv("FA_TEST_USER_WITH_SHOUTS") bannerUser := os.Getenv("FA_TEST_USER_WITH_BANNER") searchQuery := os.Getenv("FA_TEST_SEARCH_QUERY") subID := atoi64Default(os.Getenv("FA_TEST_SUB_ID"), 12345678) storyID := atoi64Default(os.Getenv("FA_TEST_SUB_STORY_ID"), 0) journalID := atoi64Default(os.Getenv("FA_TEST_JOURNAL_ID"), 0) noteID := atoi64Default(os.Getenv("FA_TEST_NOTE_ID"), 0) galleryLastPage := atoiDefault(os.Getenv("FA_TEST_GALLERY_LAST_PAGE"), 0) missingSubID := atoi64Default(os.Getenv("FA_TEST_NONEXISTENT_SUB_ID"), 9999999999) targets := []fixtureTarget{ // ---- M1: read API verifiable today -------------------------------- { name: "submission.html", url: urls.Submission(subID), requires: []string{strconv.FormatInt(subID, 10)}, notes: "image submission used by parseSubmission tests + comments parser", }, { name: "submission_story.html", url: 
urls.Submission(storyID), requires: []string{strconv.FormatInt(storyID, 10)}, notes: "non-image submission (story/music/PDF) exercises FileURL fallback to Download button", }, { name: "user.html", url: urls.User(user), requires: []string{user}, notes: "user profile used by parseUser tests", }, { name: "user_with_shouts.html", url: urls.User(shoutsUser), requires: []string{shoutsUser}, notes: "profile that exposes shouts used to validate shouts parser", }, { name: "user_with_banner.html", url: urls.User(bannerUser), requires: []string{bannerUser}, notes: "profile that has a custom uploaded site banner used to validate SiteBanner.IsCustom", }, { name: "gallery_page1.html", url: urls.Gallery(galleryUser, 1), requires: []string{galleryUser}, notes: "first gallery page figure[id^=sid-] iteration", }, { name: "gallery_page_last.html", url: urls.Gallery(galleryUser, galleryLastPage), requires: []string{galleryUser, strconv.Itoa(galleryLastPage)}, notes: "last gallery page verifies detectNextPage returns false at the end", }, { name: "scraps_page1.html", url: urls.Scraps(scrapsUser, 1), requires: []string{scrapsUser}, notes: "scraps listing same parser as gallery; sanity-check shape", }, { name: "favorites_page1.html", url: urls.Favorites(favoritesUser), requires: []string{favoritesUser}, notes: "favorites per-item Author should be the original artist", }, { name: "journals_listing_page1.html", url: urls.UserJournals(journalsUser, 1), requires: []string{journalsUser}, notes: "journals listing used by UserJournals iterator", }, { name: "journal.html", url: urls.Journal(journalID), requires: []string{strconv.FormatInt(journalID, 10)}, notes: "single journal entry parseJournal target", }, { name: "comments_submission.html", url: urls.Submission(subID), requires: []string{strconv.FormatInt(subID, 10)}, notes: "submission page captured a second time for comment-parser fixture (comments are inline)", }, { name: "comments_journal.html", url: urls.Journal(journalID), requires: 
[]string{strconv.FormatInt(journalID, 10)}, notes: "journal page captured for journal comments parsing", }, { name: "system_message_not_found.html", url: urls.Submission(missingSubID), requires: []string{strconv.FormatInt(missingSubID, 10)}, notes: "captures FA's system-message page for ErrNotFound classifier validation", }, // ---- M2: inbox/notes (parsers not yet written; captures for prep) - { name: "msg_submissions.html", url: urls.MsgSubmissions(), requires: []string{a}, notes: "M2 prep: new-submission inbox (auth required)", }, { name: "msg_others.html", url: urls.MsgOthers(), requires: []string{a}, notes: "M2 prep: watch/journal/comment/fav notifications", }, { name: "msg_pms.html", url: urls.MsgPMs(), requires: []string{a}, notes: "M2 prep: private-message inbox", }, { name: "note_view.html", url: urls.ViewMessage(noteID), requires: []string{strconv.FormatInt(noteID, 10)}, notes: "M2 prep: single note view (needs FA_TEST_NOTE_ID)", }, // ---- M4: search/browse (parsers not yet written; captures for prep) { name: "search_results.html", url: urls.Search(searchQuery, 1), requires: []string{searchQuery}, notes: "M4 prep: search results page", }, { name: "browse.html", url: urls.Browse(1), requires: []string{a}, notes: "M4 prep: /browse/ page", }, } for _, tg := range targets { t.Run(tg.name, func(t *testing.T) { for _, r := range tg.requires { if strings.TrimSpace(r) == "" || r == "0" { t.Skipf("required input not set; skipping (%s)", tg.notes) return } } raw, err := fetchRaw(t.Context(), client, tg.url) if err != nil { t.Fatalf("fetch %s (%s): %v", tg.name, tg.url, err) } if doc, derr := goquery.NewDocumentFromReader(bytes.NewReader(raw)); derr == nil { if title := strings.TrimSpace(doc.Find("title").First().Text()); title == "Just a moment..." 
{ t.Fatalf("got Cloudflare challenge page; refresh CF_CLEARANCE / FA_UA") } } out := filepath.Join(fixturesDir, tg.name) if err := os.WriteFile(out, raw, 0o644); err != nil { t.Fatalf("write %s: %v", out, err) } t.Logf("wrote %s (%d bytes) %s", out, len(raw), tg.notes) }) } } // fetchRaw fetches the URL through the same Colly+transport pipeline the SDK // uses for parsed calls, but hands back the raw response body instead of // running a parser. Lives in the test build so we don't expose a public // raw-fetch API just for fixture refreshing. func fetchRaw(ctx context.Context, c *Client, rawURL string) ([]byte, error) { clone := c.collector.Clone() clone.SetClient(c.http) clone.SetCookieJar(c.jar) clone.Context = ctx var body []byte var respErr error clone.OnResponse(func(r *colly.Response) { // Copy: r.Body is reused by Colly across responses. body = append(body[:0], r.Body...) }) clone.OnError(func(r *colly.Response, err error) { respErr = err }) if err := clone.Visit(rawURL); err != nil { if respErr != nil { return nil, respErr } return nil, err } if respErr != nil { return nil, respErr } if len(body) == 0 { return nil, errors.New("fetchRaw: empty body") } return body, nil } func envOr(key, fallback string) string { if v := os.Getenv(key); v != "" { return v } return fallback } // atoi64Default parses s as an int64; on any failure returns fallback. func atoi64Default(s string, fallback int64) int64 { if s == "" { return fallback } n, err := strconv.ParseInt(s, 10, 64) if err != nil { return fallback } return n } // atoiDefault parses s as an int; on any failure returns fallback. func atoiDefault(s string, fallback int) int { if s == "" { return fallback } n, err := strconv.Atoi(s) if err != nil { return fallback } return n }