initial commit
This commit is contained in:
330
fixtures_refresh_test.go
Normal file
330
fixtures_refresh_test.go
Normal file
@@ -0,0 +1,330 @@
|
||||
//go:build fixtures
|
||||
|
||||
// This file is compiled only when the `fixtures` build tag is set:
|
||||
//
|
||||
// go test -tags=fixtures -run TestRefreshFixtures ./...
|
||||
//
|
||||
// It hits live FurAffinity with the cookies in your environment and snapshots
|
||||
// the response body of each curated page into testdata/html/. The regular
|
||||
// parser tests read from those snapshots, so this is how we keep the parser
|
||||
// in sync with the live site without baking sample data into the repo.
|
||||
//
|
||||
// Each fixture is its own subtest. A failure on one (network blip, dead
|
||||
// target, fresh CF challenge) does not abort the rest.
|
||||
//
|
||||
// # Required environment variables
|
||||
//
|
||||
// FA_A — `a` session cookie
|
||||
// FA_B — `b` session cookie
|
||||
// CF_CLEARANCE — cf_clearance cookie from the same browser session
|
||||
// FA_UA — User-Agent string that produced CF_CLEARANCE
|
||||
//
|
||||
// # Per-fixture targets
|
||||
//
|
||||
// All of these have defaults that fall back to FA_TEST_USER (your own login
|
||||
// name) where possible. Set them explicitly to capture data from somewhere
|
||||
// other than your own profile.
|
||||
//
|
||||
// FA_TEST_USER base username (yours)
|
||||
// FA_TEST_SUB_ID image submission ID (default: 12345678)
|
||||
// FA_TEST_SUB_STORY_ID non-image submission ID (story/music/PDF)
|
||||
// FA_TEST_GALLERY_USER gallery owner (default: FA_TEST_USER)
|
||||
// FA_TEST_GALLERY_LAST_PAGE page index near/at the end of that gallery
|
||||
// FA_TEST_SCRAPS_USER scraps owner (default: FA_TEST_GALLERY_USER)
|
||||
// FA_TEST_FAVORITES_USER favorites owner (default: FA_TEST_USER)
|
||||
// FA_TEST_JOURNALS_USER journals listing owner (default: FA_TEST_USER)
|
||||
// FA_TEST_JOURNAL_ID single journal ID
|
||||
// FA_TEST_USER_WITH_SHOUTS profile that has visible shouts
|
||||
// FA_TEST_USER_WITH_BANNER profile that has a custom site banner uploaded
|
||||
// FA_TEST_NOTE_ID single note (PM) ID (M2 prep)
|
||||
// FA_TEST_SEARCH_QUERY search keyword (M4 prep)
|
||||
// FA_TEST_NONEXISTENT_SUB_ID ID guaranteed to 404 (default: 9999999999)
|
||||
|
||||
package fa
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gocolly/colly/v2"
|
||||
|
||||
"git.anthrove.art/public/go-fa-api/internal/urls"
|
||||
)
|
||||
|
||||
// fixtureTarget describes one HTML page to snapshot: the file it is stored
// under, the URL it is fetched from, and the inputs that gate the capture.
// A target whose prerequisites are missing is skipped rather than failed.
type fixtureTarget struct {
	// name is the snapshot's file name; it is joined onto the fixtures
	// directory and also used as the subtest name.
	name string

	// url is the address fetched for this snapshot.
	url string

	// requires holds already-resolved input values (not env-var names).
	// The capture is skipped when any entry is blank or the literal "0".
	requires []string

	// notes is a free-form description echoed in skip and success messages.
	notes string
}
|
||||
|
||||
func TestRefreshFixtures(t *testing.T) {
|
||||
a := os.Getenv("FA_A")
|
||||
b := os.Getenv("FA_B")
|
||||
if a == "" || b == "" {
|
||||
t.Skip("FA_A / FA_B not set; cannot refresh fixtures")
|
||||
}
|
||||
cf := os.Getenv("CF_CLEARANCE")
|
||||
ua := os.Getenv("FA_UA")
|
||||
if cf == "" || ua == "" {
|
||||
t.Log("warning: CF_CLEARANCE or FA_UA not set; refresh likely to hit a Cloudflare challenge")
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(fixturesDir, 0o755); err != nil {
|
||||
t.Fatalf("mkdir %s: %v", fixturesDir, err)
|
||||
}
|
||||
|
||||
client := New(
|
||||
WithCookies(Cookies{A: a, B: b}),
|
||||
WithCloudflare(CFCookies{Clearance: cf}),
|
||||
WithUserAgent(ua),
|
||||
)
|
||||
|
||||
// Resolve targets every fixture is gated on the relevant env-derived
|
||||
// values being non-empty so an incomplete env still gets you the
|
||||
// fixtures you can capture.
|
||||
user := os.Getenv("FA_TEST_USER")
|
||||
galleryUser := envOr("FA_TEST_GALLERY_USER", user)
|
||||
scrapsUser := envOr("FA_TEST_SCRAPS_USER", galleryUser)
|
||||
favoritesUser := envOr("FA_TEST_FAVORITES_USER", user)
|
||||
journalsUser := envOr("FA_TEST_JOURNALS_USER", user)
|
||||
shoutsUser := os.Getenv("FA_TEST_USER_WITH_SHOUTS")
|
||||
bannerUser := os.Getenv("FA_TEST_USER_WITH_BANNER")
|
||||
searchQuery := os.Getenv("FA_TEST_SEARCH_QUERY")
|
||||
|
||||
subID := atoi64Default(os.Getenv("FA_TEST_SUB_ID"), 12345678)
|
||||
storyID := atoi64Default(os.Getenv("FA_TEST_SUB_STORY_ID"), 0)
|
||||
journalID := atoi64Default(os.Getenv("FA_TEST_JOURNAL_ID"), 0)
|
||||
noteID := atoi64Default(os.Getenv("FA_TEST_NOTE_ID"), 0)
|
||||
galleryLastPage := atoiDefault(os.Getenv("FA_TEST_GALLERY_LAST_PAGE"), 0)
|
||||
missingSubID := atoi64Default(os.Getenv("FA_TEST_NONEXISTENT_SUB_ID"), 9999999999)
|
||||
|
||||
targets := []fixtureTarget{
|
||||
// ---- M1: read API verifiable today --------------------------------
|
||||
{
|
||||
name: "submission.html",
|
||||
url: urls.Submission(subID),
|
||||
requires: []string{strconv.FormatInt(subID, 10)},
|
||||
notes: "image submission used by parseSubmission tests + comments parser",
|
||||
},
|
||||
{
|
||||
name: "submission_story.html",
|
||||
url: urls.Submission(storyID),
|
||||
requires: []string{strconv.FormatInt(storyID, 10)},
|
||||
notes: "non-image submission (story/music/PDF) exercises FileURL fallback to Download button",
|
||||
},
|
||||
{
|
||||
name: "user.html",
|
||||
url: urls.User(user),
|
||||
requires: []string{user},
|
||||
notes: "user profile used by parseUser tests",
|
||||
},
|
||||
{
|
||||
name: "user_with_shouts.html",
|
||||
url: urls.User(shoutsUser),
|
||||
requires: []string{shoutsUser},
|
||||
notes: "profile that exposes shouts used to validate shouts parser",
|
||||
},
|
||||
{
|
||||
name: "user_with_banner.html",
|
||||
url: urls.User(bannerUser),
|
||||
requires: []string{bannerUser},
|
||||
notes: "profile that has a custom uploaded site banner used to validate SiteBanner.IsCustom",
|
||||
},
|
||||
{
|
||||
name: "gallery_page1.html",
|
||||
url: urls.Gallery(galleryUser, 1),
|
||||
requires: []string{galleryUser},
|
||||
notes: "first gallery page figure[id^=sid-] iteration",
|
||||
},
|
||||
{
|
||||
name: "gallery_page_last.html",
|
||||
url: urls.Gallery(galleryUser, galleryLastPage),
|
||||
requires: []string{galleryUser, strconv.Itoa(galleryLastPage)},
|
||||
notes: "last gallery page verifies detectNextPage returns false at the end",
|
||||
},
|
||||
{
|
||||
name: "scraps_page1.html",
|
||||
url: urls.Scraps(scrapsUser, 1),
|
||||
requires: []string{scrapsUser},
|
||||
notes: "scraps listing same parser as gallery; sanity-check shape",
|
||||
},
|
||||
{
|
||||
name: "favorites_page1.html",
|
||||
url: urls.Favorites(favoritesUser, 1),
|
||||
requires: []string{favoritesUser},
|
||||
notes: "favorites per-item Author should be the original artist",
|
||||
},
|
||||
{
|
||||
name: "journals_listing_page1.html",
|
||||
url: urls.UserJournals(journalsUser, 1),
|
||||
requires: []string{journalsUser},
|
||||
notes: "journals listing used by UserJournals iterator",
|
||||
},
|
||||
{
|
||||
name: "journal.html",
|
||||
url: urls.Journal(journalID),
|
||||
requires: []string{strconv.FormatInt(journalID, 10)},
|
||||
notes: "single journal entry parseJournal target",
|
||||
},
|
||||
{
|
||||
name: "comments_submission.html",
|
||||
url: urls.Submission(subID),
|
||||
requires: []string{strconv.FormatInt(subID, 10)},
|
||||
notes: "submission page captured a second time for comment-parser fixture (comments are inline)",
|
||||
},
|
||||
{
|
||||
name: "comments_journal.html",
|
||||
url: urls.Journal(journalID),
|
||||
requires: []string{strconv.FormatInt(journalID, 10)},
|
||||
notes: "journal page captured for journal comments parsing",
|
||||
},
|
||||
{
|
||||
name: "system_message_not_found.html",
|
||||
url: urls.Submission(missingSubID),
|
||||
requires: []string{strconv.FormatInt(missingSubID, 10)},
|
||||
notes: "captures FA's system-message page for ErrNotFound classifier validation",
|
||||
},
|
||||
|
||||
// ---- M2: inbox/notes (parsers not yet written; captures for prep) -
|
||||
{
|
||||
name: "msg_submissions.html",
|
||||
url: urls.MsgSubmissions(),
|
||||
requires: []string{a},
|
||||
notes: "M2 prep: new-submission inbox (auth required)",
|
||||
},
|
||||
{
|
||||
name: "msg_others.html",
|
||||
url: urls.MsgOthers(),
|
||||
requires: []string{a},
|
||||
notes: "M2 prep: watch/journal/comment/fav notifications",
|
||||
},
|
||||
{
|
||||
name: "msg_pms.html",
|
||||
url: urls.MsgPMs(),
|
||||
requires: []string{a},
|
||||
notes: "M2 prep: private-message inbox",
|
||||
},
|
||||
{
|
||||
name: "note_view.html",
|
||||
url: urls.ViewMessage(noteID),
|
||||
requires: []string{strconv.FormatInt(noteID, 10)},
|
||||
notes: "M2 prep: single note view (needs FA_TEST_NOTE_ID)",
|
||||
},
|
||||
|
||||
// ---- M4: search/browse (parsers not yet written; captures for prep)
|
||||
{
|
||||
name: "search_results.html",
|
||||
url: urls.Search(searchQuery, 1),
|
||||
requires: []string{searchQuery},
|
||||
notes: "M4 prep: search results page",
|
||||
},
|
||||
{
|
||||
name: "browse.html",
|
||||
url: urls.Browse(1),
|
||||
requires: []string{a},
|
||||
notes: "M4 prep: /browse/ page",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tg := range targets {
|
||||
t.Run(tg.name, func(t *testing.T) {
|
||||
for _, r := range tg.requires {
|
||||
if strings.TrimSpace(r) == "" || r == "0" {
|
||||
t.Skipf("required input not set; skipping (%s)", tg.notes)
|
||||
return
|
||||
}
|
||||
}
|
||||
raw, err := fetchRaw(t.Context(), client, tg.url)
|
||||
if err != nil {
|
||||
t.Fatalf("fetch %s (%s): %v", tg.name, tg.url, err)
|
||||
}
|
||||
if doc, derr := goquery.NewDocumentFromReader(bytes.NewReader(raw)); derr == nil {
|
||||
if title := strings.TrimSpace(doc.Find("title").First().Text()); title == "Just a moment..." {
|
||||
t.Fatalf("got Cloudflare challenge page; refresh CF_CLEARANCE / FA_UA")
|
||||
}
|
||||
}
|
||||
out := filepath.Join(fixturesDir, tg.name)
|
||||
if err := os.WriteFile(out, raw, 0o644); err != nil {
|
||||
t.Fatalf("write %s: %v", out, err)
|
||||
}
|
||||
t.Logf("wrote %s (%d bytes) %s", out, len(raw), tg.notes)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// fetchRaw fetches the URL through the same Colly+transport pipeline the SDK
|
||||
// uses for parsed calls, but hands back the raw response body instead of
|
||||
// running a parser. Lives in the test build so we don't expose a public
|
||||
// raw-fetch API just for fixture refreshing.
|
||||
func fetchRaw(ctx context.Context, c *Client, rawURL string) ([]byte, error) {
|
||||
clone := c.collector.Clone()
|
||||
clone.SetClient(c.http)
|
||||
clone.SetCookieJar(c.jar)
|
||||
clone.Context = ctx
|
||||
|
||||
var body []byte
|
||||
var respErr error
|
||||
|
||||
clone.OnResponse(func(r *colly.Response) {
|
||||
// Copy: r.Body is reused by Colly across responses.
|
||||
body = append(body[:0], r.Body...)
|
||||
})
|
||||
clone.OnError(func(r *colly.Response, err error) {
|
||||
respErr = err
|
||||
})
|
||||
if err := clone.Visit(rawURL); err != nil {
|
||||
if respErr != nil {
|
||||
return nil, respErr
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
if respErr != nil {
|
||||
return nil, respErr
|
||||
}
|
||||
if len(body) == 0 {
|
||||
return nil, errors.New("fetchRaw: empty body")
|
||||
}
|
||||
return body, nil
|
||||
}
|
||||
|
||||
// envOr returns the value of the environment variable key, or fallback when
// the variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value, present := os.LookupEnv(key)
	if !present || value == "" {
		return fallback
	}
	return value
}
|
||||
|
||||
// atoi64Default parses s as a base-10 int64 and returns fallback when s is
// empty, blank, or not a valid int64.
//
// Leading and trailing whitespace is ignored. Without that, a value such as
// "123\n" (easy to end up with when a variable is filled from a file) would
// fail to parse and silently be replaced by fallback.
func atoi64Default(s string, fallback int64) int64 {
	s = strings.TrimSpace(s)
	if s == "" {
		return fallback
	}
	n, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		// Covers both malformed input and values outside the int64 range.
		return fallback
	}
	return n
}
|
||||
|
||||
// atoiDefault parses s as a base-10 int and returns fallback when s is empty,
// blank, or not a valid int.
//
// Leading and trailing whitespace is ignored. Without that, a value such as
// "3\n" (easy to end up with when a variable is filled from a file) would
// fail to parse and silently be replaced by fallback.
func atoiDefault(s string, fallback int) int {
	s = strings.TrimSpace(s)
	if s == "" {
		return fallback
	}
	n, err := strconv.Atoi(s)
	if err != nil {
		// Covers both malformed input and values outside the int range.
		return fallback
	}
	return n
}
|
||||
Reference in New Issue
Block a user