package fa

import (
	"bytes"
	"context"
	"fmt"
	"log/slog"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/gocolly/colly/v2"

	farouting "git.anthrove.art/public/go-fa-api/internal/urls"
)

// Client is the SDK's entry point. It may be shared between goroutines:
// every outbound request passes through the internal rate limiter, which
// serializes them no matter how many callers are active.
//
// A Client represents exactly one FA session. Build separate anonymous and
// authenticated clients instead of mutating one while it is in use.
type Client struct {
	cfg       config           // configuration after all options were applied
	limiter   *rateLimiter     // limiter shared with the HTTP transport
	logger    *slog.Logger     // logger taken from cfg
	collector *colly.Collector // base collector wired to http and jar
	http      *http.Client     // client wrapping the rate-limited transport
	jar       http.CookieJar   // jar seeded by seedJar
}

// New builds a Client from the package defaults and then applies opts in the
// order given.
//
//	client := fa.New(
//		fa.WithCookies(fa.Cookies{A: aCookie, B: bCookie}),
//		fa.WithUserAgent("myapp/1.0"),
//	)
func New(opts ...Option) *Client {
	cfg := config{
		userAgent: defaultUserAgent,
		// Steady state is one request per second. A small burst is allowed
		// so that, for example, avatar enrichment (one fetch per distinct
		// author) can issue a few requests back-to-back before the 1/s
		// pacing takes over.
		rateInterval: time.Second,
		rateBurst:    3,
		logger:       slog.Default(),
		maxRetries:   defaultMaxRetries,
	}
	for _, apply := range opts {
		apply(&cfg)
	}

	limiter := newRateLimiter(cfg.rateInterval, cfg.rateBurst, cfg.priorityRL)
	custom := cfg.httpClient

	// Pick the RoundTripper our transport wraps. A caller-supplied client
	// contributes its own transport so TLS customisation (uTLS, chromedp,
	// etc.) keeps working; without one the stdlib default is wrapped.
	inner := http.DefaultTransport
	if custom != nil && custom.Transport != nil {
		inner = custom.Transport
	}

	// The error is deliberately ignored: the jar is created with nil options.
	jar, _ := cookiejar.New(nil)
	seedJar(jar, cfg.cookies, cfg.cf, cfg.sfw)

	httpClient := &http.Client{
		Jar: jar,
		Transport: &transport{
			base:       inner,
			limiter:    limiter,
			userAgent:  cfg.userAgent,
			maxRetries: cfg.maxRetries,
			logger:     cfg.logger,
		},
	}
	if custom != nil {
		// Only the timeout and redirect policy carry over from the caller's
		// client; its transport was already adopted above.
		httpClient.Timeout = custom.Timeout
		httpClient.CheckRedirect = custom.CheckRedirect
	}

	// No Colly LimitRule is installed on purpose: it would stack on top of
	// the transport limiter and throttle every request twice. The transport
	// is the only place where pacing is decided.
	collector := colly.NewCollector(
		colly.UserAgent(cfg.userAgent),
		colly.AllowURLRevisit(),
	)
	collector.SetClient(httpClient)
	collector.SetCookieJar(jar)

	return &Client{
		cfg:       cfg,
		limiter:   limiter,
		logger:    cfg.logger,
		collector: collector,
		http:      httpClient,
		jar:       jar,
	}
}

// seedJar puts the FA session cookies and the Cloudflare clearance cookie
// into the jar so that every request sent to the host carries them. The
// stdlib jar scopes cookies by URL, so the FA host root is used.
//
// For [SFWOn] and [SFWOff] the `sfw` cookie is written as "1" or "0"
// respectively, mirroring what FA's navbar slider writes client-side.
// [SFWAuto] writes no cookie, so the account default applies.
func seedJar(jar http.CookieJar, fa Cookies, cf CFCookies, sfw SFWMode) { hostURL, err := url.Parse(farouting.Host) if err != nil { return } var cookies []*http.Cookie if fa.A != "" { cookies = append(cookies, &http.Cookie{Name: "a", Value: fa.A, Path: "/"}) } if fa.B != "" { cookies = append(cookies, &http.Cookie{Name: "b", Value: fa.B, Path: "/"}) } if cf.Clearance != "" { cookies = append(cookies, &http.Cookie{Name: "cf_clearance", Value: cf.Clearance, Path: "/"}) } switch sfw { case SFWOn: cookies = append(cookies, &http.Cookie{Name: "sfw", Value: "1", Path: "/"}) case SFWOff: cookies = append(cookies, &http.Cookie{Name: "sfw", Value: "0", Path: "/"}) } if len(cookies) > 0 { jar.SetCookies(hostURL, cookies) } } // fetch executes a single GET via the internal Colly collector and hands the // parsed goquery document to parse. The collector clone scopes the OnHTML/ // OnResponse callbacks to this single call, so concurrent calls do not see // each other's responses. // // Context cancellation propagates through the http.Request and the rate // limiter a cancelled ctx surfaces from Wait or from the underlying // transport, depending on which phase the request is in. 
func (c *Client) fetch(ctx context.Context, rawURL string, parse func(doc *goquery.Document) error, opts ...Option) error { ctx = c.applyRequestOptions(ctx, opts) clone := c.collector.Clone() clone.SetClient(c.http) clone.SetCookieJar(c.jar) clone.Context = ctx var ( parseErr error respErr error ) clone.OnResponse(func(r *colly.Response) { doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body)) if err != nil { parseErr = fmt.Errorf("%w: build document: %v", ErrParse, err) return } if smErr := classifySystemMessage(doc); smErr != nil { parseErr = smErr return } if err := parse(doc); err != nil { parseErr = err } }) clone.OnError(func(r *colly.Response, err error) { respErr = err }) if err := clone.Visit(rawURL); err != nil { if respErr != nil { return respErr } return err } if respErr != nil { return respErr } return parseErr }