Files
go-fa-api/client.go
2026-05-26 20:21:55 +02:00

187 lines
5.2 KiB
Go

package fa
import (
"bytes"
"context"
"fmt"
"log/slog"
"net/http"
"net/http/cookiejar"
"net/url"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/gocolly/colly/v2"
farouting "git.anthrove.art/public/go-fa-api/internal/urls"
)
// Client is the SDK's entry point and represents exactly one FA session.
//
// A Client may be shared between goroutines: every outbound request passes
// through the internal rate limiter, which serializes traffic no matter how
// many callers are active. Build separate clients for anonymous and
// authenticated use instead of changing one while it is in use.
type Client struct {
	// cfg is the configuration New resolved from the defaults and options.
	cfg config
	// limiter is the rate limiter handed to the HTTP transport in New.
	limiter *rateLimiter
	// logger is the logger taken from cfg.
	logger *slog.Logger
	// collector is the base Colly collector; fetch clones it per request.
	collector *colly.Collector
	// http is the HTTP client whose transport carries the rate limiter.
	http *http.Client
	// jar is the cookie jar seeded by New and installed on http.
	jar http.CookieJar
}
// New builds a Client from the package defaults, then applies opts in order
// so that each option can override what came before it.
//
//	client := fa.New(
//		fa.WithCookies(fa.Cookies{A: aCookie, B: bCookie}),
//		fa.WithUserAgent("myapp/1.0"),
//	)
//
// When an http.Client is supplied through the options, only its Transport,
// Timeout and CheckRedirect are carried over; the returned Client always uses
// its own cookie jar and its own rate-limited transport wrapper.
func New(opts ...Option) *Client {
	// Default pacing is one request per second with a burst of three, so a
	// handful of back-to-back requests (e.g. avatar enrichment, one fetch
	// per distinct author) can go out before the 1/s pacing takes over.
	cfg := config{
		userAgent:    defaultUserAgent,
		rateInterval: time.Second,
		rateBurst:    3,
		logger:       slog.Default(),
		maxRetries:   defaultMaxRetries,
	}
	for i := range opts {
		opts[i](&cfg)
	}

	rl := newRateLimiter(cfg.rateInterval, cfg.rateBurst, cfg.priorityRL)

	// The error is deliberately discarded: with nil options the stdlib
	// cookiejar.New has no failure path.
	cookies, _ := cookiejar.New(nil)
	seedJar(cookies, cfg.cookies, cfg.cf, cfg.sfw)

	// Wrap the caller's transport when there is one so that any TLS
	// customisation (uTLS, chromedp, etc.) keeps applying; otherwise wrap
	// the stdlib default transport.
	upstream := http.DefaultTransport
	if custom := cfg.httpClient; custom != nil && custom.Transport != nil {
		upstream = custom.Transport
	}

	client := &http.Client{
		Jar: cookies,
		Transport: &transport{
			base:       upstream,
			limiter:    rl,
			userAgent:  cfg.userAgent,
			maxRetries: cfg.maxRetries,
			logger:     cfg.logger,
		},
	}
	if custom := cfg.httpClient; custom != nil {
		client.Timeout = custom.Timeout
		client.CheckRedirect = custom.CheckRedirect
	}

	// No Colly LimitRule is installed on purpose: it would stack on top of
	// the transport limiter and throttle every request twice. The transport
	// stays the single source of pacing truth.
	collector := colly.NewCollector(
		colly.UserAgent(cfg.userAgent),
		colly.AllowURLRevisit(),
	)
	collector.SetClient(client)
	collector.SetCookieJar(cookies)

	return &Client{
		cfg:       cfg,
		limiter:   rl,
		logger:    cfg.logger,
		collector: collector,
		http:      client,
		jar:       cookies,
	}
}
// seedJar stores the FA session cookies and the Cloudflare clearance cookie
// in jar so that every outbound request to the host carries them. The stdlib
// jar scopes cookies by URL, so they are registered against the FA host root.
// Empty cookie values are skipped, and nothing is written if the host URL
// does not parse or no cookie needs to be set.
//
// With sfw set to [SFWOn] or [SFWOff] the `sfw` cookie is written as "1" or
// "0" respectively, matching what FA's navbar slider writes client-side.
// [SFWAuto] leaves the cookie unset so the account default applies.
func seedJar(jar http.CookieJar, fa Cookies, cf CFCookies, sfw SFWMode) {
	root, err := url.Parse(farouting.Host)
	if err != nil {
		return
	}

	var pending []*http.Cookie
	// add queues one cookie scoped to the whole site ("/").
	add := func(name, value string) {
		pending = append(pending, &http.Cookie{Name: name, Value: value, Path: "/"})
	}

	if fa.A != "" {
		add("a", fa.A)
	}
	if fa.B != "" {
		add("b", fa.B)
	}
	if cf.Clearance != "" {
		add("cf_clearance", cf.Clearance)
	}
	if sfw == SFWOn {
		add("sfw", "1")
	} else if sfw == SFWOff {
		add("sfw", "0")
	}

	if len(pending) == 0 {
		return
	}
	jar.SetCookies(root, pending)
}
// fetch executes a single GET via the internal Colly collector and hands the
// parsed goquery document to parse. The collector clone scopes the
// OnResponse/OnError callbacks to this single call, so concurrent calls do
// not see each other's responses.
//
// opts are per-request options; they are folded into ctx by
// applyRequestOptions before the request is issued.
//
// Context cancellation propagates through the http.Request and the rate
// limiter: a cancelled ctx surfaces from Wait or from the underlying
// transport, depending on which phase the request is in.
//
// Error precedence: an error reported through Colly's OnError callback wins,
// then the error returned by Visit, then any error produced while building,
// classifying or parsing the document.
func (c *Client) fetch(ctx context.Context, rawURL string, parse func(doc *goquery.Document) error, opts ...Option) error {
	ctx = c.applyRequestOptions(ctx, opts)

	// Clone shares the parent collector's HTTP backend, so the client and
	// cookie jar installed by New are already in effect. Re-installing them
	// here would write to state shared with every other in-flight fetch and
	// race with concurrent requests.
	clone := c.collector.Clone()
	clone.Context = ctx

	var (
		parseErr error
		respErr  error
	)
	clone.OnResponse(func(r *colly.Response) {
		doc, err := goquery.NewDocumentFromReader(bytes.NewReader(r.Body))
		if err != nil {
			parseErr = fmt.Errorf("%w: build document: %v", ErrParse, err)
			return
		}
		// A non-nil result from classifySystemMessage short-circuits
		// before the caller's parse function runs.
		if smErr := classifySystemMessage(doc); smErr != nil {
			parseErr = smErr
			return
		}
		if err := parse(doc); err != nil {
			parseErr = err
		}
	})
	clone.OnError(func(_ *colly.Response, err error) {
		respErr = err
	})

	visitErr := clone.Visit(rawURL)
	if respErr != nil {
		return respErr
	}
	if visitErr != nil {
		return visitErr
	}
	return parseErr
}