FA's beta listing pages emit each submission's tag list on the
figure's <img data-tags="..."> attribute, mixing prefixed system tags
(s_/c_/a_/u_/t_) with the unprefixed keyword list. Reading it during
gallery-page parse lets callers classify favorites/gallery/scraps/
browse/search/inbox items at scrape time, avoiding a /view/{id}
round-trip per submission.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
122 lines
4.2 KiB
Go
122 lines
4.2 KiB
Go
package fa
|
||
|
||
import (
|
||
"context"
|
||
"errors"
|
||
"fmt"
|
||
"io"
|
||
"net/http"
|
||
"time"
|
||
|
||
"github.com/PuerkitoBio/goquery"
|
||
|
||
"git.anthrove.art/public/go-fa-api/internal/urls"
|
||
)
|
||
|
||
// CategorizedTags buckets FA's prefixed system tags by the category their
// prefix denotes. Every name is kept with the prefix stripped, so the raw tag
// "s_hybrid_species" ends up in Species as "hybrid_species".
type CategorizedTags struct {
	// Species holds the names of s_-prefixed tags.
	Species []string

	// Characters holds the names of c_-prefixed tags.
	Characters []string

	// Artists holds the names of a_- and u_-prefixed tags.
	Artists []string

	// Types holds the names of t_-prefixed tags.
	Types []string
}
|
||
|
||
// Submission is a fully resolved FA submission as seen on /view/{id}/.
|
||
type Submission struct {
|
||
ID SubmissionID
|
||
Title string
|
||
Author UserRef
|
||
PostedAt time.Time
|
||
Rating Rating
|
||
Category Category
|
||
Type Type
|
||
Species Species
|
||
Gender Gender
|
||
Description string // raw HTML; sanitise before rendering to a browser
|
||
DescriptionText string // plaintext convenience
|
||
// Tags holds the user-supplied keyword tags. On /view/-path Submissions
|
||
// these come from div.submission-tags anchors. On listing-path
|
||
// Submissions (Gallery/Scraps/Favorites/Browse/Search/SubmissionInbox)
|
||
// they come from the figure's data-tags attribute, which carries the
|
||
// same keywords FA renders on /view/ for that submission.
|
||
Tags []string
|
||
// CategorizedTags groups FA's prefixed system tags by category.
|
||
// On /view/-path Submissions FA emits these as tag-block entries inside
|
||
// div.submission-tags with prefixes s_ (species), c_ (character),
|
||
// a_/u_ (artist), and t_ (type). On listing-path Submissions the same
|
||
// prefixed tokens are parsed out of the figure's data-tags attribute;
|
||
// the a_ vs u_ distinction is lost there because FA collapses both into
|
||
// u_ in that flat list.
|
||
CategorizedTags CategorizedTags
|
||
FileURL string // absolute CDN URL; pass to Download
|
||
ThumbURL string
|
||
Width int // 0 if unknown / non-image
|
||
Height int
|
||
Stats SubmissionStats
|
||
Folders []FolderRef
|
||
Prev SubmissionID // 0 if this is the oldest in the gallery
|
||
Next SubmissionID // 0 if this is the newest
|
||
|
||
// Favorited reports whether the authenticated viewer has favorited this
|
||
// submission. It is true only when the page was fetched with valid
|
||
// cookies and FA rendered the "−Fav" (/unfav/) link. An anonymous fetch
|
||
// always yields false.
|
||
Favorited bool
|
||
}
|
||
|
||
// GetSubmission fetches the submission with the given numeric ID.
|
||
// Returns [ErrNotFound] if FA renders a "submission not found" system message,
|
||
// [ErrUnauthorized] for restricted-visibility submissions when called
|
||
// without valid cookies, or a wrapped parse error if the markup has shifted.
|
||
func (c *Client) GetSubmission(ctx context.Context, id SubmissionID, opts ...Option) (*Submission, error) {
|
||
if id <= 0 {
|
||
return nil, fmt.Errorf("fa: GetSubmission: id must be > 0")
|
||
}
|
||
var out *Submission
|
||
err := c.fetch(ctx, urls.Submission(int64(id)), func(doc *goquery.Document) error {
|
||
s, err := parseSubmission(id, doc)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
out = s
|
||
return nil
|
||
}, opts...)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
return out, nil
|
||
}
|
||
|
||
// Download streams the submission's main file from the CDN into w. The same
|
||
// rate limiter that paces /view/ fetches paces CDN fetches, so an in-flight
|
||
// gallery iteration will yield correctly when Download is interleaved.
|
||
//
|
||
// Returns the number of bytes written. Errors from the writer are wrapped
|
||
// as-is; HTTP errors come back as [*HTTPError].
|
||
func (c *Client) Download(ctx context.Context, sub *Submission, w io.Writer, opts ...Option) (int64, error) {
|
||
if sub == nil {
|
||
return 0, errors.New("fa: Download: nil submission")
|
||
}
|
||
if sub.FileURL == "" {
|
||
return 0, errors.New("fa: Download: submission has no FileURL")
|
||
}
|
||
ctx = c.applyRequestOptions(ctx, opts)
|
||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, sub.FileURL, nil)
|
||
if err != nil {
|
||
return 0, err
|
||
}
|
||
// CDN fetches share the same rate-limited transport as page fetches —
|
||
// see RoundTrip in transport.go where the limiter gates every request.
|
||
resp, err := c.http.Do(req)
|
||
if err != nil {
|
||
return 0, err
|
||
}
|
||
defer resp.Body.Close()
|
||
if resp.StatusCode != http.StatusOK {
|
||
_, _ = io.Copy(io.Discard, resp.Body)
|
||
return 0, &HTTPError{StatusCode: resp.StatusCode, URL: sub.FileURL}
|
||
}
|
||
return io.Copy(w, resp.Body)
|
||
}
|