Files
go-fa-api/submission.go
SoXX a2fc1b7e32 feat(listing): populate Tags and CategorizedTags from figure data-tags
FA's beta listing pages emit each submission's tag list on the
figure's <img data-tags="..."> attribute, mixing prefixed system tags
(s_/c_/a_/u_/t_) with the unprefixed keyword list. Reading it during
gallery-page parse lets callers classify favorites/gallery/scraps/
browse/search/inbox items at scrape time, avoiding a /view/{id}
round-trip per submission.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-06-02 21:53:56 +02:00

122 lines
4.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package fa
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"time"
"github.com/PuerkitoBio/goquery"
"git.anthrove.art/public/go-fa-api/internal/urls"
)
// CategorizedTags buckets FA's prefixed system tags, one slice per category.
// Every name is kept with its prefix removed, so the raw tag
// "s_hybrid_species" is stored in Species as "hybrid_species".
type CategorizedTags struct {
	Species    []string // tags carrying the s_ prefix
	Characters []string // tags carrying the c_ prefix
	Artists    []string // tags carrying the a_ or u_ prefix
	Types      []string // tags carrying the t_ prefix
}
// Submission is a fully resolved FA submission as seen on /view/{id}/.
type Submission struct {
	ID       SubmissionID
	Title    string
	Author   UserRef
	PostedAt time.Time
	Rating   Rating
	Category Category
	Type     Type
	Species  Species
	Gender   Gender

	// Description is the raw HTML of the description; sanitise it before
	// rendering to a browser.
	Description string
	// DescriptionText is a plaintext convenience form of the description.
	DescriptionText string

	// Tags is the list of user-supplied keyword tags. Its origin depends on
	// how the Submission was obtained:
	//   - /view/-path Submissions: the anchors inside div.submission-tags.
	//   - listing-path Submissions (Gallery/Scraps/Favorites/Browse/Search/
	//     SubmissionInbox): the figure's data-tags attribute, which carries
	//     the same keywords FA renders on /view/ for that submission.
	Tags []string

	// CategorizedTags holds FA's prefixed system tags, grouped by category.
	// The prefixes are s_ (species), c_ (character), a_/u_ (artist) and
	// t_ (type). On /view/-path Submissions FA emits them as tag-block
	// entries inside div.submission-tags. On listing-path Submissions the
	// same prefixed tokens are parsed out of the figure's data-tags
	// attribute; there the a_ vs u_ distinction is lost, because FA
	// collapses both into u_ in that flat list.
	CategorizedTags CategorizedTags

	// FileURL is the absolute CDN URL of the main file; pass the Submission
	// to Download to fetch it.
	FileURL  string
	ThumbURL string

	// Width is 0 if unknown or if the submission is not an image.
	Width  int
	Height int

	Stats   SubmissionStats
	Folders []FolderRef

	// Prev is 0 if this is the oldest submission in the gallery.
	Prev SubmissionID
	// Next is 0 if this is the newest submission.
	Next SubmissionID

	// Favorited reports whether the authenticated viewer has favorited this
	// submission. It is true only when the page was fetched with valid
	// cookies and FA rendered the "Fav" (/unfav/) link. An anonymous fetch
	// always yields false.
	Favorited bool
}
// GetSubmission loads and parses the /view/ page of the submission identified
// by id.
//
// A non-positive id is rejected up front without any network traffic.
// Otherwise the result is whatever the fetch/parse pipeline yields:
// [ErrNotFound] if FA renders a "submission not found" system message,
// [ErrUnauthorized] for restricted-visibility submissions when called
// without valid cookies, or a wrapped parse error if the markup has shifted.
func (c *Client) GetSubmission(ctx context.Context, id SubmissionID, opts ...Option) (*Submission, error) {
	if id <= 0 {
		return nil, fmt.Errorf("fa: GetSubmission: id must be > 0")
	}

	var result *Submission

	// onDocument runs against the fetched page; result is only published
	// when parsing succeeded, so a failed parse never leaks a partial value.
	onDocument := func(doc *goquery.Document) error {
		parsed, parseErr := parseSubmission(id, doc)
		if parseErr == nil {
			result = parsed
		}
		return parseErr
	}

	if err := c.fetch(ctx, urls.Submission(int64(id)), onDocument, opts...); err != nil {
		return nil, err
	}
	return result, nil
}
// Download streams the submission's main file from the CDN into w. The same
// rate limiter that paces /view/ fetches paces CDN fetches, so an in-flight
// gallery iteration will yield correctly when Download is interleaved.
//
// sub must be non-nil and carry a non-empty FileURL, and w must be non-nil;
// all three are validated before any request is issued, so a bad argument
// never consumes a rate-limited request.
//
// Returns the number of bytes written to w. Errors from building or sending
// the request and errors from the writer are returned unchanged; a response
// status other than 200 OK comes back as [*HTTPError].
func (c *Client) Download(ctx context.Context, sub *Submission, w io.Writer, opts ...Option) (int64, error) {
	// Upper bound on how much of a non-200 response body is read before the
	// body is closed. Draining lets the transport reuse the connection; the
	// cap keeps an oversized error body from being downloaded in full.
	const maxErrorDrain = 64 << 10

	if sub == nil {
		return 0, errors.New("fa: Download: nil submission")
	}
	if sub.FileURL == "" {
		return 0, errors.New("fa: Download: submission has no FileURL")
	}
	// Without this guard a nil writer would panic inside io.Copy, and only
	// after the request had already been sent.
	if w == nil {
		return 0, errors.New("fa: Download: nil writer")
	}

	ctx = c.applyRequestOptions(ctx, opts)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, sub.FileURL, nil)
	if err != nil {
		return 0, err
	}

	// CDN fetches share the same rate-limited transport as page fetches —
	// see RoundTrip in transport.go where the limiter gates every request.
	resp, err := c.http.Do(req)
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best-effort drain: a failure here only costs connection reuse, so
		// the error is deliberately ignored.
		_, _ = io.Copy(io.Discard, io.LimitReader(resp.Body, maxErrorDrain))
		return 0, &HTTPError{StatusCode: resp.StatusCode, URL: sub.FileURL}
	}
	return io.Copy(w, resp.Body)
}