feat(submission): parse FA's prefixed system tags into CategorizedTags

FA renders its species/character/artist/type system tags as tag-block
anchors with a data-tag-name carrying a single-letter prefix
(s_/c_/a_/u_/t_) and a sibling tag-invalid span instead of a /search/
link. The existing keyword pass skips them, so they were lost.

Adds a Submission.CategorizedTags field exposing the four buckets with
the prefix stripped, plus an examples/categorized_tags runnable demo.
This commit is contained in:
2026-06-02 21:15:30 +02:00
parent 02479212bc
commit 20fcad7fbb
4 changed files with 161 additions and 0 deletions

View File

@@ -0,0 +1,90 @@
// categorized_tags demonstrates how the SDK groups FA's prefixed system
// tags into the four CategorizedTags buckets: Species (s_), Characters (c_),
// Artists (a_/u_), and Types (t_). Each bucket is printed on its own so the
// example covers every aspect of the feature.
//
// Runs anonymously by default. Set FA_A / FA_B (and ideally CF_CLEARANCE +
// FA_UA) to authenticate when the target submission requires it.
//
// go run ./examples/categorized_tags 12345678
package main
import (
"context"
"fmt"
"log"
"os"
"strconv"
"strings"
fa "git.anthrove.art/public/go-fa-api"
)
// main fetches the submission whose numeric ID is passed as the first
// command-line argument and prints its plain keyword tags followed by each
// CategorizedTags bucket under its own heading.
//
// The request is anonymous unless both FA_A and FA_B are set in the
// environment, in which case the cookie, Cloudflare and user-agent options
// are used instead of the default user-agent-only option set.
func main() {
	const exampleUA = "go-fa-api-example/0.1"

	if len(os.Args) < 2 {
		log.Fatalf("usage: %s <submission-id>", os.Args[0])
	}
	rawID, parseErr := strconv.ParseInt(os.Args[1], 10, 64)
	if parseErr != nil {
		log.Fatalf("invalid submission id: %v", parseErr)
	}

	// Start from the anonymous option set and replace it wholesale when
	// both session cookies are available.
	opts := []fa.Option{fa.WithUserAgent(exampleUA)}
	cookieA, cookieB := os.Getenv("FA_A"), os.Getenv("FA_B")
	authenticated := cookieA != "" && cookieB != ""
	if authenticated {
		log.Printf("using FA_A/FA_B cookies for authenticated request")
		opts = []fa.Option{
			fa.WithCookies(fa.Cookies{A: cookieA, B: cookieB}),
			fa.WithCloudflare(fa.CFCookies{Clearance: os.Getenv("CF_CLEARANCE")}),
			fa.WithUserAgent(envOr("FA_UA", exampleUA)),
		}
	}

	sub, fetchErr := fa.New(opts...).GetSubmission(context.Background(), fa.SubmissionID(rawID))
	if fetchErr != nil {
		log.Fatalf("GetSubmission: %v", fetchErr)
	}

	fmt.Printf("%s\nby %s\n\n", sub.Title, sub.Author.DisplayName)

	// Plain keywords are printed on a single comma-separated line.
	fmt.Println("=== Plain keyword tags (no prefix) ===")
	keywordLine := " (none)"
	if len(sub.Tags) != 0 {
		keywordLine = " " + strings.Join(sub.Tags, ", ")
	}
	fmt.Println(keywordLine)

	// Each bucket gets a blank separator line, a heading, and its entries.
	tags := sub.CategorizedTags
	sections := []struct {
		heading string
		names   []string
		prefix  string
	}{
		{"=== Species (s_) ===", tags.Species, "s_"},
		{"=== Characters (c_) ===", tags.Characters, "c_"},
		{"=== Artists (a_ / u_) ===", tags.Artists, "a_"},
		{"=== Types (t_) ===", tags.Types, "t_"},
	}
	for _, sec := range sections {
		fmt.Println()
		fmt.Println(sec.heading)
		printBucket(sec.names, sec.prefix)
	}
}
// printBucket writes one line per entry of items to standard output, each
// entry preceded by a single space and the given prefix. When items is empty
// it writes the placeholder line " (none)" instead.
func printBucket(items []string, prefix string) {
	if len(items) > 0 {
		for i := range items {
			fmt.Printf(" %s%s\n", prefix, items[i])
		}
		return
	}
	fmt.Println(" (none)")
}
// envOr returns the value of the environment variable named key, or fallback
// when that variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}

View File

@@ -13,6 +13,15 @@ import (
"git.anthrove.art/public/go-fa-api/internal/urls" "git.anthrove.art/public/go-fa-api/internal/urls"
) )
// CategorizedTags groups FA's prefixed system tags by category. Names are
// stored without their prefix (e.g. "s_hybrid_species" → Species "hybrid_species").
//
// The parser routes a tag to a bucket by the single letter before the
// underscore; tags with any other leading letter are not stored here.
type CategorizedTags struct {
// Species holds the names of tags that carried the s_ prefix.
Species []string
// Characters holds the names of tags that carried the c_ prefix.
Characters []string
// Artists holds the names of tags that carried either the a_ or the u_
// prefix. Both share this bucket, so which of the two prefixes a name
// originally had cannot be recovered from the stored value.
Artists []string
// Types holds the names of tags that carried the t_ prefix.
Types []string
}
// Submission is a fully resolved FA submission as seen on /view/{id}/. // Submission is a fully resolved FA submission as seen on /view/{id}/.
type Submission struct { type Submission struct {
ID SubmissionID ID SubmissionID
@@ -27,6 +36,10 @@ type Submission struct {
Description string // raw HTML; sanitise before rendering to a browser Description string // raw HTML; sanitise before rendering to a browser
DescriptionText string // plaintext convenience DescriptionText string // plaintext convenience
Tags []string Tags []string
// CategorizedTags groups FA's prefixed system tags by category.
// FA emits these as tag-block entries inside div.submission-tags with
// prefixes s_ (species), c_ (character), a_/u_ (artist), and t_ (type).
CategorizedTags CategorizedTags
FileURL string // absolute CDN URL; pass to Download FileURL string // absolute CDN URL; pass to Download
ThumbURL string ThumbURL string
Width int // 0 if unknown / non-image Width int // 0 if unknown / non-image

View File

@@ -156,6 +156,32 @@ func parseSubmission(id SubmissionID, doc *goquery.Document) (*Submission, error
} }
}) })
// Prefixed system tags — FA renders these as tag-block anchors with a
// data-tag-name attribute carrying a leading single-letter prefix:
// s_ species, c_ character, a_/u_ artist, t_ type.
// They are paired with a sibling <span class="tag-invalid"> and have no
// /search/ href, so they are skipped by the keyword pass above.
doc.Find("div.submission-tags a.tag-block[data-tag-name]").Each(func(_ int, a *goquery.Selection) {
raw := strings.TrimSpace(trimAttr(a, "data-tag-name"))
if len(raw) < 3 || raw[1] != '_' {
return
}
name := raw[2:]
if name == "" {
return
}
switch raw[0] {
case 's':
s.CategorizedTags.Species = append(s.CategorizedTags.Species, name)
case 'c':
s.CategorizedTags.Characters = append(s.CategorizedTags.Characters, name)
case 'a', 'u':
s.CategorizedTags.Artists = append(s.CategorizedTags.Artists, name)
case 't':
s.CategorizedTags.Types = append(s.CategorizedTags.Types, name)
}
})
// File URL FA renders a "Download" button in #submission-options that // File URL FA renders a "Download" button in #submission-options that
// links to the canonical file for *every* submission type. For visual // links to the canonical file for *every* submission type. For visual
// art it equals the #submissionImg source; for stories and music it's // art it equals the #submissionImg source; for stories and music it's

View File

@@ -54,6 +54,10 @@ const syntheticSubmissionHTML = `<html><body>
<div> <div>
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords wolf">wolf</a></span></span> <span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords wolf">wolf</a></span></span>
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords art">art</a></span></span> <span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords art">art</a></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="s_wolf" class="tag-block"></a><span class="tag-invalid">s_wolf</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="c_artwork_digital" class="tag-block"></a><span class="tag-invalid">c_artwork_digital</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="t_general_furry_art" class="tag-block"></a><span class="tag-invalid">t_general_furry_art</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="u_somefurry" class="tag-block"></a><span class="tag-invalid">u_somefurry</span></span></span>
</div> </div>
</div> </div>
@@ -110,6 +114,34 @@ func TestParseSubmission_Synthetic(t *testing.T) {
if !strings.Contains(sub.Description, "world") { if !strings.Contains(sub.Description, "world") {
t.Errorf("Description missing expected content: %q", sub.Description) t.Errorf("Description missing expected content: %q", sub.Description)
} }
catChecks := []struct {
name string
got []string
want []string
}{
{"Species", sub.CategorizedTags.Species, []string{"wolf"}},
{"Characters", sub.CategorizedTags.Characters, []string{"artwork_digital"}},
{"Types", sub.CategorizedTags.Types, []string{"general_furry_art"}},
{"Artists", sub.CategorizedTags.Artists, []string{"somefurry"}},
}
for _, c := range catChecks {
if !equalStrings(c.got, c.want) {
t.Errorf("CategorizedTags.%s = %v; want %v", c.name, c.got, c.want)
}
}
}
// equalStrings reports whether a and b have the same length and hold the
// same string at every index. Only len is consulted for the size check, so a
// nil slice and an empty non-nil slice compare equal.
func equalStrings(a, b []string) bool {
// Slices of different length can never match; rejecting them here also
// keeps the b[i] access below in range.
if len(a) != len(b) {
return false
}
for i := range a {
if a[i] != b[i] {
return false
}
}
return true
} }
// TestParseSubmission_FavoritedState verifies parseSubmission reports the // TestParseSubmission_FavoritedState verifies parseSubmission reports the