feat(submission): parse FA's prefixed system tags into CategorizedTags
FA renders its species/character/artist/type system tags as tag-block anchors with a data-tag-name carrying a single-letter prefix (s_, c_, a_/u_, t_) and a sibling tag-invalid span instead of a /search/ link. The existing keyword pass skips them, so they were lost. Adds a Submission.CategorizedTags field exposing the four buckets with the prefix stripped, plus an examples/categorized_tags runnable demo.
This commit is contained in:
90
examples/categorized_tags/main.go
Normal file
90
examples/categorized_tags/main.go
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
// categorized_tags demonstrates how the SDK groups FA's prefixed system
|
||||||
|
// tags into the four CategorizedTags buckets: Species (s_), Characters (c_),
|
||||||
|
// Artists (a_/u_), and Types (t_). Each bucket is printed on its own so the
|
||||||
|
// example covers every aspect of the feature.
|
||||||
|
//
|
||||||
|
// Runs anonymously by default. Set FA_A / FA_B (and ideally CF_CLEARANCE +
|
||||||
|
// FA_UA) to authenticate when the target submission requires it.
|
||||||
|
//
|
||||||
|
// go run ./examples/categorized_tags 12345678
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
fa "git.anthrove.art/public/go-fa-api"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
if len(os.Args) < 2 {
|
||||||
|
log.Fatalf("usage: %s <submission-id>", os.Args[0])
|
||||||
|
}
|
||||||
|
id, err := strconv.ParseInt(os.Args[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("invalid submission id: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := []fa.Option{fa.WithUserAgent("go-fa-api-example/0.1")}
|
||||||
|
if a, b := os.Getenv("FA_A"), os.Getenv("FA_B"); a != "" && b != "" {
|
||||||
|
log.Printf("using FA_A/FA_B cookies for authenticated request")
|
||||||
|
opts = []fa.Option{
|
||||||
|
fa.WithCookies(fa.Cookies{A: a, B: b}),
|
||||||
|
fa.WithCloudflare(fa.CFCookies{Clearance: os.Getenv("CF_CLEARANCE")}),
|
||||||
|
fa.WithUserAgent(envOr("FA_UA", "go-fa-api-example/0.1")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
client := fa.New(opts...)
|
||||||
|
|
||||||
|
sub, err := client.GetSubmission(context.Background(), fa.SubmissionID(id))
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("GetSubmission: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("%s\nby %s\n\n", sub.Title, sub.Author.DisplayName)
|
||||||
|
|
||||||
|
fmt.Println("=== Plain keyword tags (no prefix) ===")
|
||||||
|
if len(sub.Tags) == 0 {
|
||||||
|
fmt.Println(" (none)")
|
||||||
|
} else {
|
||||||
|
fmt.Println(" " + strings.Join(sub.Tags, ", "))
|
||||||
|
}
|
||||||
|
|
||||||
|
ct := sub.CategorizedTags
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("=== Species (s_) ===")
|
||||||
|
printBucket(ct.Species, "s_")
|
||||||
|
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("=== Characters (c_) ===")
|
||||||
|
printBucket(ct.Characters, "c_")
|
||||||
|
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("=== Artists (a_ / u_) ===")
|
||||||
|
printBucket(ct.Artists, "a_")
|
||||||
|
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Println("=== Types (t_) ===")
|
||||||
|
printBucket(ct.Types, "t_")
|
||||||
|
}
|
||||||
|
|
||||||
|
// printBucket writes one bucket of tag names to standard output, one name
// per line with prefix prepended. An empty bucket is reported as "(none)".
func printBucket(items []string, prefix string) {
	if len(items) > 0 {
		for i := range items {
			fmt.Printf(" %s%s\n", prefix, items[i])
		}
		return
	}
	fmt.Println(" (none)")
}
|
||||||
|
|
||||||
|
// envOr returns the value of the environment variable named key, or fallback
// when that variable is unset or set to the empty string.
func envOr(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
||||||
@@ -13,6 +13,15 @@ import (
|
|||||||
"git.anthrove.art/public/go-fa-api/internal/urls"
|
"git.anthrove.art/public/go-fa-api/internal/urls"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// CategorizedTags holds FA's prefixed system tags, split into one slice per
// category. Each entry is the tag name with its two-character prefix removed
// (e.g. "s_hybrid_species" is stored in Species as "hybrid_species").
type CategorizedTags struct {
	// Species lists names taken from tags prefixed s_.
	Species []string
	// Characters lists names taken from tags prefixed c_.
	Characters []string
	// Artists lists names taken from tags prefixed a_ or u_.
	Artists []string
	// Types lists names taken from tags prefixed t_.
	Types []string
}
|
||||||
|
|
||||||
// Submission is a fully resolved FA submission as seen on /view/{id}/.
|
// Submission is a fully resolved FA submission as seen on /view/{id}/.
|
||||||
type Submission struct {
|
type Submission struct {
|
||||||
ID SubmissionID
|
ID SubmissionID
|
||||||
@@ -27,6 +36,10 @@ type Submission struct {
|
|||||||
Description string // raw HTML; sanitise before rendering to a browser
|
Description string // raw HTML; sanitise before rendering to a browser
|
||||||
DescriptionText string // plaintext convenience
|
DescriptionText string // plaintext convenience
|
||||||
Tags []string
|
Tags []string
|
||||||
|
// CategorizedTags groups FA's prefixed system tags by category.
|
||||||
|
// FA emits these as tag-block entries inside div.submission-tags with
|
||||||
|
// prefixes s_ (species), c_ (character), a_/u_ (artist), and t_ (type).
|
||||||
|
CategorizedTags CategorizedTags
|
||||||
FileURL string // absolute CDN URL; pass to Download
|
FileURL string // absolute CDN URL; pass to Download
|
||||||
ThumbURL string
|
ThumbURL string
|
||||||
Width int // 0 if unknown / non-image
|
Width int // 0 if unknown / non-image
|
||||||
|
|||||||
@@ -156,6 +156,32 @@ func parseSubmission(id SubmissionID, doc *goquery.Document) (*Submission, error
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Prefixed system tags: FA renders these as tag-block anchors with a
|
||||||
|
// data-tag-name attribute carrying a leading single-letter prefix:
|
||||||
|
// s_ species, c_ character, a_/u_ artist, t_ type.
|
||||||
|
// They are paired with a sibling <span class="tag-invalid"> and have no
|
||||||
|
// /search/ href, so they are skipped by the keyword pass above.
|
||||||
|
doc.Find("div.submission-tags a.tag-block[data-tag-name]").Each(func(_ int, a *goquery.Selection) {
|
||||||
|
raw := strings.TrimSpace(trimAttr(a, "data-tag-name"))
|
||||||
|
if len(raw) < 3 || raw[1] != '_' {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
name := raw[2:]
|
||||||
|
if name == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
switch raw[0] {
|
||||||
|
case 's':
|
||||||
|
s.CategorizedTags.Species = append(s.CategorizedTags.Species, name)
|
||||||
|
case 'c':
|
||||||
|
s.CategorizedTags.Characters = append(s.CategorizedTags.Characters, name)
|
||||||
|
case 'a', 'u':
|
||||||
|
s.CategorizedTags.Artists = append(s.CategorizedTags.Artists, name)
|
||||||
|
case 't':
|
||||||
|
s.CategorizedTags.Types = append(s.CategorizedTags.Types, name)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
// File URL FA renders a "Download" button in #submission-options that
|
// File URL FA renders a "Download" button in #submission-options that
|
||||||
// links to the canonical file for *every* submission type. For visual
|
// links to the canonical file for *every* submission type. For visual
|
||||||
// art it equals the #submissionImg source; for stories and music it's
|
// art it equals the #submissionImg source; for stories and music it's
|
||||||
|
|||||||
@@ -54,6 +54,10 @@ const syntheticSubmissionHTML = `<html><body>
|
|||||||
<div>
|
<div>
|
||||||
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords wolf">wolf</a></span></span>
|
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords wolf">wolf</a></span></span>
|
||||||
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords art">art</a></span></span>
|
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords art">art</a></span></span>
|
||||||
|
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="s_wolf" class="tag-block"></a><span class="tag-invalid">s_wolf</span></span></span>
|
||||||
|
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="c_artwork_digital" class="tag-block"></a><span class="tag-invalid">c_artwork_digital</span></span></span>
|
||||||
|
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="t_general_furry_art" class="tag-block"></a><span class="tag-invalid">t_general_furry_art</span></span></span>
|
||||||
|
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="u_somefurry" class="tag-block"></a><span class="tag-invalid">u_somefurry</span></span></span>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -110,6 +114,34 @@ func TestParseSubmission_Synthetic(t *testing.T) {
|
|||||||
if !strings.Contains(sub.Description, "world") {
|
if !strings.Contains(sub.Description, "world") {
|
||||||
t.Errorf("Description missing expected content: %q", sub.Description)
|
t.Errorf("Description missing expected content: %q", sub.Description)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
catChecks := []struct {
|
||||||
|
name string
|
||||||
|
got []string
|
||||||
|
want []string
|
||||||
|
}{
|
||||||
|
{"Species", sub.CategorizedTags.Species, []string{"wolf"}},
|
||||||
|
{"Characters", sub.CategorizedTags.Characters, []string{"artwork_digital"}},
|
||||||
|
{"Types", sub.CategorizedTags.Types, []string{"general_furry_art"}},
|
||||||
|
{"Artists", sub.CategorizedTags.Artists, []string{"somefurry"}},
|
||||||
|
}
|
||||||
|
for _, c := range catChecks {
|
||||||
|
if !equalStrings(c.got, c.want) {
|
||||||
|
t.Errorf("CategorizedTags.%s = %v; want %v", c.name, c.got, c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// equalStrings reports whether a and b have the same length and hold equal
// strings at every index. A nil slice and an empty slice compare equal.
func equalStrings(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, s := range a {
		if s != b[i] {
			return false
		}
	}
	return true
}
|
||||||
|
|
||||||
// TestParseSubmission_FavoritedState verifies parseSubmission reports the
|
// TestParseSubmission_FavoritedState verifies parseSubmission reports the
|
||||||
|
|||||||
Reference in New Issue
Block a user