feat(submission): parse FA's prefixed system tags into CategorizedTags

FA renders its species/character/artist/type system tags as tag-block
anchors with a data-tag-name carrying a single-letter prefix
(s_/c_/a_-u_/t_) and a sibling tag-invalid span instead of a /search/
link. The existing keyword pass skips them, so they were lost.

Adds a Submission.CategorizedTags field exposing the four buckets with
the prefix stripped, plus an examples/categorized_tags runnable demo.
This commit is contained in:
2026-06-02 21:15:30 +02:00
parent 02479212bc
commit 20fcad7fbb
4 changed files with 161 additions and 0 deletions

View File

@@ -156,6 +156,32 @@ func parseSubmission(id SubmissionID, doc *goquery.Document) (*Submission, error
}
})
// Prefixed system tags FA renders these as tag-block anchors with a
// data-tag-name attribute carrying a leading single-letter prefix:
// s_ species, c_ character, a_/u_ artist, t_ type.
// They are paired with a sibling <span class="tag-invalid"> and have no
// /search/ href, so they are skipped by the keyword pass above.
doc.Find("div.submission-tags a.tag-block[data-tag-name]").Each(func(_ int, a *goquery.Selection) {
raw := strings.TrimSpace(trimAttr(a, "data-tag-name"))
if len(raw) < 3 || raw[1] != '_' {
return
}
name := raw[2:]
if name == "" {
return
}
switch raw[0] {
case 's':
s.CategorizedTags.Species = append(s.CategorizedTags.Species, name)
case 'c':
s.CategorizedTags.Characters = append(s.CategorizedTags.Characters, name)
case 'a', 'u':
s.CategorizedTags.Artists = append(s.CategorizedTags.Artists, name)
case 't':
s.CategorizedTags.Types = append(s.CategorizedTags.Types, name)
}
})
// File URL FA renders a "Download" button in #submission-options that
// links to the canonical file for *every* submission type. For visual
// art it equals the #submissionImg source; for stories and music it's