feat(submission): parse FA's prefixed system tags into CategorizedTags

FA renders its species/character/artist/type system tags as tag-block
anchors with a data-tag-name carrying a single-letter prefix
(s_/c_/a_-u_/t_) and a sibling tag-invalid span instead of a /search/
link. The existing keyword pass skips them, so they were lost.

Adds a Submission.CategorizedTags field exposing the four buckets with
the prefix stripped, plus an examples/categorized_tags runnable demo.
This commit is contained in:
2026-06-02 21:15:30 +02:00
parent 02479212bc
commit 20fcad7fbb
4 changed files with 161 additions and 0 deletions

View File

@@ -54,6 +54,10 @@ const syntheticSubmissionHTML = `<html><body>
<div>
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords wolf">wolf</a></span></span>
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords art">art</a></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="s_wolf" class="tag-block"></a><span class="tag-invalid">s_wolf</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="c_artwork_digital" class="tag-block"></a><span class="tag-invalid">c_artwork_digital</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="t_general_furry_art" class="tag-block"></a><span class="tag-invalid">t_general_furry_art</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="u_somefurry" class="tag-block"></a><span class="tag-invalid">u_somefurry</span></span></span>
</div>
</div>
@@ -110,6 +114,34 @@ func TestParseSubmission_Synthetic(t *testing.T) {
if !strings.Contains(sub.Description, "world") {
t.Errorf("Description missing expected content: %q", sub.Description)
}
catChecks := []struct {
name string
got []string
want []string
}{
{"Species", sub.CategorizedTags.Species, []string{"wolf"}},
{"Characters", sub.CategorizedTags.Characters, []string{"artwork_digital"}},
{"Types", sub.CategorizedTags.Types, []string{"general_furry_art"}},
{"Artists", sub.CategorizedTags.Artists, []string{"somefurry"}},
}
for _, c := range catChecks {
if !equalStrings(c.got, c.want) {
t.Errorf("CategorizedTags.%s = %v; want %v", c.name, c.got, c.want)
}
}
}
// equalStrings reports whether a and b contain the same strings in the same
// order. Only length and element values are compared, so a nil slice and an
// empty slice are considered equal.
func equalStrings(a, b []string) bool {
	// A length mismatch settles the answer up front and also guarantees the
	// loop below never indexes past the end of b.
	differs := len(a) != len(b)
	for idx := 0; !differs && idx < len(a); idx++ {
		differs = a[idx] != b[idx]
	}
	return !differs
}
// TestParseSubmission_FavoritedState verifies parseSubmission reports the