diff --git a/gallery_parser.go b/gallery_parser.go
index 53fed6a..037ceca 100644
--- a/gallery_parser.go
+++ b/gallery_parser.go
@@ -85,6 +85,15 @@ func parseGalleryFigure(sel *goquery.Selection, jsonData listingJSONMap) *Submis
}
}
+ // data-tags on the figure's <img> carries both the unprefixed keyword
+ // list and the prefixed system tags (s_/c_/a_/u_/t_). Splitting it lets
+ // callers classify listing items without an extra /view/ fetch.
+ if img := sel.Find("img[data-tags]").First(); img.Length() > 0 {
+ if raw, ok := img.Attr("data-tags"); ok {
+ applyListingDataTags(s, raw)
+ }
+ }
+
// JSON enrichment preferred sources for the fields it carries.
if jsonData != nil {
if entry, ok := jsonData[id]; ok {
@@ -105,3 +114,35 @@ func parseGalleryFigure(sel *goquery.Selection, jsonData listingJSONMap) *Submis
return s
}
+
+// applyListingDataTags distributes the tokens of a listing-page <img>'s
+// data-tags attribute across s. raw is split on whitespace. A token shaped
+// like "<letter>_<name>" whose letter is a known prefix has its name (the
+// text after the underscore) appended to the matching CategorizedTags slice:
+//
+//	s_ -> Species, c_ -> Characters, a_ or u_ -> Artists, t_ -> Types
+//
+// Every other token (no prefix, an unrecognised prefix, or a prefix with
+// nothing after the underscore) is appended to s.Tags unchanged. Values are
+// only ever appended; nothing already on s is cleared or de-duplicated.
+//
+// The prefix table is intended to match the /view/ parser in
+// submission_parser.go. The flat attribute cannot express multi-word tags or
+// the a_ vs u_ distinction, so those details differ from a /view/-path
+// Submission.
+func applyListingDataTags(s *Submission, raw string) {
+	for _, word := range strings.Fields(raw) {
+		// A zero prefix means "not a prefixed token"; it falls through to
+		// the default arm below, as does any unrecognised prefix letter.
+		var prefix byte
+		var name string
+		if len(word) > 2 && word[1] == '_' {
+			prefix, name = word[0], word[2:]
+		}
+		switch prefix {
+		case 's':
+			s.CategorizedTags.Species = append(s.CategorizedTags.Species, name)
+		case 'c':
+			s.CategorizedTags.Characters = append(s.CategorizedTags.Characters, name)
+		case 'a', 'u':
+			s.CategorizedTags.Artists = append(s.CategorizedTags.Artists, name)
+		case 't':
+			s.CategorizedTags.Types = append(s.CategorizedTags.Types, name)
+		default:
+			// Keep the whole token, prefix included, as a plain keyword.
+			s.Tags = append(s.Tags, word)
+		}
+	}
+}
diff --git a/gallery_parser_test.go b/gallery_parser_test.go
index ee631da..44db617 100644
--- a/gallery_parser_test.go
+++ b/gallery_parser_test.go
@@ -62,6 +62,99 @@ func TestParseGalleryPage_Synthetic(t *testing.T) {
}
}
+// TestParseGalleryFigure_DataTags runs parseGalleryPage over an inline
+// listing and checks how each item's data-tags value is divided between Tags
+// and CategorizedTags. Three items are expected: item 0 mixes prefixed and
+// unprefixed tokens, item 1 has no data-tags, item 2 has unprefixed tokens
+// only.
+//
+// NOTE(review): the fixture markup below is blank in this view (it looks
+// stripped); the assertions need three listing entries — confirm the real
+// file still carries them.
+func TestParseGalleryFigure_DataTags(t *testing.T) {
+	const fixture = `
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+`
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(fixture))
+	if err != nil {
+		t.Fatalf("setup: %v", err)
+	}
+	items, _ := parseGalleryPage(doc, false)
+	if n := len(items); n != 3 {
+		t.Fatalf("items = %d; want 3", n)
+	}
+	mixed, untagged, plain := items[0], items[1], items[2]
+
+	// Item 0: unprefixed tokens land in Tags, prefixed ones in their category.
+	wantTags := []string{"wolf", "solo", "digital", "landscape"}
+	if got := mixed.Tags; !equalStrings(got, wantTags) {
+		t.Errorf("items[0].Tags = %v; want %v", got, wantTags)
+	}
+	if got := mixed.CategorizedTags.Species; !equalStrings(got, []string{"wolf"}) {
+		t.Errorf("items[0].Species = %v", got)
+	}
+	// NOTE(review): "artwork_digital" and "all" read like category/theme
+	// identifiers rather than character or type names — confirm that c_ and
+	// t_ really mean Characters and Types on listing pages.
+	if got := mixed.CategorizedTags.Characters; !equalStrings(got, []string{"artwork_digital"}) {
+		t.Errorf("items[0].Characters = %v", got)
+	}
+	if got := mixed.CategorizedTags.Types; !equalStrings(got, []string{"all"}) {
+		t.Errorf("items[0].Types = %v", got)
+	}
+	if got := mixed.CategorizedTags.Artists; !equalStrings(got, []string{"someartist"}) {
+		t.Errorf("items[0].Artists = %v", got)
+	}
+
+	// Item 1: with no data-tags nothing is appended, so each slice must
+	// still be nil (not merely empty).
+	if untagged.Tags != nil {
+		t.Errorf("items[1].Tags = %v; want nil", untagged.Tags)
+	}
+	allNil := untagged.CategorizedTags.Species == nil &&
+		untagged.CategorizedTags.Characters == nil &&
+		untagged.CategorizedTags.Artists == nil &&
+		untagged.CategorizedTags.Types == nil
+	if !allNil {
+		t.Errorf("items[1].CategorizedTags = %+v; want zero", untagged.CategorizedTags)
+	}
+
+	// Item 2: only unprefixed tokens, so Tags is filled and Species is not.
+	if !equalStrings(plain.Tags, []string{"wolf", "solo"}) {
+		t.Errorf("items[2].Tags = %v", plain.Tags)
+	}
+	if plain.CategorizedTags.Species != nil {
+		t.Errorf("items[2].Species = %v; want nil", plain.CategorizedTags.Species)
+	}
+}
+
+// TestParseGalleryPage_RealFixtureTags parses the gallery_page1.html fixture
+// and requires that at least one parsed item has Tags and at least one has
+// CategorizedTags.Species. It does not check specific tag values.
+func TestParseGalleryPage_RealFixtureTags(t *testing.T) {
+	page := loadFixture(t, "gallery_page1.html")
+	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(page))
+	if err != nil {
+		t.Fatalf("read doc: %v", err)
+	}
+	items, _ := parseGalleryPage(doc, false)
+	if len(items) == 0 {
+		t.Fatal("real fixture: no items parsed")
+	}
+
+	// Only presence matters, so flags stand in for per-field counters.
+	sawTags, sawSpecies := false, false
+	for i := range items {
+		if len(items[i].Tags) > 0 {
+			sawTags = true
+		}
+		if len(items[i].CategorizedTags.Species) > 0 {
+			sawSpecies = true
+		}
+	}
+	if !sawTags {
+		t.Error("no items got Tags populated from data-tags")
+	}
+	if !sawSpecies {
+		t.Error("no items got CategorizedTags.Species populated from data-tags")
+	}
+}
+
func TestParseGalleryPage_RealFixture(t *testing.T) {
raw := loadFixture(t, "gallery_page1.html")
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
diff --git a/submission.go b/submission.go
index 04323e9..89e0333 100644
--- a/submission.go
+++ b/submission.go
@@ -35,10 +35,19 @@ type Submission struct {
Gender Gender
Description string // raw HTML; sanitise before rendering to a browser
DescriptionText string // plaintext convenience
- Tags []string
+ // Tags holds the user-supplied keyword tags. On /view/-path Submissions
+ // these come from div.submission-tags anchors. On listing-path
+ // Submissions (Gallery/Scraps/Favorites/Browse/Search/SubmissionInbox)
+ // they come from the figure's data-tags attribute, which carries the
+ // same keywords FA renders on /view/ for that submission, except that
+ // multi-word tags cannot be represented in that whitespace-separated list.
+ Tags []string
// CategorizedTags groups FA's prefixed system tags by category.
- // FA emits these as tag-block entries inside div.submission-tags with
- // prefixes s_ (species), c_ (character), a_/u_ (artist), and t_ (type).
+ // On /view/-path Submissions FA emits these as tag-block entries inside
+ // div.submission-tags with prefixes s_ (species), c_ (character),
+ // a_/u_ (artist), and t_ (type). On listing-path Submissions the same
+ // prefixed tokens are parsed out of the figure's data-tags attribute;
+ // the a_ vs u_ distinction is lost there because FA collapses both into
+ // u_ in that flat list.
CategorizedTags CategorizedTags
FileURL string // absolute CDN URL; pass to Download
ThumbURL string