FA's beta listing pages emit each submission's tag list on the
figure's <img data-tags="..."> attribute, mixing prefixed system tags
(s_/c_/a_/u_/t_) with the unprefixed keyword list. Reading it during
gallery-page parse lets callers classify favorites/gallery/scraps/
browse/search/inbox items at scrape time, avoiding a /view/{id}
round-trip per submission.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
177 lines
5.2 KiB
Go
177 lines
5.2 KiB
Go
package fa
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// syntheticGalleryHTML is a minimal hand-written gallery listing used by
// TestParseGalleryPage_Synthetic. It holds two <figure> submissions
// (sid-1001 with class r-general, sid-1002 with class r-adult), each with a
// /view/ link, a protocol-relative thumbnail <img> and a /user/ author link,
// followed by a "Next" button link.
//
// The markup must contain nothing but the HTML itself: any stray line inside
// the raw string becomes a text node in the parsed document.
const syntheticGalleryHTML = `<html><body>
<figure id="sid-1001" class="t-image r-general">
<a href="/view/1001/" title="Submission One">
<img src="//d.example/thumb/1001.png" data-src="//d.example/thumb/1001.png"/>
</a>
<figcaption>
<p>Submission One</p>
<a href="/user/artistone/">ArtistOne</a>
</figcaption>
</figure>
<figure id="sid-1002" class="t-image r-adult">
<a href="/view/1002/" title="Submission Two">
<img src="//d.example/thumb/1002.png"/>
</a>
<figcaption>
<p>Submission Two</p>
<a href="/user/artisttwo/">ArtistTwo</a>
</figcaption>
</figure>
<a class="button standard" href="/gallery/me/2/">Next</a>
</body></html>`
|
|
|
|
func TestParseGalleryPage_Synthetic(t *testing.T) {
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(syntheticGalleryHTML))
|
|
if err != nil {
|
|
t.Fatalf("setup: %v", err)
|
|
}
|
|
items, hasNext := parseGalleryPage(doc, false)
|
|
if len(items) != 2 {
|
|
t.Fatalf("items = %d; want 2", len(items))
|
|
}
|
|
if items[0].ID != 1001 || items[1].ID != 1002 {
|
|
t.Errorf("ids = [%d, %d]", items[0].ID, items[1].ID)
|
|
}
|
|
if items[0].Title != "Submission One" {
|
|
t.Errorf("items[0].Title = %q", items[0].Title)
|
|
}
|
|
if items[0].Rating != RatingGeneral {
|
|
t.Errorf("items[0].Rating = %q; want General", items[0].Rating)
|
|
}
|
|
if items[1].Rating != RatingAdult {
|
|
t.Errorf("items[1].Rating = %q; want Adult", items[1].Rating)
|
|
}
|
|
if items[0].Author.Name != "artistone" {
|
|
t.Errorf("items[0].Author.Name = %q", items[0].Author.Name)
|
|
}
|
|
if !strings.HasPrefix(items[0].ThumbURL, "https://") {
|
|
t.Errorf("items[0].ThumbURL = %q; want absolute URL", items[0].ThumbURL)
|
|
}
|
|
if !hasNext {
|
|
t.Error("hasNext = false; want true")
|
|
}
|
|
}
|
|
|
|
func TestParseGalleryFigure_DataTags(t *testing.T) {
|
|
const html = `<html><body>
|
|
<figure id="sid-2001" class="t-image r-general">
|
|
<a href="/view/2001/" title="Mixed Tags">
|
|
<img data-tags="u_someartist c_artwork_digital t_all s_wolf wolf solo digital landscape" src="//d.example/thumb/2001.png"/>
|
|
</a>
|
|
</figure>
|
|
<figure id="sid-2002" class="t-image r-general">
|
|
<a href="/view/2002/" title="No Tags">
|
|
<img src="//d.example/thumb/2002.png"/>
|
|
</a>
|
|
</figure>
|
|
<figure id="sid-2003" class="t-image r-general">
|
|
<a href="/view/2003/" title="Only Keywords">
|
|
<img data-tags="wolf solo" src="//d.example/thumb/2003.png"/>
|
|
</a>
|
|
</figure>
|
|
</body></html>`
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
|
if err != nil {
|
|
t.Fatalf("setup: %v", err)
|
|
}
|
|
items, _ := parseGalleryPage(doc, false)
|
|
if len(items) != 3 {
|
|
t.Fatalf("items = %d; want 3", len(items))
|
|
}
|
|
|
|
// Mixed prefixed + unprefixed.
|
|
mixed := items[0]
|
|
wantTags := []string{"wolf", "solo", "digital", "landscape"}
|
|
if !equalStrings(mixed.Tags, wantTags) {
|
|
t.Errorf("items[0].Tags = %v; want %v", mixed.Tags, wantTags)
|
|
}
|
|
if !equalStrings(mixed.CategorizedTags.Species, []string{"wolf"}) {
|
|
t.Errorf("items[0].Species = %v", mixed.CategorizedTags.Species)
|
|
}
|
|
if !equalStrings(mixed.CategorizedTags.Characters, []string{"artwork_digital"}) {
|
|
t.Errorf("items[0].Characters = %v", mixed.CategorizedTags.Characters)
|
|
}
|
|
if !equalStrings(mixed.CategorizedTags.Types, []string{"all"}) {
|
|
t.Errorf("items[0].Types = %v", mixed.CategorizedTags.Types)
|
|
}
|
|
if !equalStrings(mixed.CategorizedTags.Artists, []string{"someartist"}) {
|
|
t.Errorf("items[0].Artists = %v", mixed.CategorizedTags.Artists)
|
|
}
|
|
|
|
// Missing data-tags: both slices stay nil.
|
|
if items[1].Tags != nil {
|
|
t.Errorf("items[1].Tags = %v; want nil", items[1].Tags)
|
|
}
|
|
if items[1].CategorizedTags.Species != nil ||
|
|
items[1].CategorizedTags.Characters != nil ||
|
|
items[1].CategorizedTags.Artists != nil ||
|
|
items[1].CategorizedTags.Types != nil {
|
|
t.Errorf("items[1].CategorizedTags = %+v; want zero", items[1].CategorizedTags)
|
|
}
|
|
|
|
// Unprefixed-only: everything lands in Tags.
|
|
if !equalStrings(items[2].Tags, []string{"wolf", "solo"}) {
|
|
t.Errorf("items[2].Tags = %v", items[2].Tags)
|
|
}
|
|
if items[2].CategorizedTags.Species != nil {
|
|
t.Errorf("items[2].Species = %v; want nil", items[2].CategorizedTags.Species)
|
|
}
|
|
}
|
|
|
|
func TestParseGalleryPage_RealFixtureTags(t *testing.T) {
|
|
raw := loadFixture(t, "gallery_page1.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, _ := parseGalleryPage(doc, false)
|
|
if len(items) == 0 {
|
|
t.Fatal("real fixture: no items parsed")
|
|
}
|
|
var withTags, withSpecies int
|
|
for _, it := range items {
|
|
if len(it.Tags) > 0 {
|
|
withTags++
|
|
}
|
|
if len(it.CategorizedTags.Species) > 0 {
|
|
withSpecies++
|
|
}
|
|
}
|
|
if withTags == 0 {
|
|
t.Error("no items got Tags populated from data-tags")
|
|
}
|
|
if withSpecies == 0 {
|
|
t.Error("no items got CategorizedTags.Species populated from data-tags")
|
|
}
|
|
}
|
|
|
|
func TestParseGalleryPage_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "gallery_page1.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, _ := parseGalleryPage(doc, false)
|
|
if len(items) == 0 {
|
|
t.Fatal("real fixture: no items parsed")
|
|
}
|
|
for i, it := range items {
|
|
if it.ID == 0 {
|
|
t.Errorf("item %d: ID == 0", i)
|
|
}
|
|
if it.Title == "" {
|
|
t.Errorf("item %d: empty Title", i)
|
|
}
|
|
}
|
|
}
|