Files
go-fa-api/submission_parser_test.go
SoXX 20fcad7fbb feat(submission): parse FA's prefixed system tags into CategorizedTags
FA renders its species/character/artist/type system tags as tag-block
anchors with a data-tag-name carrying a single-letter prefix
(s_/c_/a_-u_/t_) and a sibling tag-invalid span instead of a /search/
link. The existing keyword pass skips them, so they were lost.

Adds a Submission.CategorizedTags field exposing the four buckets with
the prefix stripped, plus an examples/categorized_tags runnable demo.
2026-06-02 21:15:30 +02:00

221 lines
8.2 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package fa
import (
"bytes"
"fmt"
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// syntheticSubmissionHTML is a minimal hand-rolled page that exercises every
// selector the parser cares about. Real FA HTML differs in ways that
// fixture-driven tests will catch; this synthetic input pins the parser
// against a stable, deterministic input independent of FA's mood.
//
// The keywords section holds two ordinary keyword entries (wolf, art) that
// link to /search/, followed by four prefixed system tags (s_, c_, t_, u_).
// The system tags carry their name in a data-tag-name attribute on the
// tag-block anchor and render a tag-invalid span instead of a /search/ link.
const syntheticSubmissionHTML = `<html><body>
<meta property="og:url" content="https://www.furaffinity.net/view/1234/"/>
<section class="submission-description">
<div class="submission-description-header">
<div class="submission-description-artist">
<div><a href="/user/somefurry/"><img class="submission-user-icon avatar" src="//d.example/avatars/somefurry.png"/></a></div>
<div>
<div class="submission-title"><h2>My Test Submission</h2></div>
<div>by <span class="c-usernameBlockSimple"><a href="/user/somefurry/"><span class="c-usernameBlockSimple__displayName" title="somefurry">SomeFurry</span></a></span></div>
<div>Posted <span class="popup_date" title="March 17, 2026 04:21:21 PM">5 hours ago</span></div>
</div>
</div>
</div>
<div class="section-body">
<div class="submission-description-text"><p>Hello <b>world</b>.</p></div>
</div>
</section>
<img id="submissionImg" src="//d.example/art/somefurry/1234.png" data-fullview-src="//d.example/art/somefurry/1234_full.png" data-preview-src="//d.example/art/somefurry/1234_thumb.png" data-fullview-width="1920" data-fullview-height="1080"/>
<div class="submission-page-stats">
<div title="Views"><div>1,234</div><div class="highlight">Views</div></div>
<div title="Comments"><div>7</div><div class="highlight">Comments</div></div>
<div title="Favorites"><div>56</div><div class="highlight">Favorites</div></div>
<div><div class="font-large inline c-contentRating--general">General</div><div class="highlight">Rating</div></div>
</div>
<div class="submission-content-stats">
<span class="highlight">
<span>Category</span><span>Theme</span><span>Species</span><span>Gender</span><span>Resolution</span>
</span>
<span>
<span>Artwork (Digital)</span><span>Digital</span><span>Wolf</span><span>Male</span><span>1920 x 1080</span>
</span>
</div>
<div class="submission-tags">
<div class="highlight">Keywords</div>
<div>
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords wolf">wolf</a></span></span>
<span class="tags"><span><a href="javascript:void(0);" class="tag-block"></a><a href="/search/@keywords art">art</a></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="s_wolf" class="tag-block"></a><span class="tag-invalid">s_wolf</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="c_artwork_digital" class="tag-block"></a><span class="tag-invalid">c_artwork_digital</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="t_general_furry_art" class="tag-block"></a><span class="tag-invalid">t_general_furry_art</span></span></span>
<span class="tags"><span><a href="javascript:void(0);" data-tag-name="u_somefurry" class="tag-block"></a><span class="tag-invalid">u_somefurry</span></span></span>
</div>
</div>
<div class="minigallery-navigation">
<a href="/view/1235/">&laquo; Newer</a>
<a href="/view/1233/">Older &raquo;</a>
</div>
</body></html>`
// TestParseSubmission_Synthetic feeds syntheticSubmissionHTML through
// parseSubmission and compares the scalar fields, the keyword count, and the
// four CategorizedTags buckets against the values embedded in that page.
func TestParseSubmission_Synthetic(t *testing.T) {
	page, err := goquery.NewDocumentFromReader(strings.NewReader(syntheticSubmissionHTML))
	if err != nil {
		t.Fatalf("setup: %v", err)
	}
	parsed, err := parseSubmission(1234, page)
	if err != nil {
		t.Fatalf("parseSubmission: %v", err)
	}

	// Scalar fields are compared through `any`, so the expected value must
	// carry the same dynamic type as the field it is checked against.
	type scalarCase struct {
		field    string
		actual   any
		expected any
	}
	scalars := []scalarCase{
		{"ID", parsed.ID, SubmissionID(1234)},
		{"Title", parsed.Title, "My Test Submission"},
		{"Author.Name", parsed.Author.Name, "somefurry"},
		{"Author.DisplayName", parsed.Author.DisplayName, "SomeFurry"},
		{"Author.AvatarURL", parsed.Author.AvatarURL, "https://d.example/avatars/somefurry.png"},
		{"Rating", parsed.Rating, RatingGeneral},
		{"Category", parsed.Category, Category("Artwork (Digital)")},
		{"Type", parsed.Type, Type("Digital")},
		{"Species", parsed.Species, Species("Wolf")},
		{"Gender", parsed.Gender, Gender("Male")},
		{"FileURL", parsed.FileURL, "https://d.example/art/somefurry/1234_full.png"},
		{"ThumbURL", parsed.ThumbURL, "https://d.example/art/somefurry/1234_thumb.png"},
		{"Width", parsed.Width, 1920},
		{"Height", parsed.Height, 1080},
		{"Stats.Views", parsed.Stats.Views, 1234},
		{"Stats.Favorites", parsed.Stats.Favorites, 56},
		{"Stats.Comments", parsed.Stats.Comments, 7},
		{"Prev (Newer)", parsed.Prev, SubmissionID(1235)},
		{"Next (Older)", parsed.Next, SubmissionID(1233)},
		{"len(Tags)", len(parsed.Tags), 2},
	}
	for i := range scalars {
		sc := scalars[i]
		if sc.actual == sc.expected {
			continue
		}
		t.Errorf("%s = %v; want %v", sc.field, sc.actual, sc.expected)
	}

	// A zero PostedAt is tolerated; only a populated timestamp is checked.
	if !parsed.PostedAt.IsZero() {
		if parsed.PostedAt.Year() != 2026 {
			t.Errorf("PostedAt year = %d; want 2026", parsed.PostedAt.Year())
		}
	}

	if hasWorld := strings.Contains(parsed.Description, "world"); !hasWorld {
		t.Errorf("Description missing expected content: %q", parsed.Description)
	}

	// The prefixed system tags must land in their buckets with the prefix
	// stripped.
	type bucketCase struct {
		bucket   string
		actual   []string
		expected []string
	}
	buckets := []bucketCase{
		{"Species", parsed.CategorizedTags.Species, []string{"wolf"}},
		{"Characters", parsed.CategorizedTags.Characters, []string{"artwork_digital"}},
		{"Types", parsed.CategorizedTags.Types, []string{"general_furry_art"}},
		{"Artists", parsed.CategorizedTags.Artists, []string{"somefurry"}},
	}
	for i := range buckets {
		bc := buckets[i]
		if equalStrings(bc.actual, bc.expected) {
			continue
		}
		t.Errorf("CategorizedTags.%s = %v; want %v", bc.bucket, bc.actual, bc.expected)
	}
}
// equalStrings reports whether a and b have the same length and hold the same
// string at every index. A nil slice and an empty slice compare equal.
func equalStrings(a, b []string) bool {
	mismatch := len(a) != len(b)
	// The loop only runs when the lengths already agree, so b[i] is in range.
	for i := 0; !mismatch && i < len(a); i++ {
		mismatch = a[i] != b[i]
	}
	return !mismatch
}
// TestParseSubmission_FavoritedState checks the Favorited flag that
// parseSubmission reports for the authenticated viewer. FA renders exactly
// one of the "+Fav" (/fav/) or "Fav" (/unfav/) anchors, matching the viewer's
// current state; an anonymous fetch shows neither. Each case injects one
// anchor (or nothing) into the #submission-options container of a minimal
// page.
func TestParseSubmission_FavoritedState(t *testing.T) {
	const tmpl = `<html><body>
<meta property="og:url" content="https://www.furaffinity.net/view/1234/"/>
<section class="submission-description"><div class="submission-description-header">
<div class="submission-description-artist"><div></div>
<div><div class="submission-title"><h2>T</h2></div></div></div></div></section>
<div id="submission-options">%s</div>
</body></html>`

	type favCase struct {
		label     string
		anchor    string
		favorited bool
	}
	table := []favCase{
		{"favorited shows unfav link", `<a href="/unfav/1234/?key=abc">Fav</a>`, true},
		{"not favorited shows fav link", `<a href="/fav/1234/?key=abc">+Fav</a>`, false},
		{"anonymous shows neither", ``, false},
	}

	for _, tc := range table {
		t.Run(tc.label, func(t *testing.T) {
			html := fmt.Sprintf(tmpl, tc.anchor)
			page, err := goquery.NewDocumentFromReader(strings.NewReader(html))
			if err != nil {
				t.Fatalf("setup: %v", err)
			}
			parsed, err := parseSubmission(1234, page)
			if err != nil {
				t.Fatalf("parseSubmission: %v", err)
			}
			if got := parsed.Favorited; got != tc.favorited {
				t.Errorf("Favorited = %v; want %v", got, tc.favorited)
			}
		})
	}
}
// TestParseSubmission_MissingTitleErrors verifies that parseSubmission
// returns an error for a page with an empty body, which has no title.
func TestParseSubmission_MissingTitleErrors(t *testing.T) {
	emptyPage, err := goquery.NewDocumentFromReader(strings.NewReader("<html><body></body></html>"))
	if err != nil {
		t.Fatalf("setup: %v", err)
	}
	_, parseErr := parseSubmission(1, emptyPage)
	if parseErr == nil {
		t.Fatal("expected parse error for missing title")
	}
}
// TestParseSubmission_RealFixture runs the parser against a real FA HTML
// dump captured by the `fixtures` build tag. Skips cleanly if no fixture
// has been recorded.
func TestParseSubmission_RealFixture(t *testing.T) {
	html := loadFixture(t, "submission.html")
	page, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
	if err != nil {
		t.Fatalf("read doc: %v", err)
	}
	parsed, err := parseSubmission(0, page)
	if err != nil {
		t.Fatalf("parseSubmission(real): %v", err)
	}

	// We can't assert exact values against a fixture whose contents we don't
	// pin in this repo. Instead assert that the load-bearing fields populated.
	required := []struct {
		value   string
		failure string
	}{
		{parsed.Title, "real fixture: Title is empty"},
		{parsed.Author.Name, "real fixture: Author.Name is empty"},
		{parsed.FileURL, "real fixture: FileURL is empty"},
	}
	for _, field := range required {
		if field.value == "" {
			t.Error(field.failure)
		}
	}
}