152 lines
4.8 KiB
Go
152 lines
4.8 KiB
Go
package fa
|
|
|
|
import (
|
|
"bytes"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// TestReadListingJSON_RealFixture parses the embedded JSON blob out of the
|
|
// captured gallery_page1.html and asserts the shape we expect: keyed by
|
|
// SubmissionID, populated title/username/lower fields.
|
|
func TestReadListingJSON_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "gallery_page1.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
m := readListingJSON(doc)
|
|
if m == nil {
|
|
t.Fatal("readListingJSON returned nil script tag missing or malformed?")
|
|
}
|
|
if len(m) == 0 {
|
|
t.Fatal("readListingJSON returned empty map")
|
|
}
|
|
t.Logf("parsed %d entries", len(m))
|
|
|
|
missingTitle, missingLower, missingMtime := 0, 0, 0
|
|
for id, entry := range m {
|
|
if id == 0 {
|
|
t.Errorf("entry has zero ID")
|
|
}
|
|
if entry.Title == "" {
|
|
missingTitle++
|
|
}
|
|
if entry.Lower == "" {
|
|
missingLower++
|
|
}
|
|
if entry.AvatarMtime == "" {
|
|
missingMtime++
|
|
}
|
|
}
|
|
// Allow some entries to lack avatar_mtime (FA sometimes omits it for
|
|
// stale avatars), but title/lower should be on every entry.
|
|
if missingTitle > 0 {
|
|
t.Errorf("%d/%d entries had empty Title", missingTitle, len(m))
|
|
}
|
|
if missingLower > 0 {
|
|
t.Errorf("%d/%d entries had empty Lower", missingLower, len(m))
|
|
}
|
|
t.Logf("missing avatar_mtime: %d/%d (acceptable)", missingMtime, len(m))
|
|
}
|
|
|
|
// TestParseGalleryPage_JSONMergePicksUpAvatars compares HTML-only and JSON
|
|
// merge modes on a favorites fixture (favorites lists submissions by many
|
|
// different artists, so the embedded JSON carries avatar_mtime for each;
|
|
// gallery pages don't, since every item is by the same artist).
|
|
//
|
|
// The JSON path should populate Author.AvatarURL on items where HTML
|
|
// scraping leaves it empty.
|
|
func TestParseGalleryPage_JSONMergePicksUpAvatars(t *testing.T) {
|
|
raw := loadFixture(t, "favorites_page1.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
|
|
htmlOnly, _ := parseGalleryPage(doc, false)
|
|
jsonMerged, _ := parseGalleryPage(doc, true)
|
|
|
|
if len(htmlOnly) != len(jsonMerged) {
|
|
t.Fatalf("item counts differ: html=%d json=%d", len(htmlOnly), len(jsonMerged))
|
|
}
|
|
if len(jsonMerged) == 0 {
|
|
t.Fatal("no items parsed")
|
|
}
|
|
|
|
htmlAvatars, jsonAvatars := 0, 0
|
|
for _, s := range htmlOnly {
|
|
if s.Author.AvatarURL != "" {
|
|
htmlAvatars++
|
|
}
|
|
}
|
|
for _, s := range jsonMerged {
|
|
if s.Author.AvatarURL != "" {
|
|
jsonAvatars++
|
|
}
|
|
}
|
|
t.Logf("html-only avatars: %d/%d; json-merged avatars: %d/%d",
|
|
htmlAvatars, len(htmlOnly), jsonAvatars, len(jsonMerged))
|
|
|
|
if jsonAvatars <= htmlAvatars {
|
|
t.Errorf("expected JSON-merged parse to populate more AvatarURLs than HTML-only; got html=%d json=%d",
|
|
htmlAvatars, jsonAvatars)
|
|
}
|
|
// Spot-check one avatar URL shape.
|
|
for _, s := range jsonMerged {
|
|
if s.Author.AvatarURL != "" {
|
|
if !strings.HasPrefix(s.Author.AvatarURL, "https://a.furaffinity.net/") {
|
|
t.Errorf("avatar URL has wrong prefix: %q", s.Author.AvatarURL)
|
|
}
|
|
if !strings.HasSuffix(s.Author.AvatarURL, "/"+s.Author.Name+".gif") {
|
|
t.Errorf("avatar URL doesn't end with /%s.gif: %q", s.Author.Name, s.Author.AvatarURL)
|
|
}
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestParseGalleryPage_JSONMergeGracefullyFallsBack confirms that
|
|
// useJSON=true on a doc with no js-submissionData blob produces the same
|
|
// result as useJSON=false i.e. the merge is non-destructive.
|
|
func TestParseGalleryPage_JSONMergeGracefullyFallsBack(t *testing.T) {
|
|
// The synthetic gallery fixture has no js-submissionData tag.
|
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(syntheticGalleryHTML))
|
|
if err != nil {
|
|
t.Fatalf("setup: %v", err)
|
|
}
|
|
htmlOnly, _ := parseGalleryPage(doc, false)
|
|
jsonMerged, _ := parseGalleryPage(doc, true)
|
|
|
|
if len(htmlOnly) != len(jsonMerged) {
|
|
t.Fatalf("counts differ: %d vs %d", len(htmlOnly), len(jsonMerged))
|
|
}
|
|
for i := range htmlOnly {
|
|
if htmlOnly[i].ID != jsonMerged[i].ID || htmlOnly[i].Title != jsonMerged[i].Title {
|
|
t.Errorf("item %d: html=%+v json=%+v", i, htmlOnly[i], jsonMerged[i])
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestReadListingJSON_MissingTagReturnsNil makes the fallback path
|
|
// explicit in a unit-style assertion.
|
|
func TestReadListingJSON_MissingTagReturnsNil(t *testing.T) {
|
|
doc, _ := goquery.NewDocumentFromReader(strings.NewReader("<html><body></body></html>"))
|
|
if m := readListingJSON(doc); m != nil {
|
|
t.Errorf("expected nil, got %d entries", len(m))
|
|
}
|
|
}
|
|
|
|
// TestReadListingJSON_MalformedJSONReturnsNil makes the fallback path
|
|
// explicit when FA's blob is present but somehow unparseable.
|
|
func TestReadListingJSON_MalformedJSONReturnsNil(t *testing.T) {
|
|
doc, _ := goquery.NewDocumentFromReader(strings.NewReader(
|
|
`<html><body><script id="js-submissionData" type="application/json">{not valid</script></body></html>`,
|
|
))
|
|
if m := readListingJSON(doc); m != nil {
|
|
t.Errorf("expected nil, got %d entries", len(m))
|
|
}
|
|
}
|