initial commit
This commit is contained in:
151
listing_json_test.go
Normal file
151
listing_json_test.go
Normal file
@@ -0,0 +1,151 @@
|
||||
package fa
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// TestReadListingJSON_RealFixture parses the embedded JSON blob out of the
|
||||
// captured gallery_page1.html and asserts the shape we expect: keyed by
|
||||
// SubmissionID, populated title/username/lower fields.
|
||||
func TestReadListingJSON_RealFixture(t *testing.T) {
|
||||
raw := loadFixture(t, "gallery_page1.html")
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
||||
if err != nil {
|
||||
t.Fatalf("read doc: %v", err)
|
||||
}
|
||||
m := readListingJSON(doc)
|
||||
if m == nil {
|
||||
t.Fatal("readListingJSON returned nil script tag missing or malformed?")
|
||||
}
|
||||
if len(m) == 0 {
|
||||
t.Fatal("readListingJSON returned empty map")
|
||||
}
|
||||
t.Logf("parsed %d entries", len(m))
|
||||
|
||||
missingTitle, missingLower, missingMtime := 0, 0, 0
|
||||
for id, entry := range m {
|
||||
if id == 0 {
|
||||
t.Errorf("entry has zero ID")
|
||||
}
|
||||
if entry.Title == "" {
|
||||
missingTitle++
|
||||
}
|
||||
if entry.Lower == "" {
|
||||
missingLower++
|
||||
}
|
||||
if entry.AvatarMtime == "" {
|
||||
missingMtime++
|
||||
}
|
||||
}
|
||||
// Allow some entries to lack avatar_mtime (FA sometimes omits it for
|
||||
// stale avatars), but title/lower should be on every entry.
|
||||
if missingTitle > 0 {
|
||||
t.Errorf("%d/%d entries had empty Title", missingTitle, len(m))
|
||||
}
|
||||
if missingLower > 0 {
|
||||
t.Errorf("%d/%d entries had empty Lower", missingLower, len(m))
|
||||
}
|
||||
t.Logf("missing avatar_mtime: %d/%d (acceptable)", missingMtime, len(m))
|
||||
}
|
||||
|
||||
// TestParseGalleryPage_JSONMergePicksUpAvatars compares HTML-only and JSON
|
||||
// merge modes on a favorites fixture (favorites lists submissions by many
|
||||
// different artists, so the embedded JSON carries avatar_mtime for each;
|
||||
// gallery pages don't, since every item is by the same artist).
|
||||
//
|
||||
// The JSON path should populate Author.AvatarURL on items where HTML
|
||||
// scraping leaves it empty.
|
||||
func TestParseGalleryPage_JSONMergePicksUpAvatars(t *testing.T) {
|
||||
raw := loadFixture(t, "favorites_page1.html")
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
||||
if err != nil {
|
||||
t.Fatalf("read doc: %v", err)
|
||||
}
|
||||
|
||||
htmlOnly, _ := parseGalleryPage(doc, false)
|
||||
jsonMerged, _ := parseGalleryPage(doc, true)
|
||||
|
||||
if len(htmlOnly) != len(jsonMerged) {
|
||||
t.Fatalf("item counts differ: html=%d json=%d", len(htmlOnly), len(jsonMerged))
|
||||
}
|
||||
if len(jsonMerged) == 0 {
|
||||
t.Fatal("no items parsed")
|
||||
}
|
||||
|
||||
htmlAvatars, jsonAvatars := 0, 0
|
||||
for _, s := range htmlOnly {
|
||||
if s.Author.AvatarURL != "" {
|
||||
htmlAvatars++
|
||||
}
|
||||
}
|
||||
for _, s := range jsonMerged {
|
||||
if s.Author.AvatarURL != "" {
|
||||
jsonAvatars++
|
||||
}
|
||||
}
|
||||
t.Logf("html-only avatars: %d/%d; json-merged avatars: %d/%d",
|
||||
htmlAvatars, len(htmlOnly), jsonAvatars, len(jsonMerged))
|
||||
|
||||
if jsonAvatars <= htmlAvatars {
|
||||
t.Errorf("expected JSON-merged parse to populate more AvatarURLs than HTML-only; got html=%d json=%d",
|
||||
htmlAvatars, jsonAvatars)
|
||||
}
|
||||
// Spot-check one avatar URL shape.
|
||||
for _, s := range jsonMerged {
|
||||
if s.Author.AvatarURL != "" {
|
||||
if !strings.HasPrefix(s.Author.AvatarURL, "https://a.furaffinity.net/") {
|
||||
t.Errorf("avatar URL has wrong prefix: %q", s.Author.AvatarURL)
|
||||
}
|
||||
if !strings.HasSuffix(s.Author.AvatarURL, "/"+s.Author.Name+".gif") {
|
||||
t.Errorf("avatar URL doesn't end with /%s.gif: %q", s.Author.Name, s.Author.AvatarURL)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseGalleryPage_JSONMergeGracefullyFallsBack confirms that
|
||||
// useJSON=true on a doc with no js-submissionData blob produces the same
|
||||
// result as useJSON=false i.e. the merge is non-destructive.
|
||||
func TestParseGalleryPage_JSONMergeGracefullyFallsBack(t *testing.T) {
|
||||
// The synthetic gallery fixture has no js-submissionData tag.
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(syntheticGalleryHTML))
|
||||
if err != nil {
|
||||
t.Fatalf("setup: %v", err)
|
||||
}
|
||||
htmlOnly, _ := parseGalleryPage(doc, false)
|
||||
jsonMerged, _ := parseGalleryPage(doc, true)
|
||||
|
||||
if len(htmlOnly) != len(jsonMerged) {
|
||||
t.Fatalf("counts differ: %d vs %d", len(htmlOnly), len(jsonMerged))
|
||||
}
|
||||
for i := range htmlOnly {
|
||||
if htmlOnly[i].ID != jsonMerged[i].ID || htmlOnly[i].Title != jsonMerged[i].Title {
|
||||
t.Errorf("item %d: html=%+v json=%+v", i, htmlOnly[i], jsonMerged[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadListingJSON_MissingTagReturnsNil makes the fallback path
|
||||
// explicit in a unit-style assertion.
|
||||
func TestReadListingJSON_MissingTagReturnsNil(t *testing.T) {
|
||||
doc, _ := goquery.NewDocumentFromReader(strings.NewReader("<html><body></body></html>"))
|
||||
if m := readListingJSON(doc); m != nil {
|
||||
t.Errorf("expected nil, got %d entries", len(m))
|
||||
}
|
||||
}
|
||||
|
||||
// TestReadListingJSON_MalformedJSONReturnsNil makes the fallback path
|
||||
// explicit when FA's blob is present but somehow unparseable.
|
||||
func TestReadListingJSON_MalformedJSONReturnsNil(t *testing.T) {
|
||||
doc, _ := goquery.NewDocumentFromReader(strings.NewReader(
|
||||
`<html><body><script id="js-submissionData" type="application/json">{not valid</script></body></html>`,
|
||||
))
|
||||
if m := readListingJSON(doc); m != nil {
|
||||
t.Errorf("expected nil, got %d entries", len(m))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user