462 lines
16 KiB
Go
462 lines
16 KiB
Go
package fa
|
|
|
|
import (
|
|
"bytes"
|
|
"errors"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// This file groups parser tests that exercise the *additional* fixtures
|
|
// captured by the extended TestRefreshFixtures (story / shouts / last page
|
|
// / scraps / favorites / journals listing / journal comments / system
|
|
// message). Each test t.Skip's cleanly when its fixture isn't present.
|
|
|
|
// TestParseSubmission_StoryRealFixture verifies the non-image submission
|
|
// path. FA still renders a #submissionImg for stories, but it's a generated
|
|
// thumbnail (a .gif preview of the document) FileURL must point at the
|
|
// real document URL from the Download button, not the thumbnail.
|
|
func TestParseSubmission_StoryRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "submission_story.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
sub, err := parseSubmission(0, doc)
|
|
if err != nil {
|
|
t.Fatalf("parseSubmission(story): %v", err)
|
|
}
|
|
if sub.Title == "" {
|
|
t.Error("story fixture: Title is empty")
|
|
}
|
|
if sub.Author.Name == "" {
|
|
t.Error("story fixture: Author.Name is empty")
|
|
}
|
|
if sub.FileURL == "" {
|
|
t.Fatal("story fixture: FileURL is empty (Download button selector missed?)")
|
|
}
|
|
// The thumbnail FA injects into #submissionImg ends in .gif; the real
|
|
// document does not. Catching this is the whole point of the fixture.
|
|
if strings.HasSuffix(sub.FileURL, ".gif") {
|
|
t.Errorf("story FileURL = %q; points at the #submissionImg thumbnail gif, not the document", sub.FileURL)
|
|
}
|
|
if !strings.Contains(sub.FileURL, "/download/") {
|
|
t.Errorf("story FileURL = %q; want the Download-button URL (.../download/...)", sub.FileURL)
|
|
}
|
|
if sub.Category != "Story" {
|
|
t.Errorf("story Category = %q; want %q", sub.Category, "Story")
|
|
}
|
|
// The captured page renders a "+Fav" link (viewer has not favorited it),
|
|
// so Favorited must be false against this real markup.
|
|
if sub.Favorited {
|
|
t.Error("story fixture: Favorited = true; fixture page shows the +Fav link")
|
|
}
|
|
t.Logf("story struct: %+v", sub)
|
|
}
|
|
|
|
// TestParseUser_WithShoutsRealFixture exercises the shouts parser. If the
|
|
// captured profile happens to have zero shouts (e.g. shouts disabled), the
|
|
// test logs that rather than failing the assertion is that parsing didn't
|
|
// crash, not that the user had shouts.
|
|
func TestParseUser_WithShoutsRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "user_with_shouts.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
u, err := parseUser("fixture", doc)
|
|
if err != nil {
|
|
t.Fatalf("parseUser(shouts): %v", err)
|
|
}
|
|
if u.DisplayName == "" {
|
|
t.Error("shouts fixture: DisplayName is empty")
|
|
}
|
|
if len(u.Shouts) == 0 {
|
|
t.Logf("shouts fixture: 0 shouts parsed either the user has none, or the selector missed them")
|
|
} else {
|
|
t.Logf("shouts fixture: parsed %d shouts", len(u.Shouts))
|
|
first := u.Shouts[0]
|
|
if first.Author.DisplayName == "" && first.BodyHTML == "" {
|
|
t.Error("shouts fixture: first shout has empty Author + Body")
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestParseGalleryPage_LastPageRealFixture asserts that detectNextPage
|
|
// returns false on the last gallery page. A trailing page that still
|
|
// reports "next" usually means our pagination selector matched a button on
|
|
// the page header instead of the paginator.
|
|
func TestParseGalleryPage_LastPageRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "gallery_page_last.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, hasNext := parseGalleryPage(doc, false)
|
|
t.Logf("last page: %d items, hasNext=%v", len(items), hasNext)
|
|
if hasNext {
|
|
t.Error("last page fixture: hasNext = true; detectNextPage likely matched a non-paginator button")
|
|
}
|
|
}
|
|
|
|
// TestParseGalleryPage_ScrapsRealFixture confirms the same parser works on
|
|
// /scraps/ pages. Scraps and gallery share figure[id^=sid-] markup.
|
|
func TestParseGalleryPage_ScrapsRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "scraps_page1.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, hasNext := parseGalleryPage(doc, false)
|
|
t.Logf("scraps page1: %d items, hasNext=%v", len(items), hasNext)
|
|
for i, it := range items {
|
|
if it.ID == 0 {
|
|
t.Errorf("scraps item %d: ID == 0", i)
|
|
}
|
|
if it.Title == "" {
|
|
t.Errorf("scraps item %d: empty Title", i)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestParseGalleryPage_FavoritesRealFixture verifies that on a favorites
|
|
// page, the per-item Author reflects the original artist (not the user
|
|
// whose favorites we are walking). This is the single load-bearing
|
|
// difference between gallery and favorites parsing.
|
|
func TestParseGalleryPage_FavoritesRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "favorites_page1.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, hasNext := parseGalleryPage(doc, false)
|
|
if len(items) == 0 {
|
|
t.Fatal("favorites fixture: no items parsed")
|
|
}
|
|
t.Logf("favorites: %d items, hasNext=%v", len(items), hasNext)
|
|
|
|
withAuthor := 0
|
|
for i, it := range items {
|
|
if it.ID == 0 {
|
|
t.Errorf("fav item %d: ID == 0", i)
|
|
}
|
|
if it.Title == "" {
|
|
t.Errorf("fav item %d: empty Title", i)
|
|
}
|
|
if it.Author.Name != "" {
|
|
withAuthor++
|
|
}
|
|
}
|
|
// We require Author on at least the majority of items; FA occasionally
|
|
// renders a "blocked" placeholder figure without a usable author link.
|
|
if withAuthor < len(items)/2 {
|
|
t.Errorf("favorites fixture: only %d/%d items had Author.Name set figcaption /user/ selector likely off",
|
|
withAuthor, len(items))
|
|
}
|
|
}
|
|
|
|
// TestParseUserJournalsPage_RealFixture parses the journals listing
|
|
// captured for FA_TEST_JOURNALS_USER. Zero entries is acceptable (the user
|
|
// may have no journals); the test asserts the parser doesn't crash and
|
|
// pagination detection doesn't return a false-positive next link.
|
|
func TestParseUserJournalsPage_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "journals_listing_page1.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
entries, hasNext := parseUserJournalsPage(doc)
|
|
t.Logf("journals listing: %d entries, hasNext=%v", len(entries), hasNext)
|
|
if len(entries) == 0 && hasNext {
|
|
t.Error("journals listing fixture: zero entries but hasNext=true; pagination selector likely matched a header button")
|
|
}
|
|
for i, j := range entries {
|
|
if j.ID == 0 && j.Title == "" {
|
|
t.Errorf("journals entry %d: both ID and Title empty", i)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestParseComments_JournalRealFixture confirms the comment parser works
|
|
// on /journal/ pages, not just /view/ pages.
|
|
func TestParseComments_JournalRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "comments_journal.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
cs := parseComments(doc)
|
|
t.Logf("journal comments: %d", len(cs))
|
|
for i, c := range cs {
|
|
if c.Depth < 0 {
|
|
t.Errorf("journal comment %d: negative depth %d", i, c.Depth)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestParseSearchResults_RealFixture verifies parseSearchResults against
|
|
// the captured /search/?q=dragon fixture. Expects ~72 items, hasNext=true
|
|
// (there are over a million dragon submissions), and per-item Author
|
|
// populated from the figcaption.
|
|
func TestParseSearchResults_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "search_results.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, hasNext := parseSearchResults(doc, false)
|
|
t.Logf("search: %d items, hasNext=%v", len(items), hasNext)
|
|
if len(items) == 0 {
|
|
t.Fatal("search fixture: no items parsed")
|
|
}
|
|
if len(items) < 50 {
|
|
t.Errorf("search fixture: only %d items parsed; expected ~72 selector drift?", len(items))
|
|
}
|
|
if !hasNext {
|
|
t.Error("search fixture: hasNext = false; pagination Next anchor not detected")
|
|
}
|
|
withAuthor := 0
|
|
for _, it := range items {
|
|
if it.Author.Name != "" {
|
|
withAuthor++
|
|
}
|
|
}
|
|
if withAuthor < len(items)/2 {
|
|
t.Errorf("search: only %d/%d items have Author.Name", withAuthor, len(items))
|
|
}
|
|
}
|
|
|
|
// TestParseNotifications_RealFixture parses the captured /msg/others/ page.
|
|
// The captured account only has Journal notifications pending, so this
|
|
// test asserts journals are populated and the other categories at least
|
|
// don't crash (they come back as nil slices, which is the correct shape).
|
|
func TestParseNotifications_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "msg_others.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
n, err := parseNotifications(doc)
|
|
if err != nil {
|
|
t.Fatalf("parseNotifications: %v", err)
|
|
}
|
|
t.Logf("notifications: journals=%d watches=%d subComments=%d journalComments=%d favs=%d shouts=%d",
|
|
len(n.Journals), len(n.Watches),
|
|
len(n.SubmissionComments), len(n.JournalComments),
|
|
len(n.Favorites), len(n.Shouts))
|
|
|
|
if len(n.Journals) == 0 {
|
|
t.Fatal("expected at least one Journal notification in fixture")
|
|
}
|
|
for i, j := range n.Journals {
|
|
if j.JournalID == 0 {
|
|
t.Errorf("journal[%d]: JournalID == 0", i)
|
|
}
|
|
if j.Title == "" {
|
|
t.Errorf("journal[%d]: empty Title", i)
|
|
}
|
|
if j.Author.Name == "" {
|
|
t.Errorf("journal[%d]: empty Author.Name", i)
|
|
}
|
|
if j.PostedAt.IsZero() {
|
|
t.Errorf("journal[%d]: zero PostedAt", i)
|
|
}
|
|
if j.Rating == "" {
|
|
t.Errorf("journal[%d]: empty Rating", i)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestParseNotesInboxPage_RealFixture parses /msg/pms/. We assert subject,
|
|
// sender, sent-at, and note id were extracted for each row.
|
|
func TestParseNotesInboxPage_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "msg_pms.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
notes, nextURL := parseNotesInboxPage(doc)
|
|
t.Logf("notes inbox: %d items, nextURL=%q", len(notes), nextURL)
|
|
if len(notes) == 0 {
|
|
t.Fatal("expected at least one note in inbox fixture")
|
|
}
|
|
deletedSenders := 0
|
|
for i, np := range notes {
|
|
if np.ID == 0 {
|
|
t.Errorf("note[%d]: ID == 0 (href=%q)", i, np.ThreadURL)
|
|
}
|
|
if np.Subject == "" {
|
|
t.Errorf("note[%d]: empty Subject", i)
|
|
}
|
|
// FA renders notes from removed accounts with no usernameBlock and a
|
|
// [deleted] sentinel; that's expected. Count and don't fail.
|
|
if np.Sender.Name == "" {
|
|
if np.Sender.DisplayName == "[deleted]" {
|
|
deletedSenders++
|
|
} else {
|
|
t.Errorf("note[%d]: empty Sender.Name (Display=%q)", i, np.Sender.DisplayName)
|
|
}
|
|
}
|
|
if np.SentAt.IsZero() {
|
|
t.Errorf("note[%d]: zero SentAt", i)
|
|
}
|
|
}
|
|
if deletedSenders > 0 {
|
|
t.Logf("notes inbox: %d/%d items had deleted senders", deletedSenders, len(notes))
|
|
}
|
|
}
|
|
|
|
// TestParseNote_RealFixture parses /viewmessage/{id}/ and asserts subject,
|
|
// from, to, and body are populated.
|
|
func TestParseNote_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "note_view.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
n, err := parseNote(0, doc)
|
|
if err != nil {
|
|
t.Fatalf("parseNote: %v", err)
|
|
}
|
|
if n.Subject == "" {
|
|
t.Error("note: empty Subject")
|
|
}
|
|
if n.From.Name == "" {
|
|
t.Error("note: empty From.Name")
|
|
}
|
|
if n.To.Name == "" {
|
|
t.Error("note: empty To.Name")
|
|
}
|
|
if n.BodyText == "" {
|
|
t.Error("note: empty BodyText")
|
|
}
|
|
if n.SentAt.IsZero() {
|
|
t.Error("note: zero SentAt")
|
|
}
|
|
t.Logf("note: subject=%q from=%s to=%s body-len=%d", n.Subject, n.From.Name, n.To.Name, len(n.BodyText))
|
|
}
|
|
|
|
// TestParseSubmissionInboxPage_RealFixture parses the captured
|
|
// /msg/submissions/ page (the "new stuff from people you watch" feed) and
|
|
// asserts: items are extracted, dates are lifted from the date-divider,
|
|
// authors are populated from figcaptions, and the cursor link is found.
|
|
func TestParseSubmissionInboxPage_RealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "msg_submissions.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, nextURL := parseSubmissionInboxPage(doc, false)
|
|
t.Logf("inbox: %d items, nextURL=%q", len(items), nextURL)
|
|
if len(items) == 0 {
|
|
t.Fatal("inbox fixture: no items parsed")
|
|
}
|
|
withAuthor := 0
|
|
withDate := 0
|
|
for _, it := range items {
|
|
if it.ID == 0 {
|
|
t.Errorf("inbox item: ID == 0")
|
|
}
|
|
if it.Author.Name != "" {
|
|
withAuthor++
|
|
}
|
|
if !it.PostedAt.IsZero() {
|
|
withDate++
|
|
}
|
|
}
|
|
if withAuthor < len(items)/2 {
|
|
t.Errorf("inbox: only %d/%d items have Author.Name", withAuthor, len(items))
|
|
}
|
|
if withDate < len(items)/2 {
|
|
t.Errorf("inbox: only %d/%d items have PostedAt group-date lift failing?", withDate, len(items))
|
|
}
|
|
if nextURL == "" {
|
|
t.Log("inbox: no cursor link found (fixture may be on the last page)")
|
|
} else if !strings.Contains(nextURL, "/msg/submissions/") {
|
|
t.Errorf("inbox: cursor href looks wrong: %q", nextURL)
|
|
}
|
|
}
|
|
|
|
// TestParseGalleryPage_BrowseRealFixture verifies parseGalleryPage works
|
|
// against FA's /browse/ feed the front-page firehose. Same figure[id^=sid-]
|
|
// structure as user galleries, plus a u-{name} class that encodes the
|
|
// artist; figcaption still carries the artist link too.
|
|
func TestParseGalleryPage_BrowseRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "browse.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
items, hasNext := parseGalleryPage(doc, false)
|
|
t.Logf("browse: %d items, hasNext=%v", len(items), hasNext)
|
|
if len(items) == 0 {
|
|
t.Fatal("browse fixture: no items parsed")
|
|
}
|
|
// FA serves 72 per page by default; assert we got close to that so we
|
|
// notice if a selector starts dropping silently.
|
|
if len(items) < 50 {
|
|
t.Errorf("browse fixture: only %d items parsed; expected ~72 selector drift?", len(items))
|
|
}
|
|
withAuthor := 0
|
|
for _, it := range items {
|
|
if it.Author.Name != "" {
|
|
withAuthor++
|
|
}
|
|
}
|
|
if withAuthor < len(items)/2 {
|
|
t.Errorf("browse: only %d/%d items have Author.Name", withAuthor, len(items))
|
|
}
|
|
}
|
|
|
|
// TestClassifySystemMessage_NotFoundRealFixture pins the not-found
|
|
// classifier against FA's real System Error template captured by visiting
|
|
// a non-existent submission ID.
|
|
func TestClassifySystemMessage_NotFoundRealFixture(t *testing.T) {
|
|
raw := loadFixture(t, "system_message_not_found.html")
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
t.Fatalf("read doc: %v", err)
|
|
}
|
|
got := classifySystemMessage(doc)
|
|
if !errors.Is(got, ErrNotFound) {
|
|
t.Fatalf("classifySystemMessage = %v; want ErrNotFound", got)
|
|
}
|
|
}
|
|
|
|
// Sanity helper: list the document title to help diagnose fixtures whose
|
|
// content shape doesn't match parser expectations. Useful when adding new
|
|
// fixtures and triaging which selector to update.
|
|
func docTitleFor(t *testing.T, name string) string {
|
|
t.Helper()
|
|
raw := loadFixture(t, name)
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw))
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(doc.Find("title").First().Text())
|
|
}
|
|
|
|
// TestFixtureTitles is a non-failing diagnostic that prints the <title> of
|
|
// every captured fixture. Skipped silently when no fixtures are present.
|
|
func TestFixtureTitles(t *testing.T) {
|
|
names := []string{
|
|
"submission.html", "submission_story.html",
|
|
"user.html", "user_with_shouts.html",
|
|
"gallery_page1.html", "gallery_page_last.html",
|
|
"scraps_page1.html", "favorites_page1.html",
|
|
"journals_listing_page1.html", "journal.html",
|
|
"comments_submission.html", "comments_journal.html",
|
|
"system_message_not_found.html",
|
|
"msg_submissions.html", "msg_others.html", "msg_pms.html", "note_view.html",
|
|
"search_results.html", "browse.html",
|
|
}
|
|
for _, n := range names {
|
|
t.Run(n, func(t *testing.T) {
|
|
title := docTitleFor(t, n) // t.Skip fires inside if missing
|
|
t.Logf("title: %s", title)
|
|
})
|
|
}
|
|
}
|