package fa import ( "bytes" "errors" "strings" "testing" "github.com/PuerkitoBio/goquery" ) // This file groups parser tests that exercise the *additional* fixtures // captured by the extended TestRefreshFixtures (story / shouts / last page // / scraps / favorites / journals listing / journal comments / system // message). Each test t.Skip's cleanly when its fixture isn't present. // TestParseSubmission_StoryRealFixture verifies the non-image submission // path. FA still renders a #submissionImg for stories, but it's a generated // thumbnail (a .gif preview of the document) FileURL must point at the // real document URL from the Download button, not the thumbnail. func TestParseSubmission_StoryRealFixture(t *testing.T) { raw := loadFixture(t, "submission_story.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } sub, err := parseSubmission(0, doc) if err != nil { t.Fatalf("parseSubmission(story): %v", err) } if sub.Title == "" { t.Error("story fixture: Title is empty") } if sub.Author.Name == "" { t.Error("story fixture: Author.Name is empty") } if sub.FileURL == "" { t.Fatal("story fixture: FileURL is empty (Download button selector missed?)") } // The thumbnail FA injects into #submissionImg ends in .gif; the real // document does not. Catching this is the whole point of the fixture. if strings.HasSuffix(sub.FileURL, ".gif") { t.Errorf("story FileURL = %q; points at the #submissionImg thumbnail gif, not the document", sub.FileURL) } if !strings.Contains(sub.FileURL, "/download/") { t.Errorf("story FileURL = %q; want the Download-button URL (.../download/...)", sub.FileURL) } if sub.Category != "Story" { t.Errorf("story Category = %q; want %q", sub.Category, "Story") } // The captured page renders a "+Fav" link (viewer has not favorited it), // so Favorited must be false against this real markup. if sub.Favorited { t.Error("story fixture: Favorited = true; fixture page shows the +Fav link") } t.Logf("story struct: %+v", sub) } // TestParseUser_WithShoutsRealFixture exercises the shouts parser. If the // captured profile happens to have zero shouts (e.g. shouts disabled), the // test logs that rather than failing the assertion is that parsing didn't // crash, not that the user had shouts. func TestParseUser_WithShoutsRealFixture(t *testing.T) { raw := loadFixture(t, "user_with_shouts.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } u, err := parseUser("fixture", doc) if err != nil { t.Fatalf("parseUser(shouts): %v", err) } if u.DisplayName == "" { t.Error("shouts fixture: DisplayName is empty") } if len(u.Shouts) == 0 { t.Logf("shouts fixture: 0 shouts parsed either the user has none, or the selector missed them") } else { t.Logf("shouts fixture: parsed %d shouts", len(u.Shouts)) first := u.Shouts[0] if first.Author.DisplayName == "" && first.BodyHTML == "" { t.Error("shouts fixture: first shout has empty Author + Body") } } } // TestParseGalleryPage_LastPageRealFixture asserts that detectNextPage // returns false on the last gallery page. A trailing page that still // reports "next" usually means our pagination selector matched a button on // the page header instead of the paginator. func TestParseGalleryPage_LastPageRealFixture(t *testing.T) { raw := loadFixture(t, "gallery_page_last.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } items, hasNext := parseGalleryPage(doc, false) t.Logf("last page: %d items, hasNext=%v", len(items), hasNext) if hasNext { t.Error("last page fixture: hasNext = true; detectNextPage likely matched a non-paginator button") } } // TestParseGalleryPage_ScrapsRealFixture confirms the same parser works on // /scraps/ pages. Scraps and gallery share figure[id^=sid-] markup. func TestParseGalleryPage_ScrapsRealFixture(t *testing.T) { raw := loadFixture(t, "scraps_page1.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } items, hasNext := parseGalleryPage(doc, false) t.Logf("scraps page1: %d items, hasNext=%v", len(items), hasNext) for i, it := range items { if it.ID == 0 { t.Errorf("scraps item %d: ID == 0", i) } if it.Title == "" { t.Errorf("scraps item %d: empty Title", i) } } } // TestParseGalleryPage_FavoritesRealFixture verifies that on a favorites // page, the per-item Author reflects the original artist (not the user // whose favorites we are walking). This is the single load-bearing // difference between gallery and favorites parsing. func TestParseGalleryPage_FavoritesRealFixture(t *testing.T) { raw := loadFixture(t, "favorites_page1.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } items, hasNext := parseGalleryPage(doc, false) if len(items) == 0 { t.Fatal("favorites fixture: no items parsed") } t.Logf("favorites: %d items, hasNext=%v", len(items), hasNext) withAuthor := 0 for i, it := range items { if it.ID == 0 { t.Errorf("fav item %d: ID == 0", i) } if it.Title == "" { t.Errorf("fav item %d: empty Title", i) } if it.Author.Name != "" { withAuthor++ } } // We require Author on at least the majority of items; FA occasionally // renders a "blocked" placeholder figure without a usable author link. if withAuthor < len(items)/2 { t.Errorf("favorites fixture: only %d/%d items had Author.Name set figcaption /user/ selector likely off", withAuthor, len(items)) } } // TestParseUserJournalsPage_RealFixture parses the journals listing // captured for FA_TEST_JOURNALS_USER. Zero entries is acceptable (the user // may have no journals); the test asserts the parser doesn't crash and // pagination detection doesn't return a false-positive next link. func TestParseUserJournalsPage_RealFixture(t *testing.T) { raw := loadFixture(t, "journals_listing_page1.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } entries, hasNext := parseUserJournalsPage(doc) t.Logf("journals listing: %d entries, hasNext=%v", len(entries), hasNext) if len(entries) == 0 && hasNext { t.Error("journals listing fixture: zero entries but hasNext=true; pagination selector likely matched a header button") } for i, j := range entries { if j.ID == 0 && j.Title == "" { t.Errorf("journals entry %d: both ID and Title empty", i) } } } // TestParseComments_JournalRealFixture confirms the comment parser works // on /journal/ pages, not just /view/ pages. func TestParseComments_JournalRealFixture(t *testing.T) { raw := loadFixture(t, "comments_journal.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } cs := parseComments(doc) t.Logf("journal comments: %d", len(cs)) for i, c := range cs { if c.Depth < 0 { t.Errorf("journal comment %d: negative depth %d", i, c.Depth) } } } // TestParseSearchResults_RealFixture verifies parseSearchResults against // the captured /search/?q=dragon fixture. Expects ~72 items, hasNext=true // (there are over a million dragon submissions), and per-item Author // populated from the figcaption. func TestParseSearchResults_RealFixture(t *testing.T) { raw := loadFixture(t, "search_results.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } items, hasNext := parseSearchResults(doc, false) t.Logf("search: %d items, hasNext=%v", len(items), hasNext) if len(items) == 0 { t.Fatal("search fixture: no items parsed") } if len(items) < 50 { t.Errorf("search fixture: only %d items parsed; expected ~72 selector drift?", len(items)) } if !hasNext { t.Error("search fixture: hasNext = false; pagination Next anchor not detected") } withAuthor := 0 for _, it := range items { if it.Author.Name != "" { withAuthor++ } } if withAuthor < len(items)/2 { t.Errorf("search: only %d/%d items have Author.Name", withAuthor, len(items)) } } // TestParseNotifications_RealFixture parses the captured /msg/others/ page. // The captured account only has Journal notifications pending, so this // test asserts journals are populated and the other categories at least // don't crash (they come back as nil slices, which is the correct shape). func TestParseNotifications_RealFixture(t *testing.T) { raw := loadFixture(t, "msg_others.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } n, err := parseNotifications(doc) if err != nil { t.Fatalf("parseNotifications: %v", err) } t.Logf("notifications: journals=%d watches=%d subComments=%d journalComments=%d favs=%d shouts=%d", len(n.Journals), len(n.Watches), len(n.SubmissionComments), len(n.JournalComments), len(n.Favorites), len(n.Shouts)) if len(n.Journals) == 0 { t.Fatal("expected at least one Journal notification in fixture") } for i, j := range n.Journals { if j.JournalID == 0 { t.Errorf("journal[%d]: JournalID == 0", i) } if j.Title == "" { t.Errorf("journal[%d]: empty Title", i) } if j.Author.Name == "" { t.Errorf("journal[%d]: empty Author.Name", i) } if j.PostedAt.IsZero() { t.Errorf("journal[%d]: zero PostedAt", i) } if j.Rating == "" { t.Errorf("journal[%d]: empty Rating", i) } } } // TestParseNotesInboxPage_RealFixture parses /msg/pms/. We assert subject, // sender, sent-at, and note id were extracted for each row. func TestParseNotesInboxPage_RealFixture(t *testing.T) { raw := loadFixture(t, "msg_pms.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } notes, nextURL := parseNotesInboxPage(doc) t.Logf("notes inbox: %d items, nextURL=%q", len(notes), nextURL) if len(notes) == 0 { t.Fatal("expected at least one note in inbox fixture") } deletedSenders := 0 for i, np := range notes { if np.ID == 0 { t.Errorf("note[%d]: ID == 0 (href=%q)", i, np.ThreadURL) } if np.Subject == "" { t.Errorf("note[%d]: empty Subject", i) } // FA renders notes from removed accounts with no usernameBlock and a // [deleted] sentinel; that's expected. Count and don't fail. if np.Sender.Name == "" { if np.Sender.DisplayName == "[deleted]" { deletedSenders++ } else { t.Errorf("note[%d]: empty Sender.Name (Display=%q)", i, np.Sender.DisplayName) } } if np.SentAt.IsZero() { t.Errorf("note[%d]: zero SentAt", i) } } if deletedSenders > 0 { t.Logf("notes inbox: %d/%d items had deleted senders", deletedSenders, len(notes)) } } // TestParseNote_RealFixture parses /viewmessage/{id}/ and asserts subject, // from, to, and body are populated. func TestParseNote_RealFixture(t *testing.T) { raw := loadFixture(t, "note_view.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } n, err := parseNote(0, doc) if err != nil { t.Fatalf("parseNote: %v", err) } if n.Subject == "" { t.Error("note: empty Subject") } if n.From.Name == "" { t.Error("note: empty From.Name") } if n.To.Name == "" { t.Error("note: empty To.Name") } if n.BodyText == "" { t.Error("note: empty BodyText") } if n.SentAt.IsZero() { t.Error("note: zero SentAt") } t.Logf("note: subject=%q from=%s to=%s body-len=%d", n.Subject, n.From.Name, n.To.Name, len(n.BodyText)) } // TestParseSubmissionInboxPage_RealFixture parses the captured // /msg/submissions/ page (the "new stuff from people you watch" feed) and // asserts: items are extracted, dates are lifted from the date-divider, // authors are populated from figcaptions, and the cursor link is found. func TestParseSubmissionInboxPage_RealFixture(t *testing.T) { raw := loadFixture(t, "msg_submissions.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } items, nextURL := parseSubmissionInboxPage(doc, false) t.Logf("inbox: %d items, nextURL=%q", len(items), nextURL) if len(items) == 0 { t.Fatal("inbox fixture: no items parsed") } withAuthor := 0 withDate := 0 for _, it := range items { if it.ID == 0 { t.Errorf("inbox item: ID == 0") } if it.Author.Name != "" { withAuthor++ } if !it.PostedAt.IsZero() { withDate++ } } if withAuthor < len(items)/2 { t.Errorf("inbox: only %d/%d items have Author.Name", withAuthor, len(items)) } if withDate < len(items)/2 { t.Errorf("inbox: only %d/%d items have PostedAt group-date lift failing?", withDate, len(items)) } if nextURL == "" { t.Log("inbox: no cursor link found (fixture may be on the last page)") } else if !strings.Contains(nextURL, "/msg/submissions/") { t.Errorf("inbox: cursor href looks wrong: %q", nextURL) } } // TestParseGalleryPage_BrowseRealFixture verifies parseGalleryPage works // against FA's /browse/ feed the front-page firehose. Same figure[id^=sid-] // structure as user galleries, plus a u-{name} class that encodes the // artist; figcaption still carries the artist link too. func TestParseGalleryPage_BrowseRealFixture(t *testing.T) { raw := loadFixture(t, "browse.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } items, hasNext := parseGalleryPage(doc, false) t.Logf("browse: %d items, hasNext=%v", len(items), hasNext) if len(items) == 0 { t.Fatal("browse fixture: no items parsed") } // FA serves 72 per page by default; assert we got close to that so we // notice if a selector starts dropping silently. if len(items) < 50 { t.Errorf("browse fixture: only %d items parsed; expected ~72 selector drift?", len(items)) } withAuthor := 0 for _, it := range items { if it.Author.Name != "" { withAuthor++ } } if withAuthor < len(items)/2 { t.Errorf("browse: only %d/%d items have Author.Name", withAuthor, len(items)) } } // TestClassifySystemMessage_NotFoundRealFixture pins the not-found // classifier against FA's real System Error template captured by visiting // a non-existent submission ID. func TestClassifySystemMessage_NotFoundRealFixture(t *testing.T) { raw := loadFixture(t, "system_message_not_found.html") doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { t.Fatalf("read doc: %v", err) } got := classifySystemMessage(doc) if !errors.Is(got, ErrNotFound) { t.Fatalf("classifySystemMessage = %v; want ErrNotFound", got) } } // Sanity helper: list the document title to help diagnose fixtures whose // content shape doesn't match parser expectations. Useful when adding new // fixtures and triaging which selector to update. func docTitleFor(t *testing.T, name string) string { t.Helper() raw := loadFixture(t, name) doc, err := goquery.NewDocumentFromReader(bytes.NewReader(raw)) if err != nil { return "" } return strings.TrimSpace(doc.Find("title").First().Text()) } // TestFixtureTitles is a non-failing diagnostic that prints the of // every captured fixture. Skipped silently when no fixtures are present. func TestFixtureTitles(t *testing.T) { names := []string{ "submission.html", "submission_story.html", "user.html", "user_with_shouts.html", "gallery_page1.html", "gallery_page_last.html", "scraps_page1.html", "favorites_page1.html", "journals_listing_page1.html", "journal.html", "comments_submission.html", "comments_journal.html", "system_message_not_found.html", "msg_submissions.html", "msg_others.html", "msg_pms.html", "note_view.html", "search_results.html", "browse.html", } for _, n := range names { t.Run(n, func(t *testing.T) { title := docTitleFor(t, n) // t.Skip fires inside if missing t.Logf("title: %s", title) }) } }