FA's /favorites/{user}/ pagination is cursor-addressed by the fave-ID
of the last item on the previous page (e.g.
/favorites/{user}/1951234825/next), not by sequential integers. The
previous URL builder generated /favorites/{user}/{N}/ for N>=2; FA
interpreted that as a malformed cursor and silently returned page 1,
which caused the Favorites iterator to loop forever and the new
FavoritesPage to report HasNext=true on every call.
Changes:
- urls.Favorites(name) returns the first-page URL; new
urls.FavoritesCursor(name, cursor) builds /favorites/.../next URLs.
- FavoritesPage now takes a cursor string; empty = first page.
Returns ListingPage.NextPage as the opaque fave-ID for the next call.
- ListingPage gains NextPage string (decimal page number for
Gallery/Scraps, fave-ID cursor for Favorites) and drops the Page int
field that conflated those two notions.
- Client.Favorites iterator now walks cursors internally; StartPage
is ignored for favorites (documented).
- detectNextPage / nextPageURL now parse the form action so the same
helper works for both page-number and cursor pagination.
- Added regression test that fails on the infinite-loop bug.
- Example: examples/favorites_page demonstrates cursor walking.
158 lines
5.2 KiB
Go
158 lines
5.2 KiB
Go
package fa
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"git.anthrove.art/public/go-fa-api/internal/urls"
|
|
)
|
|
|
|
// parseGalleryPage parses one page of /gallery/, /scraps/, /favorites/, or
|
|
// /browse/, returning each submission preview and whether a next page
|
|
// exists.
|
|
//
|
|
// useJSON controls the experimental JSON-first merge: when true, the
|
|
// parser reads the embedded js-submissionData blob first and uses it as
|
|
// the primary source for title/author/avatar; HTML scraping covers what
|
|
// the JSON doesn't carry (rating, thumb, ID). When false the parser is
|
|
// pure HTML the same behaviour as before [WithExperimentalJSONListings]
|
|
// existed.
|
|
func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, hasNext bool) {
|
|
items, _, hasNext = parseListingPage(doc, useJSON)
|
|
return items, hasNext
|
|
}
|
|
|
|
// parseListingPage parses one page of a listing endpoint and also returns
|
|
// the raw next-page URL FA emits in its "Next" pagination form. Callers
|
|
// that need to chain across cursor-based pages (Favorites) consume the
|
|
// URL; callers that don't (Gallery / Scraps) can ignore it.
|
|
func parseListingPage(doc *goquery.Document, useJSON bool) (items []*Submission, nextURL string, hasNext bool) {
|
|
var jsonData listingJSONMap
|
|
if useJSON {
|
|
jsonData = readListingJSON(doc)
|
|
}
|
|
doc.Find("figure[id^=sid-]").Each(func(_ int, sel *goquery.Selection) {
|
|
if s := parseGalleryFigure(sel, jsonData); s != nil {
|
|
items = append(items, s)
|
|
}
|
|
})
|
|
nextURL, hasNext = nextPageURL(doc)
|
|
return items, nextURL, hasNext
|
|
}
|
|
|
|
// parseGalleryFigure lifts a single submission preview from a
|
|
// <figure id="sid-…"> element. Shared between gallery, browse, favorites,
|
|
// search, and the submission inbox.
|
|
//
|
|
// When jsonData is non-nil and contains an entry for this submission's
|
|
// ID, the JSON values win for title/author display name/lower-cased name/
|
|
// avatar. Rating, ThumbURL, and ID always come from the HTML those
|
|
// aren't represented in the JSON blob.
|
|
func parseGalleryFigure(sel *goquery.Selection, jsonData listingJSONMap) *Submission {
|
|
idAttr, _ := sel.Attr("id")
|
|
idStr := strings.TrimPrefix(idAttr, "sid-")
|
|
id, err := parseID[SubmissionID](idStr)
|
|
if err != nil || id == 0 {
|
|
return nil
|
|
}
|
|
s := &Submission{ID: id}
|
|
|
|
viewLink := sel.Find("a[href^='/view/']").First()
|
|
if viewLink.Length() > 0 {
|
|
s.Title = firstNonEmpty(
|
|
trimAttr(viewLink, "title"),
|
|
trimText(sel.Find("figcaption p:first-child").First()),
|
|
trimText(viewLink),
|
|
)
|
|
img := viewLink.Find("img").First()
|
|
s.ThumbURL = urls.AbsoluteCDN(firstNonEmpty(
|
|
trimAttr(img, "data-src"),
|
|
trimAttr(img, "src"),
|
|
))
|
|
}
|
|
|
|
// Rating class on the figure: figure.t-image.r-general (et al.)
|
|
class, _ := sel.Attr("class")
|
|
switch {
|
|
case strings.Contains(class, "r-adult"):
|
|
s.Rating = RatingAdult
|
|
case strings.Contains(class, "r-mature"):
|
|
s.Rating = RatingMature
|
|
case strings.Contains(class, "r-general"):
|
|
s.Rating = RatingGeneral
|
|
}
|
|
|
|
// Author from figcaption (favorites/browse render an artist link there).
|
|
if author := sel.Find("figcaption a[href^='/user/']").First(); author.Length() > 0 {
|
|
href, _ := author.Attr("href")
|
|
s.Author = UserRef{
|
|
DisplayName: trimText(author),
|
|
}
|
|
if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
|
|
s.Author.Name = strings.ToLower(parts[1])
|
|
}
|
|
}
|
|
|
|
// data-tags on the figure's <img> carries both the unprefixed keyword
|
|
// list and the prefixed system tags (s_/c_/a_/u_/t_). Splitting it lets
|
|
// callers classify listing items without an extra /view/ fetch.
|
|
if img := sel.Find("img[data-tags]").First(); img.Length() > 0 {
|
|
if raw, ok := img.Attr("data-tags"); ok {
|
|
applyListingDataTags(s, raw)
|
|
}
|
|
}
|
|
|
|
// JSON enrichment preferred sources for the fields it carries.
|
|
if jsonData != nil {
|
|
if entry, ok := jsonData[id]; ok {
|
|
if entry.Title != "" {
|
|
s.Title = entry.Title
|
|
}
|
|
if entry.Username != "" {
|
|
s.Author.DisplayName = entry.Username
|
|
}
|
|
if entry.Lower != "" {
|
|
s.Author.Name = entry.Lower
|
|
}
|
|
if av := avatarURLFromMtime(entry.Lower, entry.AvatarMtime); av != "" {
|
|
s.Author.AvatarURL = av
|
|
}
|
|
}
|
|
}
|
|
|
|
return s
|
|
}
|
|
|
|
// applyListingDataTags splits the whitespace-separated data-tags attribute
|
|
// FA emits on listing-page <img> elements and routes each token to either
|
|
// CategorizedTags (when the token has a known single-letter prefix
|
|
// s_/c_/a_/u_/t_) or Tags (everything else).
|
|
//
|
|
// The prefix mapping mirrors the /view/ parser in submission_parser.go so a
|
|
// listing-path Submission carries the same categorisation a /view/-path one
|
|
// would, modulo tokens FA can't represent in this flat attribute (multi-word
|
|
// tags, the a_ vs u_ distinction).
|
|
func applyListingDataTags(s *Submission, raw string) {
|
|
for _, tok := range strings.Fields(raw) {
|
|
if len(tok) >= 3 && tok[1] == '_' {
|
|
name := tok[2:]
|
|
switch tok[0] {
|
|
case 's':
|
|
s.CategorizedTags.Species = append(s.CategorizedTags.Species, name)
|
|
continue
|
|
case 'c':
|
|
s.CategorizedTags.Characters = append(s.CategorizedTags.Characters, name)
|
|
continue
|
|
case 'a', 'u':
|
|
s.CategorizedTags.Artists = append(s.CategorizedTags.Artists, name)
|
|
continue
|
|
case 't':
|
|
s.CategorizedTags.Types = append(s.CategorizedTags.Types, name)
|
|
continue
|
|
}
|
|
}
|
|
s.Tags = append(s.Tags, tok)
|
|
}
|
|
}
|