Files
go-fa-api/gallery_parser.go
2026-05-25 22:27:18 +02:00

108 lines
3.2 KiB
Go

package fa
import (
"strings"
"github.com/PuerkitoBio/goquery"
"git.anthrove.art/public/go-fa-api/internal/urls"
)
// parseGalleryPage walks one listing page (/gallery/, /scraps/,
// /favorites/, or /browse/) and collects a preview for every
// <figure id="sid-…"> element it contains. hasNext carries the result of
// detectNextPage for the same document.
//
// useJSON switches on the experimental JSON-first merge. When it is true,
// the listing JSON embedded in the page is read up front via
// readListingJSON and handed to parseGalleryFigure, which prefers it for
// title/author/avatar while still scraping the HTML for everything else
// (rating, thumb, ID). When it is false, no JSON is read and the result is
// pure HTML scraping — the same behaviour as before
// [WithExperimentalJSONListings] existed.
//
// Figures that parseGalleryFigure rejects (nil result) are skipped, so
// items may be shorter than the number of figures on the page.
func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, hasNext bool) {
	// Stays nil unless the JSON merge was requested.
	var jsonData listingJSONMap
	if useJSON {
		jsonData = readListingJSON(doc)
	}

	figures := doc.Find("figure[id^=sid-]")
	figures.Each(func(_ int, figure *goquery.Selection) {
		preview := parseGalleryFigure(figure, jsonData)
		if preview == nil {
			return
		}
		items = append(items, preview)
	})

	return items, detectNextPage(doc)
}
// parseGalleryFigure lifts a single submission preview from a
// <figure id="sid-…"> element. Shared between gallery, browse, favorites,
// search, and the submission inbox.
//
// It returns nil when the figure's id attribute (minus the "sid-" prefix)
// fails to parse or parses to a zero submission ID.
//
// From the HTML it fills:
//   - Title and ThumbURL from the first /view/ link inside the figure,
//   - Rating from the figure's r-adult / r-mature / r-general class token
//     (checked in that order; left at its zero value if none is present),
//   - Author from the first /user/ link inside the figcaption, if any.
//
// When jsonData contains an entry for this submission's ID, the entry's
// non-empty values win for title, author display name, and lower-cased
// name, and a non-empty avatar URL derived from the entry is stored as
// well. Rating, ThumbURL, and ID always come from the HTML — no such
// fields are read from the JSON entry.
func parseGalleryFigure(sel *goquery.Selection, jsonData listingJSONMap) *Submission {
	// The ok flag is ignored on purpose: a missing attribute yields "",
	// which is handed to parseID like any other value.
	idAttr, _ := sel.Attr("id")
	idStr := strings.TrimPrefix(idAttr, "sid-")
	id, err := parseID[SubmissionID](idStr)
	if err != nil || id == 0 {
		return nil
	}
	s := &Submission{ID: id}

	viewLink := sel.Find("a[href^='/view/']").First()
	if viewLink.Length() > 0 {
		// Title candidates in order of preference: the link's title
		// attribute, the first figcaption paragraph, the link's own text.
		s.Title = firstNonEmpty(
			trimAttr(viewLink, "title"),
			trimText(sel.Find("figcaption p:first-child").First()),
			trimText(viewLink),
		)
		// Prefer data-src over src for the thumbnail.
		// NOTE(review): presumably data-src is the lazy-load attribute — confirm.
		img := viewLink.Find("img").First()
		s.ThumbURL = urls.AbsoluteCDN(firstNonEmpty(
			trimAttr(img, "data-src"),
			trimAttr(img, "src"),
		))
	}

	// Rating class on the figure: figure.t-image.r-general (et al.).
	// The class attribute is a whitespace-separated token list, so compare
	// whole tokens; a substring search would also fire on any longer class
	// name that merely ends in "r-adult", "r-mature", or "r-general".
	class, _ := sel.Attr("class")
	var adult, mature, general bool
	for _, token := range strings.Fields(class) {
		switch token {
		case "r-adult":
			adult = true
		case "r-mature":
			mature = true
		case "r-general":
			general = true
		}
	}
	// Strictest rating wins if several tokens are present.
	switch {
	case adult:
		s.Rating = RatingAdult
	case mature:
		s.Rating = RatingMature
	case general:
		s.Rating = RatingGeneral
	}

	// Author from figcaption (favorites/browse render an artist link there).
	if author := sel.Find("figcaption a[href^='/user/']").First(); author.Length() > 0 {
		href, _ := author.Attr("href")
		s.Author = UserRef{
			DisplayName: trimText(author),
		}
		// href looks like "/user/<name>/"; the second path segment is the
		// account name, stored lower-cased.
		if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
			s.Author.Name = strings.ToLower(parts[1])
		}
	}

	// JSON enrichment — preferred source for the fields it carries.
	// Indexing a nil map is safe, so no separate nil check is needed.
	if entry, ok := jsonData[id]; ok {
		if entry.Title != "" {
			s.Title = entry.Title
		}
		if entry.Username != "" {
			s.Author.DisplayName = entry.Username
		}
		if entry.Lower != "" {
			s.Author.Name = entry.Lower
		}
		if av := avatarURLFromMtime(entry.Lower, entry.AvatarMtime); av != "" {
			s.Author.AvatarURL = av
		}
	}
	return s
}