package fa import ( "strings" "github.com/PuerkitoBio/goquery" "git.anthrove.art/public/go-fa-api/internal/urls" ) // parseGalleryPage parses one page of /gallery/, /scraps/, /favorites/, or // /browse/, returning each submission preview and whether a next page // exists. // // useJSON controls the experimental JSON-first merge: when true, the // parser reads the embedded js-submissionData blob first and uses it as // the primary source for title/author/avatar; HTML scraping covers what // the JSON doesn't carry (rating, thumb, ID). When false the parser is // pure HTML the same behaviour as before [WithExperimentalJSONListings] // existed. func parseGalleryPage(doc *goquery.Document, useJSON bool) (items []*Submission, hasNext bool) { items, _, hasNext = parseListingPage(doc, useJSON) return items, hasNext } // parseListingPage parses one page of a listing endpoint and also returns // the raw next-page URL FA emits in its "Next" pagination form. Callers // that need to chain across cursor-based pages (Favorites) consume the // URL; callers that don't (Gallery / Scraps) can ignore it. func parseListingPage(doc *goquery.Document, useJSON bool) (items []*Submission, nextURL string, hasNext bool) { var jsonData listingJSONMap if useJSON { jsonData = readListingJSON(doc) } doc.Find("figure[id^=sid-]").Each(func(_ int, sel *goquery.Selection) { if s := parseGalleryFigure(sel, jsonData); s != nil { items = append(items, s) } }) nextURL, hasNext = nextPageURL(doc) return items, nextURL, hasNext } // parseGalleryFigure lifts a single submission preview from a //
element. Shared between gallery, browse, favorites, // search, and the submission inbox. // // When jsonData is non-nil and contains an entry for this submission's // ID, the JSON values win for title/author display name/lower-cased name/ // avatar. Rating, ThumbURL, and ID always come from the HTML those // aren't represented in the JSON blob. func parseGalleryFigure(sel *goquery.Selection, jsonData listingJSONMap) *Submission { idAttr, _ := sel.Attr("id") idStr := strings.TrimPrefix(idAttr, "sid-") id, err := parseID[SubmissionID](idStr) if err != nil || id == 0 { return nil } s := &Submission{ID: id} viewLink := sel.Find("a[href^='/view/']").First() if viewLink.Length() > 0 { s.Title = firstNonEmpty( trimAttr(viewLink, "title"), trimText(sel.Find("figcaption p:first-child").First()), trimText(viewLink), ) img := viewLink.Find("img").First() s.ThumbURL = urls.AbsoluteCDN(firstNonEmpty( trimAttr(img, "data-src"), trimAttr(img, "src"), )) } // Rating class on the figure: figure.t-image.r-general (et al.) class, _ := sel.Attr("class") switch { case strings.Contains(class, "r-adult"): s.Rating = RatingAdult case strings.Contains(class, "r-mature"): s.Rating = RatingMature case strings.Contains(class, "r-general"): s.Rating = RatingGeneral } // Author from figcaption (favorites/browse render an artist link there). if author := sel.Find("figcaption a[href^='/user/']").First(); author.Length() > 0 { href, _ := author.Attr("href") s.Author = UserRef{ DisplayName: trimText(author), } if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 { s.Author.Name = strings.ToLower(parts[1]) } } // data-tags on the figure's carries both the unprefixed keyword // list and the prefixed system tags (s_/c_/a_/u_/t_). Splitting it lets // callers classify listing items without an extra /view/ fetch. if img := sel.Find("img[data-tags]").First(); img.Length() > 0 { if raw, ok := img.Attr("data-tags"); ok { applyListingDataTags(s, raw) } } // JSON enrichment preferred sources for the fields it carries. if jsonData != nil { if entry, ok := jsonData[id]; ok { if entry.Title != "" { s.Title = entry.Title } if entry.Username != "" { s.Author.DisplayName = entry.Username } if entry.Lower != "" { s.Author.Name = entry.Lower } if av := avatarURLFromMtime(entry.Lower, entry.AvatarMtime); av != "" { s.Author.AvatarURL = av } } } return s } // applyListingDataTags splits the whitespace-separated data-tags attribute // FA emits on listing-page elements and routes each token to either // CategorizedTags (when the token has a known single-letter prefix // s_/c_/a_/u_/t_) or Tags (everything else). // // The prefix mapping mirrors the /view/ parser in submission_parser.go so a // listing-path Submission carries the same categorisation a /view/-path one // would, modulo tokens FA can't represent in this flat attribute (multi-word // tags, the a_ vs u_ distinction). func applyListingDataTags(s *Submission, raw string) { for _, tok := range strings.Fields(raw) { if len(tok) >= 3 && tok[1] == '_' { name := tok[2:] switch tok[0] { case 's': s.CategorizedTags.Species = append(s.CategorizedTags.Species, name) continue case 'c': s.CategorizedTags.Characters = append(s.CategorizedTags.Characters, name) continue case 'a', 'u': s.CategorizedTags.Artists = append(s.CategorizedTags.Artists, name) continue case 't': s.CategorizedTags.Types = append(s.CategorizedTags.Types, name) continue } } s.Tags = append(s.Tags, tok) } }