package fa

import (
	"fmt"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"git.anthrove.art/public/go-fa-api/internal/urls"
)

// parseSubmission lifts a [Submission] out of a /view/{id}/ document. The
// selectors target FA's beta theme as captured in testdata/html/submission.html.
//
// FA's beta page renders submission metadata as two parallel columns
// inside .submission-content-stats — labels in the first, values in the
// second — so this parser pairs them up positionally rather than scanning
// label-then-value rows.
//
// id may be 0, in which case the ID is recovered from the og:url meta tag
// when one is present. The only error returned wraps ErrParse and is raised
// when no submission title can be found; every other field is best-effort
// and is left at whatever the helpers yield when its markup is absent.
func parseSubmission(id SubmissionID, doc *goquery.Document) (*Submission, error) {
	s := &Submission{ID: id}

	// Resolve the canonical ID from the og:url meta tag when caller passed 0
	// (e.g. the real-fixture test). Lets the parser stand on its own.
	if s.ID == 0 {
		if og := trimAttr(doc.Find(`meta[property="og:url"]`).First(), "content"); og != "" {
			if n := extractIntFromHref(og); n > 0 {
				s.ID = SubmissionID(n)
			}
		}
	}

	// Author scoping: there can be multiple .iconusername references on the
	// page (e.g. inside the description). The submission's true author lives
	// inside .submission-description-artist.
	authorBox := doc.Find("div.submission-description-artist").First()

	// Title — prefer the one scoped to the author box, then any on the page.
	s.Title = strings.TrimSpace(authorBox.Find("div.submission-title h2").First().Text())
	if s.Title == "" {
		s.Title = strings.TrimSpace(doc.Find("div.submission-title h2").First().Text())
	}
	if s.Title == "" {
		// Surface what FA actually served so the caller can tell the
		// difference between a CF challenge, an SFW guard, a deleted
		// submission, and a real markup-drift bug.
		pageTitle := strings.TrimSpace(doc.Find("title").First().Text())
		// Report s.ID rather than the raw parameter so an ID recovered from
		// og:url shows up in the message instead of 0.
		return nil, fmt.Errorf("%w: submission %d: missing title (page =%q)", ErrParse, s.ID, pageTitle)
	}

	// Author.
	if authorBox.Length() > 0 {
		avatarLink := authorBox.Find("a[href^='/user/'] img").First()
		nameSpan := authorBox.Find(".c-usernameBlockSimple__displayName").First()
		s.Author = UserRef{
			DisplayName: trimText(nameSpan),
			AvatarURL:   urls.AbsoluteCDN(trimAttr(avatarLink, "src")),
		}
		// Prefer the title attr (URL-safe login) when present; fall back to href.
		if t := strings.TrimSpace(trimAttr(nameSpan, "title")); t != "" {
			s.Author.Name = strings.ToLower(t)
		}
		if s.Author.Name == "" {
			// A missing href yields "", which splits into a single part and
			// is ignored by the length check below.
			href, _ := authorBox.Find("a[href^='/user/']").First().Attr("href")
			if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
				s.Author.Name = strings.ToLower(parts[1])
			}
		}
	}

	// Posted date — popup_date carries authoritative data-time.
	s.PostedAt = parsePopupDate(authorBox.Find("span.popup_date").First())
	if s.PostedAt.IsZero() {
		s.PostedAt = parsePopupDate(doc.Find("span.popup_date").First())
	}

	// Rating — div with class c-contentRating--{general,mature,adult} in the
	// page stats panel; fall back to legacy .rating-box for older markup.
	// EachWithBreak (stopping after the first match) is used so ParseRating
	// is never invoked when the selector matches nothing.
	doc.Find("div.submission-page-stats div[class*='c-contentRating--']").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
		s.Rating = ParseRating(trimText(sel))
		return false
	})
	if s.Rating == "" {
		doc.Find(".rating-box").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
			s.Rating = ParseRating(trimText(sel))
			return false
		})
	}

	// Stats — .submission-page-stats > div[title=...] each holds <div>N</div>
	// <div class="highlight">Label</div>.
	doc.Find("div.submission-page-stats > div[title]").Each(func(_ int, sel *goquery.Selection) {
		title := strings.ToLower(trimAttr(sel, "title"))
		val := parseStatNumber(trimText(sel.Find("div").First()))
		switch title {
		case "views":
			s.Stats.Views = val
		case "favorites":
			s.Stats.Favorites = val
		case "comments":
			s.Stats.Comments = val
		}
	})

	// Category / Theme / Species / Resolution / File Size are two parallel
	// span columns inside .submission-content-stats. Pair them up by index.
	statsBlock := doc.Find("div.submission-content-stats").First()
	if statsBlock.Length() > 0 {
		var labels []string
		statsBlock.Find("span.highlight > span").Each(func(_ int, sel *goquery.Selection) {
			labels = append(labels, strings.ToLower(strings.TrimSpace(sel.Text())))
		})

		var values []string
		statsBlock.ChildrenFiltered("span").Each(func(_ int, sel *goquery.Selection) {
			if class, _ := sel.Attr("class"); strings.Contains(class, "highlight") {
				return // skip the labels column
			}
			sel.ChildrenFiltered("span").Each(func(_ int, inner *goquery.Selection) {
				values = append(values, strings.TrimSpace(inner.Text()))
			})
		})

		// Stop at the shorter column so a mismatch never indexes out of range.
		for i := 0; i < len(labels) && i < len(values); i++ {
			switch labels[i] {
			case "category":
				s.Category = Category(values[i])
			case "type", "theme":
				s.Type = Type(values[i])
			case "species":
				s.Species = Species(values[i])
			case "gender":
				s.Gender = Gender(values[i])
			case "resolution":
				if w, h, ok := parseResolution(values[i]); ok {
					s.Width, s.Height = w, h
				}
			}
		}
	}

	// Description — section.submission-description holds the body inside
	// .section-body > .submission-description-text.
	descBody := doc.Find("section.submission-description div.submission-description-text").First()
	if descBody.Length() == 0 {
		descBody = doc.Find("div.submission-description").First()
	}
	s.Description = htmlOf(descBody)
	s.DescriptionText = strings.TrimSpace(descBody.Text())

	// Tags — anchors inside .submission-tags whose href targets the search.
	// Anchors that do not link to /search/ are excluded by the selector, and
	// anchors with no visible text are dropped here.
	doc.Find("div.submission-tags span.tags a[href*='/search/']").Each(func(_ int, a *goquery.Selection) {
		t := strings.TrimSpace(a.Text())
		if t != "" {
			s.Tags = append(s.Tags, t)
		}
	})

	// File URL — FA renders a "Download" button in #submission-options that
	// links to the canonical file for *every* submission type. For visual
	// art it equals the #submissionImg source; for stories and music it's
	// the only correct source, because FA injects a generated thumbnail
	// (e.g. ".thumbnail.<name>.docx.gif") into #submissionImg there. So the
	// Download button is authoritative; #submissionImg is only a fallback.
	doc.Find("div#submission-options a").EachWithBreak(func(_ int, a *goquery.Selection) bool {
		if strings.EqualFold(trimText(a), "download") {
			s.FileURL = urls.AbsoluteCDN(trimAttr(a, "href"))
			return false
		}
		return true
	})

	// #submissionImg holds the inline image for visual art, or a generated
	// thumbnail for non-image submissions. It always supplies ThumbURL, but
	// only supplies FileURL when no Download button was found.
	img := doc.Find("#submissionImg").First()
	if img.Length() > 0 {
		s.ThumbURL = urls.AbsoluteCDN(firstNonEmpty(
			trimAttr(img, "data-preview-src"),
			trimAttr(img, "src"),
		))
		if s.FileURL == "" {
			s.FileURL = urls.AbsoluteCDN(firstNonEmpty(
				trimAttr(img, "data-fullview-src"),
				trimAttr(img, "src"),
			))
		}
		// Dimensions also live in data-fullview-width/height attrs on some
		// pages. Only positive values are taken, so a "0" or otherwise bogus
		// attribute cannot erase a resolution already read from the stats
		// panel.
		if w, err := strconv.Atoi(trimAttr(img, "data-fullview-width")); err == nil && w > 0 {
			s.Width = w
		}
		if h, err := strconv.Atoi(trimAttr(img, "data-fullview-height")); err == nil && h > 0 {
			s.Height = h
		}
	}

	// Legacy fallback for older themes that predate #submission-options.
	if s.FileURL == "" {
		dl := doc.Find("div.submission-controls-upper a[href*='/d.furaffinity.net/'], div.download a, a.download-logged-in").First()
		s.FileURL = urls.AbsoluteCDN(trimAttr(dl, "href"))
	}

	// Prev / Next (FA's minigallery-navigation: "Newer" / "Older").
	doc.Find("div.minigallery-navigation a").Each(func(_ int, a *goquery.Selection) {
		href, _ := a.Attr("href")
		text := strings.ToLower(trimText(a))
		// Named target (not id) so the function parameter is not shadowed.
		target := SubmissionID(extractIntFromHref(href))
		if target == 0 {
			return
		}
		switch {
		case strings.Contains(text, "newer"):
			// "Newer" goes to a more recent submission — surface as Prev so
			// callers walking a gallery can call client.GetSubmission(s.Prev)
			// to step toward the newest.
			s.Prev = target
		case strings.Contains(text, "older"):
			s.Next = target
		}
	})

	// Legacy fallback for older themes that still use favorite-nav.
	if s.Prev == 0 && s.Next == 0 {
		doc.Find("div.favorite-nav a, .submission-nav a").Each(func(_ int, a *goquery.Selection) {
			href, _ := a.Attr("href")
			text := strings.ToLower(trimText(a))
			target := SubmissionID(extractIntFromHref(href))
			if target == 0 {
				return
			}
			switch {
			case strings.Contains(text, "prev"):
				s.Prev = target
			case strings.Contains(text, "next"):
				s.Next = target
			}
		})
	}

	// Favorite state — FA renders exactly one of the "+Fav" / "−Fav" anchors
	// for an authenticated viewer; the "−Fav" (/unfav/) link means this
	// submission is currently favorited. findFavLinks (actions.go) already
	// scrapes both. An anonymous fetch shows neither, leaving Favorited false.
	if _, unfav := findFavLinks(doc, int64(s.ID)); unfav != "" {
		s.Favorited = true
	}

	return s, nil
}

// parseResolution splits a "2071 x 1779" string into width and height ints.
// The separator is matched case-insensitively and whitespace around either
// number is ignored. Returns ok=false on any malformed input — no separator,
// more than one separator, a non-numeric side, or a negative number — so
// callers can leave Width/Height at zero.
func parseResolution(s string) (w, h int, ok bool) {
	// Lower-casing only affects the separator; digits are unchanged.
	parts := strings.Split(strings.ToLower(s), "x")
	if len(parts) != 2 {
		return 0, 0, false
	}
	wn, err := strconv.Atoi(strings.TrimSpace(parts[0]))
	if err != nil || wn < 0 {
		return 0, 0, false
	}
	hn, err := strconv.Atoi(strings.TrimSpace(parts[1]))
	if err != nil || hn < 0 {
		return 0, 0, false
	}
	return wn, hn, true
}