package fa

import (
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"git.anthrove.art/public/go-fa-api/internal/urls"
)

// parseComments walks a submission or journal page's comment section and
// returns the comments in document order.
//
// Every element matching one of the known comment-container selectors is
// visited. For each one the function extracts:
//   - ID: the element id with a "cid:" or "comment-" prefix stripped.
//   - Deleted: set when the class attribute contains "comment-deleted" or
//     "deleted-comment".
//   - Depth: data-depth if the attribute is present, otherwise a "c-N" class
//     token; if the result is still 0 the legacy inline "width: NN%" style is
//     consulted via depthFromWidthStyle.
//   - Parent: data-parent if the attribute is present, otherwise a
//     "replyto-N" class token.
//   - Author, PostedAt, BodyHTML and BodyText from the nested markup.
//
// Containers that yield no ID, no author display name and no body text are
// dropped. The result is nil when nothing matched.
func parseComments(doc *goquery.Document) []*Comment {
	var out []*Comment

	doc.Find("div.comment-container, div[id^='cid:'], div[id^='comment-']").Each(func(_ int, sel *goquery.Selection) {
		idAttr, _ := sel.Attr("id")
		idStr := strings.TrimPrefix(idAttr, "cid:")
		idStr = strings.TrimPrefix(idStr, "comment-")
		// Best effort: the error is deliberately ignored. A container whose
		// ID cannot be parsed is still emitted when it has an author or body
		// (see the filter at the end of this callback).
		id, _ := parseID[CommentID](idStr)
		c := &Comment{ID: id}

		// The class attribute drives several fallbacks below; read it once.
		class, _ := sel.Attr("class")

		// Deleted comments: class on the container.
		if strings.Contains(class, "comment-deleted") || strings.Contains(class, "deleted-comment") {
			c.Deleted = true
		}

		// Depth: prefer data-depth, then class "c-1/c-2/...", then style width %.
		// The class tokens are only consulted when data-depth is absent, not
		// when it is present but unparsable.
		if d, ok := sel.Attr("data-depth"); ok {
			if n, err := strconv.Atoi(strings.TrimSpace(d)); err == nil {
				c.Depth = n
			}
		} else {
			for _, tok := range strings.Fields(class) {
				if !strings.HasPrefix(tok, "c-") {
					continue
				}
				if n, err := strconv.Atoi(strings.TrimPrefix(tok, "c-")); err == nil {
					c.Depth = n
					break
				}
			}
		}
		if c.Depth == 0 {
			if style, ok := sel.Attr("style"); ok {
				c.Depth = depthFromWidthStyle(style)
			}
		}

		// Parent: from data-parent or replyto-* class. As with depth, the
		// class tokens are only consulted when data-parent is absent.
		if p, ok := sel.Attr("data-parent"); ok {
			if n, err := parseID[CommentID](strings.TrimSpace(p)); err == nil {
				c.Parent = n
			}
		} else {
			for _, tok := range strings.Fields(class) {
				if !strings.HasPrefix(tok, "replyto-") {
					continue
				}
				if n, err := parseID[CommentID](strings.TrimPrefix(tok, "replyto-")); err == nil {
					c.Parent = n
					break
				}
			}
		}

		// Author.
		authorSel := sel.Find("a.iconusername, .comment-username a, .c-usernameBlock a").First()
		if authorSel.Length() > 0 {
			href, _ := authorSel.Attr("href")
			c.Author = UserRef{
				DisplayName: trimText(authorSel),
				AvatarURL:   urls.AbsoluteCDN(trimAttr(authorSel.Find("img").First(), "src")),
			}
			// The login name is taken from the second path segment of the
			// link; presumably hrefs look like "/user/<name>/" (verify
			// against live markup).
			if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
				c.Author.Name = parts[1]
			}
		}

		// Date: prefer the title attribute, fall back to the visible text.
		dateSel := sel.Find("span.popup_date").First()
		if t, err := ParseFADate(firstNonEmpty(trimAttr(dateSel, "title"), trimText(dateSel))); err == nil {
			c.PostedAt = t
		}

		// Body.
		body := sel.Find("div.comment-user-text, div.user-text, .no_overflow").First()
		c.BodyHTML = htmlOf(body)
		c.BodyText = strings.TrimSpace(body.Text())

		// Skip containers that carried no usable information at all.
		if c.ID != 0 || c.Author.DisplayName != "" || c.BodyText != "" {
			out = append(out, c)
		}
	})

	return out
}

// depthFromWidthStyle reads a legacy FA inline style like
// "width: 96%; padding: ..." and maps it to a depth level. FA used to shrink
// each reply by 3% per level, which is how earlier scrapers detected depth.
//
// The style is split into ";"-separated declarations and only a declaration
// whose property name is exactly "width" (case-insensitive, surrounding
// whitespace ignored) is considered, so "max-width" and "min-width" are never
// mistaken for the element width. The first such declaration carrying an
// integer percentage wins; declarations that are not integer percentages
// (for example "width: 100px") are skipped.
//
// Returns 0 if no usable width is found, or if the width is <= 0 or >= 99.
// Otherwise the result is (99 - width) / 3 using integer division, so 96%
// is depth 1, 93% is depth 2, and 97%/98% still map to depth 0.
func depthFromWidthStyle(style string) int {
	const (
		baseWidthPct = 99 // widths at or above this are top-level comments
		stepPct      = 3  // width lost per nesting level
	)

	for _, decl := range strings.Split(strings.ToLower(style), ";") {
		prop, value, ok := strings.Cut(decl, ":")
		if !ok || strings.TrimSpace(prop) != "width" {
			continue
		}
		// Keep only what precedes the percent sign, so trailing tokens such
		// as "!important" do not break the integer parse.
		pct, _, ok := strings.Cut(value, "%")
		if !ok {
			continue
		}
		num, err := strconv.Atoi(strings.TrimSpace(pct))
		if err != nil {
			continue
		}
		if num <= 0 || num >= baseWidthPct {
			return 0
		}
		return (baseWidthPct - num) / stepPct
	}
	return 0
}