Files
go-fa-api/comment_parser.go
2026-05-25 22:27:18 +02:00

132 lines
3.6 KiB
Go

package fa
import (
"strconv"
"strings"
"github.com/PuerkitoBio/goquery"
"git.anthrove.art/public/go-fa-api/internal/urls"
)
// parseComments collects every comment container found in doc (a submission
// or journal page) and returns one *Comment per container, in the order the
// selection yields them.
//
// For each container it fills in:
//   - ID from the element id, with a "cid:" or "comment-" prefix stripped;
//   - Deleted when the class attribute mentions a deleted-comment marker;
//   - Depth from data-depth, else a "c-N" class token, else the legacy
//     "width: NN%" inline style (only consulted while Depth is still 0);
//   - Parent from data-parent, else a "replyto-N" class token;
//   - Author, PostedAt, BodyHTML and BodyText from the nested markup.
//
// Containers that yield no ID, no author display name and no body text are
// dropped from the result.
func parseComments(doc *goquery.Document) []*Comment {
	const containerQuery = "div.comment-container, div[id^='cid:'], div[id^='comment-']"

	var comments []*Comment
	doc.Find(containerQuery).Each(func(_ int, node *goquery.Selection) {
		// The id attribute carries the comment ID behind one of two prefixes.
		// A parse failure is deliberately ignored: the entry keeps a zero ID
		// and is judged by the emptiness check at the end.
		rawID, _ := node.Attr("id")
		rawID = strings.TrimPrefix(strings.TrimPrefix(rawID, "cid:"), "comment-")
		commentID, _ := parseID[CommentID](rawID)
		entry := &Comment{ID: commentID}

		// The class attribute feeds the deleted flag and both class-token
		// fallbacks below, so read and tokenize it once.
		classAttr, _ := node.Attr("class")
		classTokens := strings.Fields(classAttr)

		// Deleted comments are flagged by a class on the container itself.
		if strings.Contains(classAttr, "comment-deleted") || strings.Contains(classAttr, "deleted-comment") {
			entry.Deleted = true
		}

		// Depth: data-depth wins whenever the attribute exists (even if it
		// does not parse); class tokens are only scanned when it is absent.
		if rawDepth, hasDepth := node.Attr("data-depth"); hasDepth {
			if depth, err := strconv.Atoi(strings.TrimSpace(rawDepth)); err == nil {
				entry.Depth = depth
			}
		} else {
			for _, token := range classTokens {
				if !strings.HasPrefix(token, "c-") {
					continue
				}
				depth, err := strconv.Atoi(token[len("c-"):])
				if err != nil {
					continue
				}
				entry.Depth = depth
				break
			}
		}
		// Last resort: the legacy inline width style.
		if entry.Depth == 0 {
			if inline, hasStyle := node.Attr("style"); hasStyle {
				entry.Depth = depthFromWidthStyle(inline)
			}
		}

		// Parent: same precedence scheme, data-parent before "replyto-N".
		if rawParent, hasParent := node.Attr("data-parent"); hasParent {
			if parentID, err := parseID[CommentID](strings.TrimSpace(rawParent)); err == nil {
				entry.Parent = parentID
			}
		} else {
			for _, token := range classTokens {
				if !strings.HasPrefix(token, "replyto-") {
					continue
				}
				parentID, err := parseID[CommentID](token[len("replyto-"):])
				if err != nil {
					continue
				}
				entry.Parent = parentID
				break
			}
		}

		// Author: first matching profile link inside the container.
		author := node.Find("a.iconusername, .comment-username a, .c-usernameBlock a").First()
		if author.Length() > 0 {
			profileHref, _ := author.Attr("href")
			displayName := trimText(author)
			avatarSrc := trimAttr(author.Find("img").First(), "src")
			entry.Author = UserRef{
				DisplayName: displayName,
				AvatarURL:   urls.AbsoluteCDN(avatarSrc),
			}
			// The second path segment of the link is used as the account
			// name (presumably hrefs look like "/user/<name>/").
			segments := strings.Split(strings.Trim(profileHref, "/"), "/")
			if len(segments) >= 2 {
				entry.Author.Name = segments[1]
			}
		}

		// Date: the title attribute is preferred over the visible text.
		stamp := node.Find("span.popup_date").First()
		rawDate := firstNonEmpty(trimAttr(stamp, "title"), trimText(stamp))
		if postedAt, err := ParseFADate(rawDate); err == nil {
			entry.PostedAt = postedAt
		}

		// Body: keep both the markup and the whitespace-trimmed text.
		content := node.Find("div.comment-user-text, div.user-text, .no_overflow").First()
		entry.BodyHTML = htmlOf(content)
		entry.BodyText = strings.TrimSpace(content.Text())

		// Skip containers that produced nothing identifiable.
		if entry.ID == 0 && entry.Author.DisplayName == "" && entry.BodyText == "" {
			return
		}
		comments = append(comments, entry)
	})
	return comments
}
// depthFromWidthStyle reads a legacy FA inline style like
// "width: 96%; padding: ..." and maps it to a depth level. FA used to shrink
// each reply by 3% per level, which is how earlier scrapers detected depth.
//
// The style is split into ";"-separated declarations and only a declaration
// whose property name is exactly "width" (case-insensitive, surrounding
// whitespace ignored) is considered, so "max-width", "min-width" or
// "border-width" are never mistaken for it. The first such declaration with
// an integer percentage value decides the result.
//
// Returns 0 if no usable width is found, or if the percentage is <= 0 or
// >= 99; otherwise (99 - percentage) / 3.
func depthFromWidthStyle(style string) int {
	for _, decl := range strings.Split(strings.ToLower(style), ";") {
		prop, value, ok := strings.Cut(decl, ":")
		if !ok || strings.TrimSpace(prop) != "width" {
			continue
		}
		// Take everything before the first '%' so trailing tokens such as
		// "!important" do not get in the way.
		end := strings.Index(value, "%")
		if end == -1 {
			continue // not a percentage width (e.g. "100px")
		}
		pct, err := strconv.Atoi(strings.TrimSpace(value[:end]))
		if err != nil {
			continue
		}
		// 100% or 99% -> depth 0; each 3% step is one level deeper.
		if pct <= 0 || pct >= 99 {
			return 0
		}
		return (99 - pct) / 3
	}
	return 0
}