132 lines
3.6 KiB
Go
132 lines
3.6 KiB
Go
package fa
|
|
|
|
import (
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"git.anthrove.art/public/go-fa-api/internal/urls"
|
|
)
|
|
|
|
// parseComments walks a submission or journal page's comment section and
|
|
// returns the comments in document order. Depth and Parent are inferred from
|
|
// data-attributes when present, otherwise from the legacy "width: NN%" style
|
|
// FA still emits on threaded replies.
|
|
func parseComments(doc *goquery.Document) []*Comment {
|
|
var out []*Comment
|
|
|
|
doc.Find("div.comment-container, div[id^='cid:'], div[id^='comment-']").Each(func(_ int, sel *goquery.Selection) {
|
|
idAttr, _ := sel.Attr("id")
|
|
idStr := strings.TrimPrefix(idAttr, "cid:")
|
|
idStr = strings.TrimPrefix(idStr, "comment-")
|
|
id, _ := parseID[CommentID](idStr)
|
|
|
|
c := &Comment{ID: id}
|
|
|
|
// Deleted comments: class on the container.
|
|
if class, _ := sel.Attr("class"); strings.Contains(class, "comment-deleted") ||
|
|
strings.Contains(class, "deleted-comment") {
|
|
c.Deleted = true
|
|
}
|
|
|
|
// Depth: prefer data-depth, then class "c-1/c-2/...", then style width %.
|
|
if d, ok := sel.Attr("data-depth"); ok {
|
|
if n, err := strconv.Atoi(strings.TrimSpace(d)); err == nil {
|
|
c.Depth = n
|
|
}
|
|
} else if class, _ := sel.Attr("class"); class != "" {
|
|
for _, tok := range strings.Fields(class) {
|
|
if strings.HasPrefix(tok, "c-") {
|
|
if n, err := strconv.Atoi(strings.TrimPrefix(tok, "c-")); err == nil {
|
|
c.Depth = n
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if c.Depth == 0 {
|
|
if style, ok := sel.Attr("style"); ok {
|
|
c.Depth = depthFromWidthStyle(style)
|
|
}
|
|
}
|
|
|
|
// Parent: from data-parent or replyto-* class.
|
|
if p, ok := sel.Attr("data-parent"); ok {
|
|
if n, err := parseID[CommentID](strings.TrimSpace(p)); err == nil {
|
|
c.Parent = n
|
|
}
|
|
} else if class, _ := sel.Attr("class"); class != "" {
|
|
for _, tok := range strings.Fields(class) {
|
|
if strings.HasPrefix(tok, "replyto-") {
|
|
if n, err := parseID[CommentID](strings.TrimPrefix(tok, "replyto-")); err == nil {
|
|
c.Parent = n
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Author.
|
|
authorSel := sel.Find("a.iconusername, .comment-username a, .c-usernameBlock a").First()
|
|
if authorSel.Length() > 0 {
|
|
href, _ := authorSel.Attr("href")
|
|
c.Author = UserRef{
|
|
DisplayName: trimText(authorSel),
|
|
AvatarURL: urls.AbsoluteCDN(trimAttr(authorSel.Find("img").First(), "src")),
|
|
}
|
|
if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
|
|
c.Author.Name = parts[1]
|
|
}
|
|
}
|
|
|
|
// Date.
|
|
dateSel := sel.Find("span.popup_date").First()
|
|
if t, err := ParseFADate(firstNonEmpty(trimAttr(dateSel, "title"), trimText(dateSel))); err == nil {
|
|
c.PostedAt = t
|
|
}
|
|
|
|
// Body.
|
|
body := sel.Find("div.comment-user-text, div.user-text, .no_overflow").First()
|
|
c.BodyHTML = htmlOf(body)
|
|
c.BodyText = strings.TrimSpace(body.Text())
|
|
|
|
if c.ID != 0 || c.Author.DisplayName != "" || c.BodyText != "" {
|
|
out = append(out, c)
|
|
}
|
|
})
|
|
|
|
return out
|
|
}
|
|
|
|
// depthFromWidthStyle reads a legacy FA inline style like
// "width: 96%; padding: ..." and maps it to a depth level. FA used to shrink
// each reply by 3% per level, which is how earlier scrapers detected depth.
//
// The style is split into ";"-separated declarations and only a property
// named exactly "width" (case-insensitive, surrounding whitespace ignored) is
// considered, so "max-width", "min-width" or "border-width" never shadow the
// real width. The first width declaration holding an integer percentage wins.
//
// Returns 0 if no usable width is found, and for widths >= 99% or <= 0%.
func depthFromWidthStyle(style string) int {
	for _, decl := range strings.Split(strings.ToLower(style), ";") {
		prop, value, ok := strings.Cut(decl, ":")
		if !ok || strings.TrimSpace(prop) != "width" {
			continue
		}
		// Only percentage widths carry depth information; "width: 100px" and
		// similar are ignored.
		pct, _, ok := strings.Cut(value, "%")
		if !ok {
			continue
		}
		num, err := strconv.Atoi(strings.TrimSpace(pct))
		if err != nil {
			continue
		}
		// 100% or 99% -> depth 0; each 3% step is one level deeper.
		if num >= 99 || num <= 0 {
			return 0
		}
		return (99 - num) / 3
	}
	return 0
}
|