initial commit
This commit is contained in:
131
comment_parser.go
Normal file
131
comment_parser.go
Normal file
@@ -0,0 +1,131 @@
|
||||
package fa
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
|
||||
"git.anthrove.art/public/go-fa-api/internal/urls"
|
||||
)
|
||||
|
||||
// parseComments walks a submission or journal page's comment section and
|
||||
// returns the comments in document order. Depth and Parent are inferred from
|
||||
// data-attributes when present, otherwise from the legacy "width: NN%" style
|
||||
// FA still emits on threaded replies.
|
||||
func parseComments(doc *goquery.Document) []*Comment {
|
||||
var out []*Comment
|
||||
|
||||
doc.Find("div.comment-container, div[id^='cid:'], div[id^='comment-']").Each(func(_ int, sel *goquery.Selection) {
|
||||
idAttr, _ := sel.Attr("id")
|
||||
idStr := strings.TrimPrefix(idAttr, "cid:")
|
||||
idStr = strings.TrimPrefix(idStr, "comment-")
|
||||
id, _ := parseID[CommentID](idStr)
|
||||
|
||||
c := &Comment{ID: id}
|
||||
|
||||
// Deleted comments: class on the container.
|
||||
if class, _ := sel.Attr("class"); strings.Contains(class, "comment-deleted") ||
|
||||
strings.Contains(class, "deleted-comment") {
|
||||
c.Deleted = true
|
||||
}
|
||||
|
||||
// Depth: prefer data-depth, then class "c-1/c-2/...", then style width %.
|
||||
if d, ok := sel.Attr("data-depth"); ok {
|
||||
if n, err := strconv.Atoi(strings.TrimSpace(d)); err == nil {
|
||||
c.Depth = n
|
||||
}
|
||||
} else if class, _ := sel.Attr("class"); class != "" {
|
||||
for _, tok := range strings.Fields(class) {
|
||||
if strings.HasPrefix(tok, "c-") {
|
||||
if n, err := strconv.Atoi(strings.TrimPrefix(tok, "c-")); err == nil {
|
||||
c.Depth = n
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if c.Depth == 0 {
|
||||
if style, ok := sel.Attr("style"); ok {
|
||||
c.Depth = depthFromWidthStyle(style)
|
||||
}
|
||||
}
|
||||
|
||||
// Parent: from data-parent or replyto-* class.
|
||||
if p, ok := sel.Attr("data-parent"); ok {
|
||||
if n, err := parseID[CommentID](strings.TrimSpace(p)); err == nil {
|
||||
c.Parent = n
|
||||
}
|
||||
} else if class, _ := sel.Attr("class"); class != "" {
|
||||
for _, tok := range strings.Fields(class) {
|
||||
if strings.HasPrefix(tok, "replyto-") {
|
||||
if n, err := parseID[CommentID](strings.TrimPrefix(tok, "replyto-")); err == nil {
|
||||
c.Parent = n
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Author.
|
||||
authorSel := sel.Find("a.iconusername, .comment-username a, .c-usernameBlock a").First()
|
||||
if authorSel.Length() > 0 {
|
||||
href, _ := authorSel.Attr("href")
|
||||
c.Author = UserRef{
|
||||
DisplayName: trimText(authorSel),
|
||||
AvatarURL: urls.AbsoluteCDN(trimAttr(authorSel.Find("img").First(), "src")),
|
||||
}
|
||||
if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
|
||||
c.Author.Name = parts[1]
|
||||
}
|
||||
}
|
||||
|
||||
// Date.
|
||||
dateSel := sel.Find("span.popup_date").First()
|
||||
if t, err := ParseFADate(firstNonEmpty(trimAttr(dateSel, "title"), trimText(dateSel))); err == nil {
|
||||
c.PostedAt = t
|
||||
}
|
||||
|
||||
// Body.
|
||||
body := sel.Find("div.comment-user-text, div.user-text, .no_overflow").First()
|
||||
c.BodyHTML = htmlOf(body)
|
||||
c.BodyText = strings.TrimSpace(body.Text())
|
||||
|
||||
if c.ID != 0 || c.Author.DisplayName != "" || c.BodyText != "" {
|
||||
out = append(out, c)
|
||||
}
|
||||
})
|
||||
|
||||
return out
|
||||
}
|
||||
|
||||
// depthFromWidthStyle reads a legacy FA inline style like
// "width: 96%; padding: ..." and maps the percentage width to a reply depth.
// FA used to shrink each reply by 3% per level, which is how earlier scrapers
// detected depth.
//
// The style is parsed declaration by declaration and only a property named
// exactly "width" (case-insensitive, surrounding whitespace ignored) is
// considered, so "max-width", "min-width" or "border-width" are never
// mistaken for the comment width. The first "width" declaration decides the
// result.
//
// Returns 0 if there is no width declaration, if its value is not an integer
// percentage, or if the percentage is <= 0 or >= 99. Otherwise the depth is
// (99 - width) / 3 using integer division, e.g. 96 -> 1, 93 -> 2.
func depthFromWidthStyle(style string) int {
	for _, decl := range strings.Split(strings.ToLower(style), ";") {
		prop, value, ok := strings.Cut(decl, ":")
		if !ok || strings.TrimSpace(prop) != "width" {
			continue
		}

		// Only percentage widths carry depth information; "100px" etc. do not.
		numStr, _, isPercent := strings.Cut(value, "%")
		if !isPercent {
			return 0
		}
		num, err := strconv.Atoi(strings.TrimSpace(numStr))
		if err != nil || num <= 0 || num >= 99 {
			// 99% and above is a top-level comment; non-positive widths are
			// treated as unusable.
			return 0
		}
		// Each 3% below 99 is one level deeper.
		return (99 - num) / 3
	}
	return 0
}
|
||||
Reference in New Issue
Block a user