Files
go-fa-api/parse_helpers.go
2026-05-25 22:27:18 +02:00

118 lines
3.0 KiB
Go

package fa
import (
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// parsePopupDate extracts the post time from a FurAffinity popup_date
// element.
//
// Two sources are tried, in order:
//  1. the `data-time` attribute, interpreted as Unix seconds (must be a
//     positive base-10 integer); the result is returned in UTC;
//  2. the `title` attribute, or the element text when the title is blank,
//     handed to ParseFADate. This covers older rendered pages that omit
//     data-time.
//
// No error is returned: a nil or empty selection, or one where neither source
// parses, yields the zero time.Time, so callers should test IsZero. Dates are
// treated as nice-to-have and must not fail a whole parse on their own.
func parsePopupDate(s *goquery.Selection) time.Time {
	var zero time.Time
	if s == nil || s.Length() == 0 {
		return zero
	}

	// Preferred source: machine-readable Unix seconds.
	if stamp := trimAttr(s, "data-time"); stamp != "" {
		secs, err := strconv.ParseInt(stamp, 10, 64)
		if err == nil && secs > 0 {
			return time.Unix(secs, 0).UTC()
		}
	}

	// Fallback: the human-readable date from the title, else the text.
	visible := firstNonEmpty(trimAttr(s, "title"), trimText(s))
	parsed, err := ParseFADate(visible)
	if err != nil {
		return zero
	}
	return parsed
}
// extractIntFromHref returns the first "/"-delimited segment of href that is
// made up solely of decimal digits, parsed as an int64. It is used to lift
// submission/journal/comment IDs out of links like "/view/12345678/" or
// "/journal/4567890/".
//
// Segments that are empty, carry a leading sign ("-5", "+7"), contain any
// non-digit character, or overflow int64 are skipped and the scan continues
// with the next segment. Returns 0 if no segment qualifies.
func extractIntFromHref(href string) int64 {
	for _, seg := range strings.Split(href, "/") {
		// strconv.ParseInt accepts an optional leading sign; IDs in hrefs
		// are never signed, so such segments must not be treated as IDs.
		if seg == "" || seg[0] == '+' || seg[0] == '-' {
			continue
		}
		if id, err := strconv.ParseInt(seg, 10, 64); err == nil {
			return id
		}
	}
	return 0
}
// trimText returns the text content of s with leading and trailing
// whitespace removed. A nil selection, or one that matched no nodes, yields
// "".
func trimText(s *goquery.Selection) string {
	if s != nil && s.Length() != 0 {
		return strings.TrimSpace(s.Text())
	}
	return ""
}
// trimAttr looks up attribute attr on s and returns its value with
// surrounding whitespace removed. It returns "" when s is nil, matched no
// nodes, or does not carry the attribute.
func trimAttr(s *goquery.Selection, attr string) string {
	if s == nil || s.Length() == 0 {
		return ""
	}
	if raw, found := s.Attr(attr); found {
		return strings.TrimSpace(raw)
	}
	return ""
}
// firstNonEmpty scans vals in order and returns the first one that still has
// content after whitespace trimming. The returned string is the trimmed
// form, not the original. Returns "" when every value is blank or no values
// are given.
func firstNonEmpty(vals ...string) string {
	for i := range vals {
		trimmed := strings.TrimSpace(vals[i])
		if trimmed == "" {
			continue
		}
		return trimmed
	}
	return ""
}
// htmlOf renders the inner HTML of s via its Html method and returns it with
// leading and trailing whitespace removed. It returns "" when s is nil,
// matched no nodes, or rendering reports an error.
func htmlOf(s *goquery.Selection) string {
	if s == nil || s.Length() == 0 {
		return ""
	}
	if inner, err := s.Html(); err == nil {
		return strings.TrimSpace(inner)
	}
	return ""
}
// parseStatNumber parses a number that may carry thousands separators or
// surrounding text (e.g. "1,234 views" yields 1234). Every ASCII digit in s
// is kept, in order, and everything else (commas, signs, letters,
// whitespace) is discarded before conversion.
//
// Returns 0 on any failure (no digits at all, or a value that does not fit
// in an int) rather than propagating an error: stats are nice-to-have, not
// load-bearing.
func parseStatNumber(s string) int {
	// Keep ASCII digits only; a negative return from the mapping function
	// drops the rune. Commas must go too, since strconv.Atoi rejects them.
	digits := strings.Map(func(r rune) rune {
		if r < '0' || r > '9' {
			return -1
		}
		return r
	}, s)

	// Atoi fails on the empty string, which covers the "no digits" case.
	value, err := strconv.Atoi(digits)
	if err != nil {
		return 0
	}
	return value
}