118 lines
3.0 KiB
Go
118 lines
3.0 KiB
Go
package fa
|
|
|
|
import (
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
)
|
|
|
|
// parsePopupDate reads a FurAffinity popup_date element. Every popup_date on
|
|
// the site carries an authoritative `data-time` attribute holding the post
|
|
// time as Unix seconds; that is preferred. The visible title attribute is a
|
|
// fallback for older rendered pages that omit data-time.
|
|
//
|
|
// Returns the zero time if neither source is parseable. Callers check IsZero
|
|
// rather than receiving an error, because dates are nice-to-have and
|
|
// shouldn't fail a whole parse on their own.
|
|
func parsePopupDate(s *goquery.Selection) time.Time {
|
|
if s == nil || s.Length() == 0 {
|
|
return time.Time{}
|
|
}
|
|
if v := trimAttr(s, "data-time"); v != "" {
|
|
if secs, err := strconv.ParseInt(v, 10, 64); err == nil && secs > 0 {
|
|
return time.Unix(secs, 0).UTC()
|
|
}
|
|
}
|
|
raw := firstNonEmpty(trimAttr(s, "title"), trimText(s))
|
|
if t, err := ParseFADate(raw); err == nil {
|
|
return t
|
|
}
|
|
return time.Time{}
|
|
}
|
|
|
|
// extractIntFromHref returns the first "/"-delimited segment of href that
// parses as a base-10 int64, e.g. 12345678 for "/view/12345678/" or 4567890
// for "/journal/4567890/". It is used to lift submission, journal and comment
// IDs out of links.
//
// Segments are parsed with strconv.ParseInt, so a leading sign is accepted.
// Returns 0 when no segment parses; that is indistinguishable from a literal
// "0" segment.
func extractIntFromHref(href string) int64 {
	isSlash := func(r rune) bool { return r == '/' }

	// FieldsFunc never yields empty fields, so leading, trailing and doubled
	// slashes need no special handling.
	for _, part := range strings.FieldsFunc(href, isSlash) {
		id, err := strconv.ParseInt(part, 10, 64)
		if err != nil {
			continue
		}
		return id
	}
	return 0
}
|
|
|
|
// trimText is the goquery-friendly equivalent of strings.TrimSpace applied
|
|
// to a selection's text. Returns "" when the selection is empty.
|
|
func trimText(s *goquery.Selection) string {
|
|
if s == nil || s.Length() == 0 {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(s.Text())
|
|
}
|
|
|
|
// trimAttr returns the trimmed value of attr, or "" if missing.
|
|
func trimAttr(s *goquery.Selection, attr string) string {
|
|
if s == nil || s.Length() == 0 {
|
|
return ""
|
|
}
|
|
v, ok := s.Attr(attr)
|
|
if !ok {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(v)
|
|
}
|
|
|
|
// firstNonEmpty scans vals in order and returns the first one that still has
// content after whitespace trimming. The trimmed form is what is returned.
// Returns "" when vals is empty or every value is blank.
func firstNonEmpty(vals ...string) string {
	for i := range vals {
		trimmed := strings.TrimSpace(vals[i])
		if trimmed == "" {
			continue
		}
		return trimmed
	}
	return ""
}
|
|
|
|
// htmlOf returns the inner HTML of a selection, with leading/trailing
|
|
// whitespace trimmed. Returns "" when the selection is empty or rendering fails.
|
|
func htmlOf(s *goquery.Selection) string {
|
|
if s == nil || s.Length() == 0 {
|
|
return ""
|
|
}
|
|
h, err := s.Html()
|
|
if err != nil {
|
|
return ""
|
|
}
|
|
return strings.TrimSpace(h)
|
|
}
|
|
|
|
// parseStatNumber extracts the first integer found in s, tolerating
// surrounding text and comma thousands separators (e.g. "1,234 views" yields
// 1234 and "Views: 56" yields 56).
//
// Only the first number is read. Scanning stops at the first character after
// it that is neither a digit nor a comma sitting between two digits, so
// "12 comments, 3 replies" yields 12 instead of gluing the digits into 123.
//
// Returns 0 when s contains no ASCII digit or the number does not fit in an
// int. No error is propagated: stats are nice-to-have, not load-bearing.
func parseStatNumber(s string) int {
	isDigit := func(c byte) bool { return c >= '0' && c <= '9' }

	// Skip any label or whitespace in front of the number. Byte-wise scanning
	// is safe: every byte of a multi-byte UTF-8 sequence is >= 0x80, so it can
	// never be mistaken for an ASCII digit or comma.
	start := 0
	for start < len(s) && !isDigit(s[start]) {
		start++
	}

	var digits strings.Builder
scan:
	for i := start; i < len(s); i++ {
		c := s[i]
		switch {
		case isDigit(c):
			digits.WriteByte(c)
		case c == ',' && i+1 < len(s) && isDigit(s[i+1]):
			// Thousands separator inside the number: drop it, keep scanning.
		default:
			// The number has ended; later digits belong to something else.
			break scan
		}
	}
	if digits.Len() == 0 {
		return 0
	}

	n, err := strconv.Atoi(digits.String())
	if err != nil {
		// Only reachable on overflow, since digits holds nothing but 0-9.
		return 0
	}
	return n
}
|