initial commit
This commit is contained in:
117
parse_helpers.go
Normal file
117
parse_helpers.go
Normal file
@@ -0,0 +1,117 @@
|
||||
package fa
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// parsePopupDate reads a FurAffinity popup_date element. Every popup_date on
|
||||
// the site carries an authoritative `data-time` attribute holding the post
|
||||
// time as Unix seconds; that is preferred. The visible title attribute is a
|
||||
// fallback for older rendered pages that omit data-time.
|
||||
//
|
||||
// Returns the zero time if neither source is parseable. Callers check IsZero
|
||||
// rather than receiving an error, because dates are nice-to-have and
|
||||
// shouldn't fail a whole parse on their own.
|
||||
func parsePopupDate(s *goquery.Selection) time.Time {
|
||||
if s == nil || s.Length() == 0 {
|
||||
return time.Time{}
|
||||
}
|
||||
if v := trimAttr(s, "data-time"); v != "" {
|
||||
if secs, err := strconv.ParseInt(v, 10, 64); err == nil && secs > 0 {
|
||||
return time.Unix(secs, 0).UTC()
|
||||
}
|
||||
}
|
||||
raw := firstNonEmpty(trimAttr(s, "title"), trimText(s))
|
||||
if t, err := ParseFADate(raw); err == nil {
|
||||
return t
|
||||
}
|
||||
return time.Time{}
|
||||
}
|
||||
|
||||
// extractIntFromHref returns the first "/"-separated segment of href that is
// an unsigned base-10 integer fitting in an int64, or 0 if there is none.
// Used to lift submission/journal/comment IDs out of links like
// "/view/12345678/" or "/journal/4567890/".
//
// Segments carrying a sign ("-5", "+7") are not treated as IDs and are
// skipped, as are empty segments and values too large for an int64.
func extractIntFromHref(href string) int64 {
	for _, seg := range strings.Split(href, "/") {
		// ParseUint rejects empty strings and sign prefixes; bitSize 63 caps
		// the value at math.MaxInt64 so the conversion below cannot overflow.
		n, err := strconv.ParseUint(seg, 10, 63)
		if err != nil {
			continue
		}
		return int64(n)
	}
	return 0
}
|
||||
|
||||
// trimText is the goquery-friendly equivalent of strings.TrimSpace applied
|
||||
// to a selection's text. Returns "" when the selection is empty.
|
||||
func trimText(s *goquery.Selection) string {
|
||||
if s == nil || s.Length() == 0 {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(s.Text())
|
||||
}
|
||||
|
||||
// trimAttr returns the trimmed value of attr, or "" if missing.
|
||||
func trimAttr(s *goquery.Selection, attr string) string {
|
||||
if s == nil || s.Length() == 0 {
|
||||
return ""
|
||||
}
|
||||
v, ok := s.Attr(attr)
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(v)
|
||||
}
|
||||
|
||||
// firstNonEmpty scans vals in order and returns the first one that is not
// blank, with its surrounding whitespace already trimmed. It returns "" when
// every value is blank or no values are given.
func firstNonEmpty(vals ...string) string {
	for i := range vals {
		trimmed := strings.TrimSpace(vals[i])
		if trimmed == "" {
			continue
		}
		return trimmed
	}
	return ""
}
|
||||
|
||||
// htmlOf returns the inner HTML of a selection, with leading/trailing
|
||||
// whitespace trimmed. Returns "" when the selection is empty or rendering fails.
|
||||
func htmlOf(s *goquery.Selection) string {
|
||||
if s == nil || s.Length() == 0 {
|
||||
return ""
|
||||
}
|
||||
h, err := s.Html()
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(h)
|
||||
}
|
||||
|
||||
// parseStatNumber parses the first number found in s. The number may use
// commas as thousands separators and may be surrounded by other text
// (e.g. "1,234 views" or "Views: 1,234").
//
// Only the first run of digits is used: scanning stops at the first
// character after the number that is neither a digit nor a comma directly
// followed by a digit. This keeps unrelated digits later in the string from
// being glued onto the result ("12 views, 3 favs" is 12, not 123).
//
// Returns 0 on any failure (no digits, or a value that overflows int) rather
// than propagating an error — stats are nice-to-have, not load-bearing.
func parseStatNumber(s string) int {
	var digits strings.Builder

	// Byte-wise scanning is safe here: digits and ',' are ASCII, and no byte
	// of a multi-byte UTF-8 sequence falls in the ASCII range.
scan:
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c >= '0' && c <= '9':
			digits.WriteByte(c)
		case digits.Len() == 0:
			// Still in the leading text; keep looking for the first digit.
		case c == ',' && i+1 < len(s) && s[i+1] >= '0' && s[i+1] <= '9':
			// Thousands separator inside the number; drop it.
		default:
			// First character past the end of the number.
			break scan
		}
	}

	if digits.Len() == 0 {
		return 0
	}
	n, err := strconv.Atoi(digits.String())
	if err != nil {
		// Only reachable when the digit run does not fit in an int.
		return 0
	}
	return n
}
|
||||
Reference in New Issue
Block a user