Files
go-fa-api/user_parser.go
2026-05-25 22:27:18 +02:00

220 lines
7.9 KiB
Go

package fa
import (
"fmt"
"strings"
"github.com/PuerkitoBio/goquery"
"git.anthrove.art/public/go-fa-api/internal/urls"
)
// parseUser builds a [User] from the parsed HTML of /user/{name}/.
//
// FA's profile page is made of many optional sections, so nearly every field
// is filled on a best-effort basis and left at its zero value when the
// matching markup is absent. The display name is the only hard requirement:
// it falls back to the normalised name argument, and an [ErrParse]-wrapped
// error is returned only when even that is empty.
func parseUser(name string, doc *goquery.Document) (*User, error) {
	login := strings.ToLower(strings.TrimSpace(name))
	profile := &User{UserRef: UserRef{Name: login}}

	// Profile headline element; its text is one of the last display-name
	// fallbacks below.
	headline := doc.Find("userpage-nav-user-details, div.userpage-nav-user-details, div.username").First()
	if headline.Length() == 0 {
		headline = doc.Find("h1.username, h2.username").First()
	}

	// Display-name selectors in priority order. The header-scoped ones come
	// first — an unscoped .c-usernameBlock__displayName also matches the
	// logged-in viewer's block elsewhere on the page.
	displayNameSelectors := []string{
		"userpage-nav-user-details .js-displayName",
		"userpage-nav-user-details .c-usernameBlock__displayName",
		".username h2 span",
		".username h1",
		".c-usernameBlock__displayName",
		".c-usernameBlockSimple__displayName",
	}
	nameCandidates := make([]string, 0, len(displayNameSelectors)+2)
	for _, query := range displayNameSelectors {
		nameCandidates = append(nameCandidates, trimText(doc.Find(query).First()))
	}
	nameCandidates = append(nameCandidates, trimText(headline), login)
	profile.DisplayName = firstNonEmpty(nameCandidates...)
	if profile.DisplayName == "" {
		return nil, fmt.Errorf("%w: user %q: missing display name", ErrParse, name)
	}

	// Avatar of the profile owner. The lookup is deliberately scoped to the
	// <userpage-nav-avatar> header element: img.avatar and
	// img.loggedin_user_avatar in the site navigation belong to the
	// logged-in viewer, so an unscoped selector would return the viewer's
	// avatar on every logged-in page load.
	avatarSrc := firstNonEmpty(
		trimAttr(doc.Find("userpage-nav-avatar img").First(), "src"),
		trimAttr(doc.Find("div.userpage-nav-avatar img").First(), "src"),
		trimAttr(doc.Find("img.user-nav-avatar").First(), "src"),
	)
	profile.AvatarURL = urls.AbsoluteCDN(avatarSrc)

	// "Title" headline shown under the username.
	// NOTE(review): the second selector says .user-nav-user-details while the
	// header lookups above use userpage-nav-user-details — confirm which
	// spelling the markup really uses.
	titleLegacy := trimText(doc.Find(".userpage-flex-item.username .font-small").First())
	titleBeta := trimText(doc.Find(".user-nav-user-details .c-usernameBlock__subtitle").First())
	profile.Title = firstNonEmpty(titleLegacy, titleBeta)

	// Join date: the first span.popup_date in the document whose title
	// attribute (or, failing that, text) ParseFADate accepts.
	// NOTE(review): the selector is document-wide, so popup dates inside
	// shouts match as well — confirm the registration date always comes first.
	doc.Find("span.popup_date").EachWithBreak(func(_ int, dateSel *goquery.Selection) bool {
		stamp := firstNonEmpty(trimAttr(dateSel, "title"), trimText(dateSel))
		joined, err := ParseFADate(stamp)
		if err != nil {
			return true // not a parseable date; keep scanning
		}
		profile.Joined = joined
		return false
	})

	// Profile bio: large HTML block on the left column, kept both as HTML
	// and as trimmed plain text.
	if bio := firstNonEmptySel(doc,
		"div.userpage-profile",
		"div.profile-page-body",
		"div.profile-description",
	); bio != nil {
		profile.BioHTML = htmlOf(bio)
		profile.BioText = strings.TrimSpace(bio.Text())
	}

	// Stats: the "Stats" box in the right column is a flat run of
	//   <span class="highlight">Label:</span> value<br/>
	// pairs inside one or more <div class="cell">. Each value is read from
	// the node that immediately follows its highlight span.
	doc.Find("div.userpage-section-right div.cell").Each(func(_ int, cell *goquery.Selection) {
		children := cell.Contents()
		for idx := 0; idx < children.Length(); idx++ {
			labelNode := children.Eq(idx)
			if !labelNode.Is("span.highlight") {
				continue
			}
			key := strings.ToLower(strings.TrimRight(trimText(labelNode), ":"))
			// Eq yields an empty selection past the end, so a trailing
			// label simply parses an empty string.
			count := parseStatNumber(children.Eq(idx + 1).Text())
			switch key {
			case "submissions":
				profile.Stats.Submissions = count
			case "favs", "favorites":
				profile.Stats.Favorites = count
			case "views", "page visits":
				profile.Stats.Views = count
			case "comments earned", "comments":
				profile.Stats.Comments = count
			case "journals":
				profile.Stats.Journals = count
			}
		}
	})

	// Watcher / watching counts are NOT in the stats box — FA renders them
	// in the "Recent Watchers" / "Recently Watched" section headers as
	// "View List (Watched by N)" / "View List (Watching N)".
	watchersLink := doc.Find("section.watched-by-block .section-header a").First()
	watchingLink := doc.Find("section.is-watching-block .section-header a").First()
	profile.Stats.Watchers = parseStatNumber(trimText(watchersLink))
	profile.Stats.Watching = parseStatNumber(trimText(watchingLink))

	// Contact information rows. A row is kept when it yields a site name or
	// a handle; the handle is the link text, or the whole row's text when
	// the row has no link text.
	doc.Find("div.user-contact-user-info, .userpage-contact-information li").Each(func(_ int, row *goquery.Selection) {
		anchor := row.Find("a").First()
		siteName := trimText(row.Find("span.user-contact-item-name, .contact-site").First())
		account := trimText(anchor)
		if account == "" {
			account = strings.TrimSpace(row.Text())
		}
		if siteName == "" && account == "" {
			return
		}
		// A missing href is fine: the URL is then built from "".
		link, _ := anchor.Attr("href")
		profile.Contacts = append(profile.Contacts, UserContact{
			Site:   siteName,
			Handle: account,
			URL:    urls.AbsoluteCDN(link),
		})
	})

	// Featured submission: small preview thumbnail on the profile. It is
	// recorded only when a positive submission ID can be pulled from the
	// link's href.
	featured := doc.Find("div.userpage-featured-submission a, section.userpage-section-right figure a").First()
	if featured.Length() > 0 {
		link, _ := featured.Attr("href")
		if subID := extractIntFromHref(link); subID > 0 {
			thumbSrc := trimAttr(featured.Find("img").First(), "src")
			profile.FeaturedSub = &SubmissionRef{
				ID:       SubmissionID(subID),
				Title:    trimAttr(featured, "title"),
				ThumbURL: urls.AbsoluteCDN(thumbSrc),
			}
		}
	}

	// Shouts: anchored by <a id="shout-NNN"> inside a
	// <div class="comment_container"> (underscore). Beta uses custom HTML5
	// elements <comment-container>/<comment-username>/<comment-date>/
	// <comment-user-text> within that wrapper — goquery matches them by tag.
	doc.Find("a[id^='shout-']").Each(func(_ int, marker *goquery.Selection) {
		wrapper := marker.ParentsFiltered("div.comment_container").First()
		if wrapper.Length() == 0 {
			// Fallback for legacy markup where the anchor sits as a sibling
			// of a table or comment-container directly.
			wrapper = marker.Parent()
		}

		var entry Shout
		if author := wrapper.Find("a.c-usernameBlock__displayName").First(); author.Length() > 0 {
			shownName := trimText(author.Find("span.js-displayName").First())
			if shownName == "" {
				shownName = trimText(author)
			}
			entry.Author = UserRef{
				DisplayName: shownName,
				AvatarURL:   urls.AbsoluteCDN(trimAttr(wrapper.Find("img.comment_useravatar").First(), "src")),
			}
			// The login name is the second path segment of the author
			// link (after trimming the surrounding slashes).
			authorHref, _ := author.Attr("href")
			segments := strings.Split(strings.Trim(authorHref, "/"), "/")
			if len(segments) >= 2 {
				entry.Author.Name = strings.ToLower(segments[1])
			}
		}

		// Prefer the date inside <comment-date>; otherwise take any popup
		// date found in the wrapper.
		entry.PostedAt = parsePopupDate(wrapper.Find("comment-date span.popup_date").First())
		if entry.PostedAt.IsZero() {
			entry.PostedAt = parsePopupDate(wrapper.Find("span.popup_date").First())
		}

		text := wrapper.Find("comment-user-text").First()
		if text.Length() == 0 {
			text = wrapper.Find(".comment_text, .comment-user-text").First()
		}
		entry.BodyHTML = htmlOf(text)

		// Drop anchors that produced neither an author nor a body.
		if entry.Author.DisplayName == "" && entry.BodyHTML == "" {
			return
		}
		profile.Shouts = append(profile.Shouts, entry)
	})

	// Watch state: the header carries a Watch/Unwatch button when the viewer
	// is logged in and looking at another user's page. An "/unwatch/" link
	// means the viewer currently watches this user.
	_, unwatchLink := findWatchLinks(doc, login)
	if unwatchLink != "" {
		profile.Watched = true
	}

	// Site banner: the <site-banner> element in the page header holds either
	// the artist's own banner (URL under /art/<name>/, uploaded via
	// /controls/profilebanner/) or — when none is set — FA's site-wide promo
	// banner (URL under /media/banners/).
	bannerImg := doc.Find("site-banner img").First()
	if bannerImg.Length() > 0 {
		if bannerURL := urls.AbsoluteCDN(trimAttr(bannerImg, "src")); bannerURL != "" {
			profile.SiteBanner = &SiteBanner{
				ImageURL: bannerURL,
				IsCustom: strings.Contains(bannerURL, "/art/"+login+"/"),
			}
		}
	}

	return profile, nil
}
// firstNonEmptySel tries each selector against doc in the order given and
// returns the first element of the first selector that matches anything.
// It returns nil when no selector matches, so callers must nil-check the
// result. This lets parser code list alternate beta-theme markup variants in
// priority order.
func firstNonEmptySel(doc *goquery.Document, selectors ...string) *goquery.Selection {
	for i := range selectors {
		matches := doc.Find(selectors[i])
		if matches.Length() == 0 {
			continue
		}
		return matches.First()
	}
	return nil
}