220 lines
7.9 KiB
Go
220 lines
7.9 KiB
Go
package fa
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"git.anthrove.art/public/go-fa-api/internal/urls"
|
|
)
|
|
|
|
// parseUser pulls a [User] out of /user/{name}/.
|
|
//
|
|
// FA's profile page has many optional sections; parser treats the headline
|
|
// (display name + avatar) as required, everything else as best-effort.
|
|
func parseUser(name string, doc *goquery.Document) (*User, error) {
|
|
u := &User{UserRef: UserRef{Name: strings.ToLower(strings.TrimSpace(name))}}
|
|
|
|
// Headline username + avatar.
|
|
header := doc.Find("userpage-nav-user-details, div.userpage-nav-user-details, div.username").First()
|
|
if header.Length() == 0 {
|
|
header = doc.Find("h1.username, h2.username").First()
|
|
}
|
|
u.DisplayName = firstNonEmpty(
|
|
// Scope the display name to the profile header first an unscoped
|
|
// .c-usernameBlock__displayName also matches the logged-in viewer's
|
|
// block elsewhere on the page.
|
|
trimText(doc.Find("userpage-nav-user-details .js-displayName").First()),
|
|
trimText(doc.Find("userpage-nav-user-details .c-usernameBlock__displayName").First()),
|
|
trimText(doc.Find(".username h2 span").First()),
|
|
trimText(doc.Find(".username h1").First()),
|
|
trimText(doc.Find(".c-usernameBlock__displayName").First()),
|
|
trimText(doc.Find(".c-usernameBlockSimple__displayName").First()),
|
|
trimText(header),
|
|
u.Name,
|
|
)
|
|
// The profile owner's avatar lives in the <userpage-nav-avatar> header
|
|
// element. It must be scoped there: img.avatar / img.loggedin_user_avatar
|
|
// in the site navigation belong to the logged-in viewer, and an unscoped
|
|
// selector picks the viewer's avatar on every logged-in page load.
|
|
u.AvatarURL = urls.AbsoluteCDN(firstNonEmpty(
|
|
trimAttr(doc.Find("userpage-nav-avatar img").First(), "src"),
|
|
trimAttr(doc.Find("div.userpage-nav-avatar img").First(), "src"),
|
|
trimAttr(doc.Find("img.user-nav-avatar").First(), "src"),
|
|
))
|
|
if u.DisplayName == "" {
|
|
return nil, fmt.Errorf("%w: user %q: missing display name", ErrParse, name)
|
|
}
|
|
|
|
// "Title" headline shown under the username.
|
|
u.Title = firstNonEmpty(
|
|
trimText(doc.Find(".userpage-flex-item.username .font-small").First()),
|
|
trimText(doc.Find(".user-nav-user-details .c-usernameBlock__subtitle").First()),
|
|
)
|
|
|
|
// Registered-on date appears in profile-meta or in a span.popup_date.
|
|
doc.Find("span.popup_date").EachWithBreak(func(_ int, sel *goquery.Selection) bool {
|
|
raw := firstNonEmpty(trimAttr(sel, "title"), trimText(sel))
|
|
if t, err := ParseFADate(raw); err == nil {
|
|
u.Joined = t
|
|
return false
|
|
}
|
|
return true
|
|
})
|
|
|
|
// Profile bio: large HTML block on the left column.
|
|
bio := firstNonEmptySel(doc,
|
|
"div.userpage-profile",
|
|
"div.profile-page-body",
|
|
"div.profile-description",
|
|
)
|
|
if bio != nil {
|
|
u.BioHTML = htmlOf(bio)
|
|
u.BioText = strings.TrimSpace(bio.Text())
|
|
}
|
|
|
|
// Stats: the "Stats" box in the right column is a flat run of
|
|
// <span class="highlight">Label:</span> value<br/>
|
|
// pairs inside one or more <div class="cell">. The value is the bare
|
|
// text node that immediately follows each highlight span.
|
|
doc.Find("div.userpage-section-right div.cell").Each(func(_ int, cell *goquery.Selection) {
|
|
nodes := cell.Contents()
|
|
nodes.Each(func(i int, node *goquery.Selection) {
|
|
if !node.Is("span.highlight") {
|
|
return
|
|
}
|
|
label := strings.ToLower(strings.TrimRight(trimText(node), ":"))
|
|
val := parseStatNumber(nodes.Eq(i + 1).Text())
|
|
switch label {
|
|
case "submissions":
|
|
u.Stats.Submissions = val
|
|
case "favs", "favorites":
|
|
u.Stats.Favorites = val
|
|
case "views", "page visits":
|
|
u.Stats.Views = val
|
|
case "comments earned", "comments":
|
|
u.Stats.Comments = val
|
|
case "journals":
|
|
u.Stats.Journals = val
|
|
}
|
|
})
|
|
})
|
|
|
|
// Watcher / watching counts are NOT in the stats box FA renders them
|
|
// in the "Recent Watchers" / "Recently Watched" section headers as
|
|
// "View List (Watched by N)" / "View List (Watching N)".
|
|
u.Stats.Watchers = parseStatNumber(trimText(doc.Find("section.watched-by-block .section-header a").First()))
|
|
u.Stats.Watching = parseStatNumber(trimText(doc.Find("section.is-watching-block .section-header a").First()))
|
|
|
|
// Contact information rows.
|
|
doc.Find("div.user-contact-user-info, .userpage-contact-information li").Each(func(_ int, sel *goquery.Selection) {
|
|
site := trimText(sel.Find("span.user-contact-item-name, .contact-site").First())
|
|
linkSel := sel.Find("a").First()
|
|
handle := trimText(linkSel)
|
|
if handle == "" {
|
|
handle = strings.TrimSpace(sel.Text())
|
|
}
|
|
href, _ := linkSel.Attr("href")
|
|
if site != "" || handle != "" {
|
|
u.Contacts = append(u.Contacts, UserContact{
|
|
Site: site,
|
|
Handle: handle,
|
|
URL: urls.AbsoluteCDN(href),
|
|
})
|
|
}
|
|
})
|
|
|
|
// Featured submission: small preview thumbnail on the profile.
|
|
if feat := doc.Find("div.userpage-featured-submission a, section.userpage-section-right figure a").First(); feat.Length() > 0 {
|
|
href, _ := feat.Attr("href")
|
|
if id := extractIntFromHref(href); id > 0 {
|
|
u.FeaturedSub = &SubmissionRef{
|
|
ID: SubmissionID(id),
|
|
Title: trimAttr(feat, "title"),
|
|
ThumbURL: urls.AbsoluteCDN(trimAttr(feat.Find("img").First(), "src")),
|
|
}
|
|
}
|
|
}
|
|
|
|
// Shouts: anchored by <a id="shout-NNN"> inside a
|
|
// <div class="comment_container"> (underscore). Beta uses custom HTML5
|
|
// elements <comment-container>/<comment-username>/<comment-date>/
|
|
// <comment-user-text> within that wrapper goquery matches them by tag.
|
|
doc.Find("a[id^='shout-']").Each(func(_ int, anchor *goquery.Selection) {
|
|
container := anchor.ParentsFiltered("div.comment_container").First()
|
|
if container.Length() == 0 {
|
|
// Fallback for legacy markup where the anchor sits as a sibling
|
|
// of a table or comment-container directly.
|
|
container = anchor.Parent()
|
|
}
|
|
shout := Shout{}
|
|
|
|
authorLink := container.Find("a.c-usernameBlock__displayName").First()
|
|
if authorLink.Length() > 0 {
|
|
href, _ := authorLink.Attr("href")
|
|
shout.Author = UserRef{
|
|
DisplayName: trimText(authorLink.Find("span.js-displayName").First()),
|
|
AvatarURL: urls.AbsoluteCDN(trimAttr(container.Find("img.comment_useravatar").First(), "src")),
|
|
}
|
|
if shout.Author.DisplayName == "" {
|
|
shout.Author.DisplayName = trimText(authorLink)
|
|
}
|
|
if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
|
|
shout.Author.Name = strings.ToLower(parts[1])
|
|
}
|
|
}
|
|
|
|
shout.PostedAt = parsePopupDate(container.Find("comment-date span.popup_date").First())
|
|
if shout.PostedAt.IsZero() {
|
|
shout.PostedAt = parsePopupDate(container.Find("span.popup_date").First())
|
|
}
|
|
|
|
body := container.Find("comment-user-text").First()
|
|
if body.Length() == 0 {
|
|
body = container.Find(".comment_text, .comment-user-text").First()
|
|
}
|
|
shout.BodyHTML = htmlOf(body)
|
|
|
|
if shout.Author.DisplayName != "" || shout.BodyHTML != "" {
|
|
u.Shouts = append(u.Shouts, shout)
|
|
}
|
|
})
|
|
|
|
// Watch state: the header carries a Watch/Unwatch button when the viewer
|
|
// is logged in and looking at another user's page. An "/unwatch/" link
|
|
// means the viewer currently watches this user.
|
|
if _, unwatch := findWatchLinks(doc, u.Name); unwatch != "" {
|
|
u.Watched = true
|
|
}
|
|
|
|
// Site banner: the <site-banner> element in the page header holds either
|
|
// the artist's own banner (URL under /art/<name>/, uploaded via
|
|
// /controls/profilebanner/) or when none is set FA's site-wide promo
|
|
// banner (URL under /media/banners/).
|
|
if banner := doc.Find("site-banner img").First(); banner.Length() > 0 {
|
|
src := urls.AbsoluteCDN(trimAttr(banner, "src"))
|
|
if src != "" {
|
|
u.SiteBanner = &SiteBanner{
|
|
ImageURL: src,
|
|
IsCustom: strings.Contains(src, "/art/"+u.Name+"/"),
|
|
}
|
|
}
|
|
}
|
|
|
|
return u, nil
|
|
}
|
|
|
|
// firstNonEmptySel returns the first selection matching any of the selectors,
|
|
// or nil if none match. Useful for parser code that needs to tolerate
|
|
// alternate beta-theme markup.
|
|
func firstNonEmptySel(doc *goquery.Document, selectors ...string) *goquery.Selection {
|
|
for _, sel := range selectors {
|
|
s := doc.Find(sel).First()
|
|
if s.Length() > 0 {
|
|
return s
|
|
}
|
|
}
|
|
return nil
|
|
}
|