package fa

import (
	"fmt"
	"strings"

	"github.com/PuerkitoBio/goquery"

	"git.anthrove.art/public/go-fa-api/internal/urls"
)

// parseUser pulls a [User] out of /user/{name}/.
//
// name is trimmed and lower-cased to form the user's canonical Name. The
// display name is the only mandatory field: when no candidate (including the
// canonical name itself) yields a non-empty value, an error wrapping ErrParse
// is returned. Every other section of the profile page is optional and is
// filled in on a best-effort basis.
func parseUser(name string, doc *goquery.Document) (*User, error) {
	slug := strings.ToLower(strings.TrimSpace(name))
	u := &User{UserRef: UserRef{Name: slug}}

	// Locate the headline element; its full text is one of the last-resort
	// display-name candidates below.
	header := doc.Find("userpage-nav-user-details, div.userpage-nav-user-details, div.username").First()
	if header.Length() == 0 {
		header = doc.Find("h1.username, h2.username").First()
	}

	// Display-name candidates, most specific first. The header-scoped
	// selectors come before the bare .c-usernameBlock__displayName because
	// the unscoped class also matches the logged-in viewer's block elsewhere
	// on the page.
	scopedJSName := trimText(doc.Find("userpage-nav-user-details .js-displayName").First())
	scopedBlockName := trimText(doc.Find("userpage-nav-user-details .c-usernameBlock__displayName").First())
	legacySpanName := trimText(doc.Find(".username h2 span").First())
	legacyH1Name := trimText(doc.Find(".username h1").First())
	anyBlockName := trimText(doc.Find(".c-usernameBlock__displayName").First())
	anySimpleName := trimText(doc.Find(".c-usernameBlockSimple__displayName").First())
	headerText := trimText(header)
	u.DisplayName = firstNonEmpty(
		scopedJSName,
		scopedBlockName,
		legacySpanName,
		legacyH1Name,
		anyBlockName,
		anySimpleName,
		headerText,
		u.Name,
	)

	// Avatar of the profile owner. Only header-scoped selectors are used:
	// img.avatar / img.loggedin_user_avatar in the site navigation belong to
	// the logged-in viewer, so an unscoped selector would return the viewer's
	// avatar on every logged-in page load.
	avatarCustomElem := trimAttr(doc.Find("userpage-nav-avatar img").First(), "src")
	avatarDivElem := trimAttr(doc.Find("div.userpage-nav-avatar img").First(), "src")
	avatarNavClass := trimAttr(doc.Find("img.user-nav-avatar").First(), "src")
	u.AvatarURL = urls.AbsoluteCDN(firstNonEmpty(avatarCustomElem, avatarDivElem, avatarNavClass))

	if u.DisplayName == "" {
		return nil, fmt.Errorf("%w: user %q: missing display name", ErrParse, name)
	}

	// "Title" headline shown under the username.
	titleLegacy := trimText(doc.Find(".userpage-flex-item.username .font-small").First())
	titleSubtitle := trimText(doc.Find(".user-nav-user-details .c-usernameBlock__subtitle").First())
	u.Title = firstNonEmpty(titleLegacy, titleSubtitle)

	// Registered-on date: take the first span.popup_date whose title
	// attribute (or, failing that, text) parses as an FA date.
	doc.Find("span.popup_date").EachWithBreak(func(_ int, dateSpan *goquery.Selection) bool {
		joined, err := ParseFADate(firstNonEmpty(trimAttr(dateSpan, "title"), trimText(dateSpan)))
		if err != nil {
			return true // not parseable; keep scanning
		}
		u.Joined = joined
		return false
	})

	// Profile bio: large HTML block on the left column.
	if bioSel := firstNonEmptySel(doc,
		"div.userpage-profile",
		"div.profile-page-body",
		"div.profile-description",
	); bioSel != nil {
		u.BioHTML = htmlOf(bioSel)
		u.BioText = strings.TrimSpace(bioSel.Text())
	}

	// Stats: the "Stats" box in the right column is a flat run of
	// "Label: value" pairs inside one or more div.cell elements. Each label
	// is a span.highlight, and its value is the node that immediately
	// follows that span among the cell's child nodes.
	doc.Find("div.userpage-section-right div.cell").Each(func(_ int, cell *goquery.Selection) {
		children := cell.Contents()
		for i := 0; i < children.Length(); i++ {
			labelNode := children.Eq(i)
			if !labelNode.Is("span.highlight") {
				continue
			}
			// Eq past the end yields an empty selection whose Text is "".
			count := parseStatNumber(children.Eq(i + 1).Text())
			switch strings.ToLower(strings.TrimRight(trimText(labelNode), ":")) {
			case "submissions":
				u.Stats.Submissions = count
			case "favs", "favorites":
				u.Stats.Favorites = count
			case "views", "page visits":
				u.Stats.Views = count
			case "comments earned", "comments":
				u.Stats.Comments = count
			case "journals":
				u.Stats.Journals = count
			}
		}
	})

	// Watcher / watching counts are NOT in the stats box; FA renders them
	// in the "Recent Watchers" / "Recently Watched" section headers as
	// "View List (Watched by N)" / "View List (Watching N)".
	watchedByLink := doc.Find("section.watched-by-block .section-header a").First()
	u.Stats.Watchers = parseStatNumber(trimText(watchedByLink))
	watchingLink := doc.Find("section.is-watching-block .section-header a").First()
	u.Stats.Watching = parseStatNumber(trimText(watchingLink))

	// Contact information rows. A row is kept when it has a site label or a
	// handle; the handle falls back to the row's whole text when the row has
	// no link text.
	doc.Find("div.user-contact-user-info, .userpage-contact-information li").Each(func(_ int, row *goquery.Selection) {
		site := trimText(row.Find("span.user-contact-item-name, .contact-site").First())
		link := row.Find("a").First()
		handle := trimText(link)
		if handle == "" {
			handle = strings.TrimSpace(row.Text())
		}
		if site == "" && handle == "" {
			return
		}
		u.Contacts = append(u.Contacts, UserContact{
			Site:   site,
			Handle: handle,
			URL:    urls.AbsoluteCDN(link.AttrOr("href", "")),
		})
	})

	// Featured submission: small preview thumbnail on the profile. Only
	// recorded when the link's href yields a positive submission ID.
	featLink := doc.Find("div.userpage-featured-submission a, section.userpage-section-right figure a").First()
	if featLink.Length() > 0 {
		if id := extractIntFromHref(featLink.AttrOr("href", "")); id > 0 {
			thumb := trimAttr(featLink.Find("img").First(), "src")
			u.FeaturedSub = &SubmissionRef{
				ID:       SubmissionID(id),
				Title:    trimAttr(featLink, "title"),
				ThumbURL: urls.AbsoluteCDN(thumb),
			}
		}
	}

	// Shouts: each one is anchored by an <a> whose id starts with "shout-",
	// located inside a div.comment_container (underscore). Within that
	// wrapper the beta theme uses the custom elements comment-date and
	// comment-user-text, which goquery matches by tag name.
	doc.Find("a[id^='shout-']").Each(func(_ int, anchor *goquery.Selection) {
		wrapper := anchor.ParentsFiltered("div.comment_container").First()
		if wrapper.Length() == 0 {
			// Fallback for legacy markup where the anchor sits as a sibling
			// of a table or comment-container directly.
			wrapper = anchor.Parent()
		}

		var entry Shout

		if authorLink := wrapper.Find("a.c-usernameBlock__displayName").First(); authorLink.Length() > 0 {
			author := UserRef{
				DisplayName: trimText(authorLink.Find("span.js-displayName").First()),
				AvatarURL:   urls.AbsoluteCDN(trimAttr(wrapper.Find("img.comment_useravatar").First(), "src")),
			}
			if author.DisplayName == "" {
				author.DisplayName = trimText(authorLink)
			}
			// The canonical name is the second path segment of the link's
			// href once surrounding slashes are stripped.
			segments := strings.Split(strings.Trim(authorLink.AttrOr("href", ""), "/"), "/")
			if len(segments) >= 2 {
				author.Name = strings.ToLower(segments[1])
			}
			entry.Author = author
		}

		// Prefer the date inside comment-date; otherwise take any popup date
		// found in the wrapper.
		posted := parsePopupDate(wrapper.Find("comment-date span.popup_date").First())
		if posted.IsZero() {
			posted = parsePopupDate(wrapper.Find("span.popup_date").First())
		}
		entry.PostedAt = posted

		text := wrapper.Find("comment-user-text").First()
		if text.Length() == 0 {
			text = wrapper.Find(".comment_text, .comment-user-text").First()
		}
		entry.BodyHTML = htmlOf(text)

		// Drop entries that carry neither an author nor a body.
		if entry.Author.DisplayName == "" && entry.BodyHTML == "" {
			return
		}
		u.Shouts = append(u.Shouts, entry)
	})

	// Watch state: the header carries a Watch/Unwatch button when the viewer
	// is logged in and looking at another user's page. An "/unwatch/" link
	// means the viewer currently watches this user.
	_, unwatchLink := findWatchLinks(doc, u.Name)
	if unwatchLink != "" {
		u.Watched = true
	}

	// Site banner: the site-banner element in the page header holds either
	// the artist's own banner (URL under /art/<name>/, uploaded via
	// /controls/profilebanner/) or, when none is set, FA's site-wide promo
	// banner (URL under /media/banners/).
	bannerImg := doc.Find("site-banner img").First()
	if bannerImg.Length() > 0 {
		if bannerURL := urls.AbsoluteCDN(trimAttr(bannerImg, "src")); bannerURL != "" {
			ownPrefix := "/art/" + u.Name + "/"
			u.SiteBanner = &SiteBanner{
				ImageURL: bannerURL,
				IsCustom: strings.Contains(bannerURL, ownPrefix),
			}
		}
	}

	return u, nil
}

// firstNonEmptySel tries each selector in order against doc and returns the
// first element of the first selector that matches anything, or nil when no
// selector matches. It lets parser code tolerate alternate beta-theme markup.
func firstNonEmptySel(doc *goquery.Document, selectors ...string) *goquery.Selection {
	for i := range selectors {
		if match := doc.Find(selectors[i]).First(); match.Length() != 0 {
			return match
		}
	}
	return nil
}