97 lines
3.1 KiB
Go
97 lines
3.1 KiB
Go
package fa
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"git.anthrove.art/public/go-fa-api/internal/urls"
|
|
)
|
|
|
|
// parseJournal lifts a [Journal] from a /journal/{id}/ document. FA renders
|
|
// the journal view inside the author's profile shell, so the author is
|
|
// derived from the userpage nav rather than from any byline in the journal
|
|
// body itself.
|
|
func parseJournal(id JournalID, doc *goquery.Document) (*Journal, error) {
|
|
j := &Journal{ID: id}
|
|
|
|
// Title.
|
|
j.Title = trimText(doc.Find("#c-journalTitleTop__subject h3").First())
|
|
if j.Title == "" {
|
|
j.Title = firstNonEmpty(
|
|
trimText(doc.Find("h2.journal-title").First()),
|
|
trimText(doc.Find("h3.journal-title").First()),
|
|
)
|
|
}
|
|
if j.Title == "" {
|
|
return nil, fmt.Errorf("%w: journal %d: missing title", ErrParse, id)
|
|
}
|
|
|
|
// Author from the userpage nav at the top of the rendered page.
|
|
authorLink := doc.Find("a.c-usernameBlock__displayName[href^='/user/']").First()
|
|
if authorLink.Length() > 0 {
|
|
href, _ := authorLink.Attr("href")
|
|
j.Author = UserRef{
|
|
DisplayName: trimText(authorLink.Find("span.js-displayName").First()),
|
|
AvatarURL: urls.AbsoluteCDN(trimAttr(doc.Find("img.user-nav-avatar").First(), "src")),
|
|
}
|
|
if j.Author.DisplayName == "" {
|
|
j.Author.DisplayName = trimText(authorLink)
|
|
}
|
|
if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
|
|
j.Author.Name = strings.ToLower(parts[1])
|
|
}
|
|
}
|
|
|
|
// Date from the journal title block.
|
|
j.PostedAt = parsePopupDate(doc.Find("#c-journalTitleTop__date span.popup_date").First())
|
|
if j.PostedAt.IsZero() {
|
|
j.PostedAt = parsePopupDate(doc.Find("span.popup_date").First())
|
|
}
|
|
|
|
// Body.
|
|
body := firstNonEmptySel(doc,
|
|
"div.section-body.journal-body-theme div.journal-content",
|
|
"div.journal-content",
|
|
"div.journal-body",
|
|
"section.journal-body",
|
|
)
|
|
if body != nil {
|
|
j.BodyHTML = htmlOf(body)
|
|
j.BodyText = strings.TrimSpace(body.Text())
|
|
}
|
|
|
|
return j, nil
|
|
}
|
|
|
|
// parseUserJournalsPage parses a /journals/{user}/[page]/ listing page,
|
|
// returning the journal entries it contains and whether a next page exists.
|
|
//
|
|
// FA renders each entry inside the listing differently from the standalone
|
|
// journal page; selectors here target the listing's tile structure.
|
|
func parseUserJournalsPage(doc *goquery.Document) (entries []*Journal, hasNext bool) {
|
|
doc.Find("section.journal, section[id^=jid], div.journal[id^=jid]").Each(func(_ int, sel *goquery.Selection) {
|
|
j := &Journal{}
|
|
idAttr, _ := sel.Attr("id")
|
|
idAttr = strings.TrimPrefix(idAttr, "jid:")
|
|
idAttr = strings.TrimPrefix(idAttr, "journal-")
|
|
if n, err := parseID[JournalID](strings.TrimSpace(idAttr)); err == nil {
|
|
j.ID = n
|
|
}
|
|
j.Title = firstNonEmpty(
|
|
trimText(sel.Find("h2 a, h3 a").First()),
|
|
trimText(sel.Find("h2, h3").First()),
|
|
)
|
|
j.PostedAt = parsePopupDate(sel.Find("span.popup_date").First())
|
|
body := sel.Find("div.journal-body, div.journal-content").First()
|
|
j.BodyHTML = htmlOf(body)
|
|
j.BodyText = strings.TrimSpace(body.Text())
|
|
if j.ID != 0 || j.Title != "" {
|
|
entries = append(entries, j)
|
|
}
|
|
})
|
|
hasNext = detectNextPage(doc)
|
|
return entries, hasNext
|
|
}
|