Files
go-fa-api/journal_parser.go
2026-05-25 22:27:18 +02:00

97 lines
3.1 KiB
Go

package fa
import (
"fmt"
"strings"
"github.com/PuerkitoBio/goquery"
"git.anthrove.art/public/go-fa-api/internal/urls"
)
// parseJournal builds a [Journal] for the given id out of a parsed
// /journal/{id}/ document.
//
// The journal view is rendered inside the author's profile shell, so the
// author is read from the userpage navigation link instead of from a byline
// in the journal body.
//
// A title is mandatory: when none of the known title selectors yields text,
// the function returns a nil journal and an error wrapping ErrParse. Author,
// date and body are best-effort and are left at their zero values when the
// corresponding markup is absent.
func parseJournal(id JournalID, doc *goquery.Document) (*Journal, error) {
	// Title: try the current title block first, then the alternative headings.
	title := trimText(doc.Find("#c-journalTitleTop__subject h3").First())
	if title == "" {
		title = firstNonEmpty(
			trimText(doc.Find("h2.journal-title").First()),
			trimText(doc.Find("h3.journal-title").First()),
		)
	}
	if title == "" {
		return nil, fmt.Errorf("%w: journal %d: missing title", ErrParse, id)
	}

	out := &Journal{ID: id, Title: title}

	// Author: taken from the first username link pointing at /user/....
	if link := doc.Find("a.c-usernameBlock__displayName[href^='/user/']").First(); link.Length() > 0 {
		display := trimText(link.Find("span.js-displayName").First())
		avatar := urls.AbsoluteCDN(trimAttr(doc.Find("img.user-nav-avatar").First(), "src"))
		if display == "" {
			// No dedicated display-name span; use the link's own text.
			display = trimText(link)
		}
		out.Author = UserRef{
			DisplayName: display,
			AvatarURL:   avatar,
		}

		// The selector above only matches links that carry an href, so the
		// "present" flag is not needed. The login name is the path segment
		// after "user", lowercased.
		href, _ := link.Attr("href")
		segments := strings.Split(strings.Trim(href, "/"), "/")
		if len(segments) >= 2 {
			out.Author.Name = strings.ToLower(segments[1])
		}
	}

	// Date: prefer the popup date inside the title block, otherwise fall back
	// to the first popup date anywhere in the document.
	posted := parsePopupDate(doc.Find("#c-journalTitleTop__date span.popup_date").First())
	if posted.IsZero() {
		posted = parsePopupDate(doc.Find("span.popup_date").First())
	}
	out.PostedAt = posted

	// Body: candidate containers are listed from most to least specific.
	if content := firstNonEmptySel(doc,
		"div.section-body.journal-body-theme div.journal-content",
		"div.journal-content",
		"div.journal-body",
		"section.journal-body",
	); content != nil {
		out.BodyHTML = htmlOf(content)
		out.BodyText = strings.TrimSpace(content.Text())
	}

	return out, nil
}
// parseUserJournalsPage extracts the journal entries from a
// /journals/{user}/[page]/ listing document and reports, via detectNextPage,
// whether a further page follows.
//
// Listing entries use different markup from the standalone journal page, so
// the selectors here target the listing's per-entry containers. Each entry's
// ID is read from the container's id attribute after removing a leading
// "jid:" and then a leading "journal-". An ID that fails to parse stays zero.
// Containers that yield neither an ID nor a title are dropped.
func parseUserJournalsPage(doc *goquery.Document) (entries []*Journal, hasNext bool) {
	const entrySelector = "section.journal, section[id^=jid], div.journal[id^=jid]"

	doc.Find(entrySelector).Each(func(_ int, tile *goquery.Selection) {
		// A missing id attribute yields "", which simply fails to parse below.
		rawID, _ := tile.Attr("id")
		rawID = strings.TrimPrefix(strings.TrimPrefix(rawID, "jid:"), "journal-")

		var entryID JournalID
		if parsed, err := parseID[JournalID](strings.TrimSpace(rawID)); err == nil {
			entryID = parsed
		}

		// Prefer the linked heading text; fall back to the bare heading.
		heading := firstNonEmpty(
			trimText(tile.Find("h2 a, h3 a").First()),
			trimText(tile.Find("h2, h3").First()),
		)

		posted := parsePopupDate(tile.Find("span.popup_date").First())
		content := tile.Find("div.journal-body, div.journal-content").First()

		entry := &Journal{
			ID:       entryID,
			Title:    heading,
			PostedAt: posted,
			BodyHTML: htmlOf(content),
			BodyText: strings.TrimSpace(content.Text()),
		}

		// Nothing identifiable was recovered from this container; skip it.
		if entry.ID == 0 && entry.Title == "" {
			return
		}
		entries = append(entries, entry)
	})

	return entries, detectNextPage(doc)
}