initial commit
This commit is contained in:
96
journal_parser.go
Normal file
96
journal_parser.go
Normal file
@@ -0,0 +1,96 @@
|
||||
package fa
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
|
||||
"git.anthrove.art/public/go-fa-api/internal/urls"
|
||||
)
|
||||
|
||||
// parseJournal lifts a [Journal] from a /journal/{id}/ document. FA renders
|
||||
// the journal view inside the author's profile shell, so the author is
|
||||
// derived from the userpage nav rather than from any byline in the journal
|
||||
// body itself.
|
||||
func parseJournal(id JournalID, doc *goquery.Document) (*Journal, error) {
|
||||
j := &Journal{ID: id}
|
||||
|
||||
// Title.
|
||||
j.Title = trimText(doc.Find("#c-journalTitleTop__subject h3").First())
|
||||
if j.Title == "" {
|
||||
j.Title = firstNonEmpty(
|
||||
trimText(doc.Find("h2.journal-title").First()),
|
||||
trimText(doc.Find("h3.journal-title").First()),
|
||||
)
|
||||
}
|
||||
if j.Title == "" {
|
||||
return nil, fmt.Errorf("%w: journal %d: missing title", ErrParse, id)
|
||||
}
|
||||
|
||||
// Author from the userpage nav at the top of the rendered page.
|
||||
authorLink := doc.Find("a.c-usernameBlock__displayName[href^='/user/']").First()
|
||||
if authorLink.Length() > 0 {
|
||||
href, _ := authorLink.Attr("href")
|
||||
j.Author = UserRef{
|
||||
DisplayName: trimText(authorLink.Find("span.js-displayName").First()),
|
||||
AvatarURL: urls.AbsoluteCDN(trimAttr(doc.Find("img.user-nav-avatar").First(), "src")),
|
||||
}
|
||||
if j.Author.DisplayName == "" {
|
||||
j.Author.DisplayName = trimText(authorLink)
|
||||
}
|
||||
if parts := strings.Split(strings.Trim(href, "/"), "/"); len(parts) >= 2 {
|
||||
j.Author.Name = strings.ToLower(parts[1])
|
||||
}
|
||||
}
|
||||
|
||||
// Date from the journal title block.
|
||||
j.PostedAt = parsePopupDate(doc.Find("#c-journalTitleTop__date span.popup_date").First())
|
||||
if j.PostedAt.IsZero() {
|
||||
j.PostedAt = parsePopupDate(doc.Find("span.popup_date").First())
|
||||
}
|
||||
|
||||
// Body.
|
||||
body := firstNonEmptySel(doc,
|
||||
"div.section-body.journal-body-theme div.journal-content",
|
||||
"div.journal-content",
|
||||
"div.journal-body",
|
||||
"section.journal-body",
|
||||
)
|
||||
if body != nil {
|
||||
j.BodyHTML = htmlOf(body)
|
||||
j.BodyText = strings.TrimSpace(body.Text())
|
||||
}
|
||||
|
||||
return j, nil
|
||||
}
|
||||
|
||||
// parseUserJournalsPage parses a /journals/{user}/[page]/ listing page,
|
||||
// returning the journal entries it contains and whether a next page exists.
|
||||
//
|
||||
// FA renders each entry inside the listing differently from the standalone
|
||||
// journal page; selectors here target the listing's tile structure.
|
||||
func parseUserJournalsPage(doc *goquery.Document) (entries []*Journal, hasNext bool) {
|
||||
doc.Find("section.journal, section[id^=jid], div.journal[id^=jid]").Each(func(_ int, sel *goquery.Selection) {
|
||||
j := &Journal{}
|
||||
idAttr, _ := sel.Attr("id")
|
||||
idAttr = strings.TrimPrefix(idAttr, "jid:")
|
||||
idAttr = strings.TrimPrefix(idAttr, "journal-")
|
||||
if n, err := parseID[JournalID](strings.TrimSpace(idAttr)); err == nil {
|
||||
j.ID = n
|
||||
}
|
||||
j.Title = firstNonEmpty(
|
||||
trimText(sel.Find("h2 a, h3 a").First()),
|
||||
trimText(sel.Find("h2, h3").First()),
|
||||
)
|
||||
j.PostedAt = parsePopupDate(sel.Find("span.popup_date").First())
|
||||
body := sel.Find("div.journal-body, div.journal-content").First()
|
||||
j.BodyHTML = htmlOf(body)
|
||||
j.BodyText = strings.TrimSpace(body.Text())
|
||||
if j.ID != 0 || j.Title != "" {
|
||||
entries = append(entries, j)
|
||||
}
|
||||
})
|
||||
hasNext = detectNextPage(doc)
|
||||
return entries, hasNext
|
||||
}
|
||||
Reference in New Issue
Block a user