Files
go-fa-api/notes_parser.go
2026-05-25 22:27:18 +02:00

134 lines
4.9 KiB
Go

package fa
import (
"fmt"
"strings"
"github.com/PuerkitoBio/goquery"
"git.anthrove.art/public/go-fa-api/internal/urls"
)
// parseNotesInboxPage parses one page of the /msg/pms/ inbox listing,
// returning the previews and the absolute URL of the next page (or "" if
// there's no further page).
//
// Beta theme renders each thread as a <div class="c-noteListItem"> with:
//   - subject link <a class="notelink"> whose href encodes the note id
//     (/msg/pms/{folder}/{noteID}/#message)
//   - sender block <div class="note-list-sender"> with a c-usernameBlock
//   - send date <div class="note-list-senddate"> with a popup_date span
//   - read-state class "note-read" / "note-unread" on the subject link
//
// Rows for which parseNoteListItem returns nil are skipped, so items never
// contains nil entries.
func parseNotesInboxPage(doc *goquery.Document) (items []*NotePreview, nextURL string) {
	doc.Find("div#notes-list div.c-noteListItem").Each(func(_ int, item *goquery.Selection) {
		if np := parseNoteListItem(item); np != nil {
			items = append(items, np)
		}
	})

	// Pagination control on the message center. Attr reports ok == false
	// both when the selection is empty and when the anchor has no href, so
	// one check covers "no button" and "button without a target". An empty
	// href is never handed to urls.AbsoluteCDN, which keeps the documented
	// "" sentinel for "no further page" intact.
	next := doc.Find("div.messagecenter-navigation a.button.more").First()
	if href, ok := next.Attr("href"); ok && href != "" {
		nextURL = urls.AbsoluteCDN(href)
	}
	return items, nextURL
}
// parseNoteListItem converts a single <div class="c-noteListItem"> inbox row
// into a NotePreview. It returns nil when the row contains no a.notelink
// subject anchor.
func parseNoteListItem(item *goquery.Selection) *NotePreview {
	link := item.Find("a.notelink").First()
	if link.Length() == 0 {
		return nil
	}
	href, _ := link.Attr("href")

	// Prefer the dedicated subject element; fall back to the anchor's own
	// text when that element is missing or empty.
	subject := trimText(link.Find(".c-noteListItem__subject").First())
	threadURL := urls.AbsoluteCDN(href)
	if subject == "" {
		subject = trimText(link)
	}

	// The note id is part of the href: /msg/pms/{folder}/{id}/#message.
	// Cut the fragment off before handing the path to extractIntFromHref.
	idPath := href
	if hash := strings.Index(href, "#"); hash >= 0 {
		idPath = href[:hash]
	}

	preview := &NotePreview{
		Subject:   subject,
		ThreadURL: threadURL,
		ID:        NoteID(extractIntFromHref(idPath)),
	}

	// Read state comes from the subject link's class attribute. An explicit
	// "note-unread" always wins; a bare "unread" only counts when the link
	// is not also flagged "note-read".
	classes, _ := link.Attr("class")
	switch {
	case strings.Contains(classes, "note-unread"):
		preview.Unread = true
	case strings.Contains(classes, "unread") && !strings.Contains(classes, "note-read"):
		preview.Unread = true
	}

	// FA marks notes from removed accounts with <span class="user-name-deleted">
	// instead of a usernameBlock, which leaves both Name and DisplayName
	// empty. Name stays empty by design in that case; the visible text of the
	// span (e.g. "[deleted]") goes into DisplayName so callers can tell
	// "no sender info" apart from "sender's account is gone".
	senderBox := item.Find("div.note-list-sender")
	sender := userRefFromUsernameBlock(senderBox)
	if sender.Name == "" && sender.DisplayName == "" {
		if gone := senderBox.Find("span.user-name-deleted").First(); gone.Length() > 0 {
			sender = UserRef{DisplayName: trimText(gone)}
		}
	}
	preview.Sender = sender

	preview.SentAt = parsePopupDate(item.Find("div.note-list-senddate span.popup_date").First())
	return preview
}
// parseNote builds a Note from the document of a single private-message
// thread (/viewmessage/{id}/).
//
// FA renders the note inside a <section> whose section-header contains the
// avatar, subject (<h2>), sender block, sent date, and recipient block. The
// body sits in the following <div class="section-body">, wrapped in a
// .user-submitted-links div.
//
// It returns an error wrapping ErrParse when no subject can be found; every
// other field is filled in on a best-effort basis.
func parseNote(id NoteID, doc *goquery.Document) (*Note, error) {
	// Prefer the ".addresses" information block; older / alternative layouts
	// only have the plain parent block.
	info := doc.Find("div.message-center-note-information.addresses").First()
	if info.Length() == 0 {
		info = doc.Find("div.message-center-note-information").First()
	}

	subject := trimText(info.Find("h2").First())
	if subject == "" {
		return nil, fmt.Errorf("%w: note %d: missing subject", ErrParse, id)
	}
	note := &Note{ID: id, Subject: subject}

	// FA writes "Sent by … To …" sequentially, so in document order the first
	// c-usernameBlock is the sender and the second is the recipient.
	users := info.Find("div.c-usernameBlock")
	if count := users.Length(); count > 0 {
		note.From = userRefFromUsernameBlock(users.Eq(0))
		if count > 1 {
			note.To = userRefFromUsernameBlock(users.Eq(1))
		}
	}

	// The avatar lives in a sibling div of the header. Try the dedicated
	// ".avatar" block first, then any image inside an information block. It
	// is only used when the sender block did not already supply one.
	avatar := trimAttr(doc.Find("div.message-center-note-information.avatar img.avatar").First(), "src")
	if avatar == "" {
		avatar = trimAttr(doc.Find("div.message-center-note-information img").First(), "src")
	}
	if note.From.AvatarURL == "" && avatar != "" {
		note.From.AvatarURL = urls.AbsoluteCDN(avatar)
	}

	note.SentAt = parsePopupDate(info.Find("span.popup_date").First())

	// Body: section .section-body > .user-submitted-links, falling back to
	// the bare .section-body.
	content := doc.Find("section div.section-body div.user-submitted-links").First()
	if content.Length() == 0 {
		content = doc.Find("section div.section-body").First()
	}
	note.BodyHTML = htmlOf(content)

	// FA occasionally prepends a scam-warning div (.noteWarningMessage). It
	// stays in the raw HTML above but is removed from a clone before the
	// plaintext convenience field is extracted.
	plain := content.Clone()
	plain.Find(".noteWarningMessage").Remove()
	note.BodyText = strings.TrimSpace(plain.Text())

	return note, nil
}