initial commit
This commit is contained in:
133
notes_parser.go
Normal file
133
notes_parser.go
Normal file
@@ -0,0 +1,133 @@
|
||||
package fa
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
|
||||
"git.anthrove.art/public/go-fa-api/internal/urls"
|
||||
)
|
||||
|
||||
// parseNotesInboxPage parses one page of the /msg/pms/ inbox listing,
|
||||
// returning the previews and the absolute URL of the next page (or "" if
|
||||
// there's no further page).
|
||||
//
|
||||
// Beta theme renders each thread as a <div class="c-noteListItem"> with:
|
||||
// - subject link <a class="notelink"> whose href encodes the note id
|
||||
// (/msg/pms/{folder}/{noteID}/#message)
|
||||
// - sender block <div class="note-list-sender"> with a c-usernameBlock
|
||||
// - send date <div class="note-list-senddate"> with a popup_date span
|
||||
// - read-state class "note-read" / "note-unread" on the subject link
|
||||
func parseNotesInboxPage(doc *goquery.Document) (items []*NotePreview, nextURL string) {
|
||||
doc.Find("div#notes-list div.c-noteListItem").Each(func(_ int, item *goquery.Selection) {
|
||||
np := parseNoteListItem(item)
|
||||
if np != nil {
|
||||
items = append(items, np)
|
||||
}
|
||||
})
|
||||
|
||||
// Pagination control on the message center.
|
||||
if next := doc.Find("div.messagecenter-navigation a.button.more").First(); next.Length() > 0 {
|
||||
href, _ := next.Attr("href")
|
||||
nextURL = urls.AbsoluteCDN(href)
|
||||
}
|
||||
return items, nextURL
|
||||
}
|
||||
|
||||
// parseNoteListItem lifts one <div class="c-noteListItem"> row.
|
||||
func parseNoteListItem(item *goquery.Selection) *NotePreview {
|
||||
subjectLink := item.Find("a.notelink").First()
|
||||
if subjectLink.Length() == 0 {
|
||||
return nil
|
||||
}
|
||||
href, _ := subjectLink.Attr("href")
|
||||
|
||||
np := &NotePreview{
|
||||
Subject: trimText(subjectLink.Find(".c-noteListItem__subject").First()),
|
||||
ThreadURL: urls.AbsoluteCDN(href),
|
||||
}
|
||||
if np.Subject == "" {
|
||||
np.Subject = trimText(subjectLink)
|
||||
}
|
||||
|
||||
// Note ID lives in the href: /msg/pms/{folder}/{id}/#message. Strip the
|
||||
// fragment first so extractIntFromHref picks the trailing numeric path.
|
||||
if i := strings.Index(href, "#"); i != -1 {
|
||||
href = href[:i]
|
||||
}
|
||||
np.ID = NoteID(extractIntFromHref(href))
|
||||
|
||||
// Read/unread: classes on the subject link.
|
||||
if class, _ := subjectLink.Attr("class"); strings.Contains(class, "note-unread") || strings.Contains(class, "unread") && !strings.Contains(class, "note-read") {
|
||||
np.Unread = true
|
||||
}
|
||||
|
||||
senderBox := item.Find("div.note-list-sender")
|
||||
np.Sender = userRefFromUsernameBlock(senderBox)
|
||||
// FA marks notes from removed accounts with <span class="user-name-deleted">.
|
||||
// In that case there is no usernameBlock and Name stays empty by design;
|
||||
// surface the visible "[deleted]" string in DisplayName so callers can
|
||||
// distinguish "no sender info" from "sender's account is gone".
|
||||
if np.Sender.Name == "" && np.Sender.DisplayName == "" {
|
||||
if deleted := senderBox.Find("span.user-name-deleted").First(); deleted.Length() > 0 {
|
||||
np.Sender = UserRef{DisplayName: trimText(deleted)}
|
||||
}
|
||||
}
|
||||
np.SentAt = parsePopupDate(item.Find("div.note-list-senddate span.popup_date").First())
|
||||
return np
|
||||
}
|
||||
|
||||
// parseNote lifts a single private-message thread from /viewmessage/{id}/.
|
||||
//
|
||||
// FA renders the note inside a <section> whose section-header contains the
|
||||
// avatar, subject (<h2>), sender block, sent date, and recipient block.
|
||||
// The body lives in the following <div class="section-body"> wrapped in a
|
||||
// .user-submitted-links div.
|
||||
func parseNote(id NoteID, doc *goquery.Document) (*Note, error) {
|
||||
n := &Note{ID: id}
|
||||
|
||||
header := doc.Find("div.message-center-note-information.addresses").First()
|
||||
if header.Length() == 0 {
|
||||
// Older / alternative layout: try the parent block.
|
||||
header = doc.Find("div.message-center-note-information").First()
|
||||
}
|
||||
|
||||
n.Subject = trimText(header.Find("h2").First())
|
||||
if n.Subject == "" {
|
||||
return nil, fmt.Errorf("%w: note %d: missing subject", ErrParse, id)
|
||||
}
|
||||
|
||||
// Sender + recipient: the first and second c-usernameBlock inside the
|
||||
// header, in document order (FA writes "Sent by … To …" sequentially).
|
||||
blocks := header.Find("div.c-usernameBlock")
|
||||
if blocks.Length() >= 1 {
|
||||
n.From = userRefFromUsernameBlock(blocks.Eq(0))
|
||||
}
|
||||
if blocks.Length() >= 2 {
|
||||
n.To = userRefFromUsernameBlock(blocks.Eq(1))
|
||||
}
|
||||
// Avatar lives in a sibling div within the surrounding container.
|
||||
avatarSrc := trimAttr(doc.Find("div.message-center-note-information.avatar img.avatar").First(), "src")
|
||||
if avatarSrc == "" {
|
||||
avatarSrc = trimAttr(doc.Find("div.message-center-note-information img").First(), "src")
|
||||
}
|
||||
if avatarSrc != "" && n.From.AvatarURL == "" {
|
||||
n.From.AvatarURL = urls.AbsoluteCDN(avatarSrc)
|
||||
}
|
||||
|
||||
n.SentAt = parsePopupDate(header.Find("span.popup_date").First())
|
||||
|
||||
// Body. FA wraps it in section .section-body > .user-submitted-links and
|
||||
// occasionally prepends a scam-warning div which we strip from the
|
||||
// plaintext convenience field but leave intact in the raw HTML.
|
||||
body := doc.Find("section div.section-body div.user-submitted-links").First()
|
||||
if body.Length() == 0 {
|
||||
body = doc.Find("section div.section-body").First()
|
||||
}
|
||||
n.BodyHTML = htmlOf(body)
|
||||
bodyTextSel := body.Clone()
|
||||
bodyTextSel.Find(".noteWarningMessage").Remove()
|
||||
n.BodyText = strings.TrimSpace(bodyTextSel.Text())
|
||||
return n, nil
|
||||
}
|
||||
Reference in New Issue
Block a user