package fa import ( "context" "iter" "strconv" "strings" "time" "github.com/PuerkitoBio/goquery" "git.anthrove.art/public/go-fa-api/internal/urls" ) // SubmissionInbox iterates the new-submission inbox at /msg/submissions/ — // the feed of submissions posted since you last cleared the inbox by users // you watch. Requires a logged-in client; anonymous calls hit the login // gate and surface as [ErrUnauthorized]. // // Each yielded *Submission carries ID, Title, Author, ThumbURL, Rating, // and PostedAt (derived from the date-divider's data-date timestamp). // Items are yielded in document order newest first, grouped by day. // // Pagination follows FA's cursor scheme (the "Next 72" link encodes // "submissions newer than ID X, 72 per page" in its href). When FA serves // a full page but omits that link, the iterator synthesizes the next // cursor from the oldest submission on the page so a large inbox is not // truncated to its first page. Iteration stops once a page yields no new // submissions, or returns fewer than a full page with no cursor link. // // Use [ListOptions.MaxPages] to bound the crawl; the inbox can hold // hundreds of pending items if you watch many active artists. // // ListOptions.StartPage is ignored the inbox is cursor-paginated by // FA (the "Next 72" link encodes a from-id), not page-numbered, so there // is nothing meaningful to start from. func (c *Client) SubmissionInbox(ctx context.Context, opts ListOptions, reqOpts ...Option) iter.Seq2[*Submission, error] { return func(yield func(*Submission, error) bool) { nextURL := urls.MsgSubmissions() pagesFetched := 0 visited := make(map[string]bool) seen := make(map[SubmissionID]bool) for nextURL != "" { if opts.reachedLimit(pagesFetched) { return } // Loop guard: FA (or a synthesized cursor) can point back at a // page already crawled; stop rather than spin forever. if visited[nextURL] { return } visited[nextURL] = true var ( items []*Submission next string ) err := c.fetch(ctx, nextURL, func(doc *goquery.Document) error { items, next = parseSubmissionInboxPage(doc, c.cfg.jsonListings) return nil }, reqOpts...) if err != nil { yield(nil, err) return } pagesFetched++ newCount := 0 minID := SubmissionID(0) for _, s := range items { if minID == 0 || s.ID < minID { minID = s.ID } if seen[s.ID] { continue } seen[s.ID] = true newCount++ if !yield(s, nil) { return } } // A page that adds nothing new is the natural end of the crawl. if newCount == 0 { return } // FA renders a "Next 72" cursor link on every page that has a // successor but it can omit it even when the inbox holds more. // When the page came back full, trust the item count over the // missing link and synthesize the cursor from the oldest id. if next == "" { if len(items) >= urls.InboxPageSize && minID > 0 { next = urls.MsgSubmissionsCursor(int64(minID)) } else { return } } nextURL = next } } } // parseSubmissionInboxPage walks /msg/submissions/ (or one of its cursor- // paginated variants), returning each yielded submission and the absolute // URL of the "Next 72" cursor page, or "" if there's no further page. // // Inbox items are grouped under
wrappers; the parser lifts the group timestamp // onto each contained submission's PostedAt so callers don't have to // re-derive it. // // useJSON controls the experimental JSON-first merge see parseGalleryPage. func parseSubmissionInboxPage(doc *goquery.Document, useJSON bool) (items []*Submission, nextURL string) { var jsonData listingJSONMap if useJSON { jsonData = readListingJSON(doc) } doc.Find("#messagecenter-submissions div.notifications-by-date").Each(func(_ int, group *goquery.Selection) { groupTime := groupDate(group) group.Find("figure[id^=sid-]").Each(func(_ int, sel *goquery.Selection) { s := parseGalleryFigure(sel, jsonData) if s == nil { return } if s.PostedAt.IsZero() && !groupTime.IsZero() { s.PostedAt = groupTime } items = append(items, s) }) }) if len(items) == 0 { doc.Find("#messagecenter-submissions figure[id^=sid-]").Each(func(_ int, sel *goquery.Selection) { if s := parseGalleryFigure(sel, jsonData); s != nil { items = append(items, s) } }) } // Last resort: a cursor page may drop the #messagecenter-submissions // wrapper entirely. /msg/submissions/ carries no figures other than the // inbox gallery, so a document-wide sweep is safe here. if len(items) == 0 { doc.Find("figure[id^=sid-]").Each(func(_ int, sel *goquery.Selection) { if s := parseGalleryFigure(sel, jsonData); s != nil { items = append(items, s) } }) } if next := doc.Find("div.messagecenter-navigation a.button.more").First(); next.Length() > 0 { href, _ := next.Attr("href") nextURL = urls.AbsoluteCDN(href) } return items, nextURL } // groupDate reads the unix timestamp from a notifications-by-date wrapper's // data-date attribute. Returns zero time when missing/unparseable. func groupDate(group *goquery.Selection) time.Time { v := strings.TrimSpace(trimAttr(group, "data-date")) if v == "" { return time.Time{} } secs, err := strconv.ParseInt(v, 10, 64) if err != nil || secs <= 0 { return time.Time{} } return time.Unix(secs, 0).UTC() }