package fa

import (
	"strings"

	"github.com/PuerkitoBio/goquery"
)

// classifySystemMessage inspects a fetched document and decides whether it is
// one of FA's gate/error templates instead of a real content page. It returns
// nil for ordinary pages so the caller can carry on parsing, and otherwise the
// most specific error it can determine.
//
// Classification happens in two stages:
//
//  1. The lower-cased, trimmed text of the first <title> element selects the
//     template. A title starting with "login required" always yields
//     ErrUnauthorized (auth-gated content). A title starting with
//     "system error" moves on to stage 2. Any other title yields nil.
//  2. For the system-error template, the section header (an <h2>) and the
//     section body text are joined, lower-cased and searched for known
//     phrases, mapping to ErrNotFound, ErrUnauthorized or ErrRateLimited, in
//     that order of precedence.
//
// A system-error page whose text matches none of the known phrases is
// reported as a *SystemMessageError carrying the header and body exactly as
// extracted (not lower-cased).
func classifySystemMessage(doc *goquery.Document) error {
	title := strings.ToLower(strings.TrimSpace(doc.Find("title").First().Text()))

	// The login gate needs no further inspection.
	if strings.HasPrefix(title, "login required") {
		return ErrUnauthorized
	}
	// Only the generic system-error template is examined further; everything
	// else is treated as a normal content page.
	if !strings.HasPrefix(title, "system error") {
		return nil
	}

	// Prefer the header nested inside a <section>; otherwise take the first
	// <h2> found anywhere in the document.
	heading := trimText(doc.Find("section .section-header h2").First())
	if heading == "" {
		heading = trimText(doc.Find("h2").First())
	}

	// Same idea for the message: the scoped selector first, then any
	// .section-body element.
	message := strings.TrimSpace(doc.Find("section .section-body").First().Text())
	if message == "" {
		message = strings.TrimSpace(doc.Find(".section-body").First().Text())
	}

	haystack := strings.ToLower(heading + " " + message)

	// mentions reports whether any of the given phrases occurs in the
	// combined, lower-cased header and body text.
	mentions := func(phrases ...string) bool {
		for _, phrase := range phrases {
			if strings.Contains(haystack, phrase) {
				return true
			}
		}
		return false
	}

	// Case order matters: a page matching several groups resolves to the
	// first one listed.
	switch {
	case mentions(
		"not in our database",
		"submission not found",
		"user not found",
		"journal not found",
		"page not found",
		"no such",
	):
		return ErrNotFound
	case mentions(
		"you must be logged in",
		"log in to view",
		"permission to view",
		"permission to access",
	):
		return ErrUnauthorized
	case mentions("rate limit", "too many requests"):
		return ErrRateLimited
	}

	return &SystemMessageError{Title: heading, Body: message}
}