aboutsummaryrefslogtreecommitdiff
path: root/cmds/serve.go
diff options
context:
space:
mode:
author: Sam Scholten, 2025-12-15 19:34:17 +1000
committer: Sam Scholten, 2025-12-15 19:34:59 +1000
commit: 9f5978186ac3de07f4325975fecf4f538fe713b6 (patch)
tree: 41440b703054fe59eb561ba81d80fd60380c1f7a /cmds/serve.go
download: scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.tar.gz
scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.zip
Init v0.1.0
Diffstat (limited to 'cmds/serve.go')
-rw-r--r-- cmds/serve.go 1010
1 files changed, 1010 insertions, 0 deletions
diff --git a/cmds/serve.go b/cmds/serve.go
new file mode 100644
index 0000000..92aa64c
--- /dev/null
+++ b/cmds/serve.go
@@ -0,0 +1,1010 @@
+// Serve command: HTTP server for web UI and APIs.
+//
+// Two main flows: live-feed (cached + background refresh) and tools (on-demand scoring).
+// Live-feed rescans all configured RSS feeds on a timer (default 24h), caches results,
+// serves filtered articles via web UI and JSON/RSS APIs.
+// Tools provides real-time /score (single title) and /scan (ad-hoc feed) endpoints.
+// Background refresh continues despite individual feed failures; RWMutex allows
+// many concurrent readers with exclusive writer updates.
+// Templates are embedded for single-binary deployment.
+package cmds
+
+import (
+ "bufio"
+ "context"
+ "embed"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "html/template"
+ "io"
+ "log"
+ "net/http"
+ "net/url"
+ "os"
+ "os/signal"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strings"
+ "sync"
+ "syscall"
+ "time"
+
+ "github.com/PuerkitoBio/goquery"
+ "github.com/mmcdole/gofeed"
+ "scholscan/core"
+)
+
+// templateFS is the embedded file system holding the HTML templates matched
+// by templates/*.html; Run parses them with template.ParseFS at startup.
+//go:embed templates/*.html
+var templateFS embed.FS
+
+// ============================================================================
+// ┏━╸┏┳┓╺┳┓ ┏━┓┏┓ ┏┓
+// ┃ ┃┃┃ ┃┃ ┃ ┃┣┻┓ ┃
+// ┗━╸╹ ╹╺┻┛ ┗━┛┗━┛┗━┛
+// ============================================================================
+
+// ServeCommand implements the "serve" subcommand. Its exported fields are
+// bound to command-line flags in Init; the unexported fields hold state that
+// Init and Run populate and that the HTTP handlers read.
+type ServeCommand struct {
+ // Port is the TCP port the HTTP server listens on (--port).
+ Port int
+ // RSSWorldPath is the file listing feed URLs, one per line (--rss-world).
+ RSSWorldPath string
+ // RefreshInterval is the raw --refresh-interval flag text; Init parses it
+ // into refreshInterval.
+ RefreshInterval string
+ // ModelPath is the JSON model file read by loadModel (--model).
+ ModelPath string
+ // Title is an optional custom UI title (--title); displayTitle substitutes
+ // "ScholScan" when it is empty.
+ Title string
+
+ // Parsed form of RefreshInterval; drives the background refresh ticker.
+ refreshInterval time.Duration
+ // Model loaded once at startup by Run; modelMu guards every access to it.
+ model *core.ModelEnvelope
+ modelMu sync.RWMutex
+ // Cached filtered RSS results and the time they were last replaced.
+ // resultsMu lets many HTTP handlers read concurrently while the refresh
+ // routine takes the exclusive lock to swap in new results.
+ filteredResults []*core.Article
+ filteredResultsTime time.Time
+ resultsMu sync.RWMutex
+ // Templates parsed from templateFS at startup and executed by the page handlers.
+ tmpl *template.Template
+}
+
+// Name returns the subcommand identifier, "serve". Init also uses it to label
+// the flag set.
+func (c *ServeCommand) Name() string {
+	return "serve"
+}
+
+// Init parses args into the command's flag-backed fields and validates them.
+//
+// It returns an error when flag parsing fails, when positional arguments are
+// present, when the port is outside 0-65535, when --refresh-interval is not a
+// valid duration or is not greater than zero (time.NewTicker panics on a
+// non-positive duration), or when either file path still contains a ".."
+// element after cleaning.
+func (c *ServeCommand) Init(args []string) error {
+	fs := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
+	fs.Usage = func() {
+		fmt.Fprint(fs.Output(), `Usage: scholscan serve [options]
+
+ Start HTTP server for filtered RSS and scoring web UI.
+
+ Flags:
+ `)
+		fs.PrintDefaults()
+		fmt.Fprint(fs.Output(), `
+ Examples:
+ scholscan serve --port 8080 --rss-world rss_world.txt --model model.json
+ scholscan serve --refresh-interval 24h --model ./model.json --rss-world feeds.txt
+ `)
+	}
+
+	fs.IntVar(&c.Port, "port", 8080, "Port to listen on")
+	fs.StringVar(&c.RSSWorldPath, "rss-world", "rss_world.txt", "Path to RSS world file (one feed URL per line)")
+	fs.StringVar(&c.RefreshInterval, "refresh-interval", "24h", "Interval for background rescans (e.g., 24h, 1h)")
+	fs.StringVar(&c.ModelPath, "model", "model.json", "Path to trained model JSON file")
+	fs.StringVar(&c.Title, "title", "", "Custom title for the web interface")
+
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+
+	if fs.NArg() != 0 {
+		return fmt.Errorf("unexpected arguments provided: %v", fs.Args())
+	}
+
+	if c.Port < 0 || c.Port > 65535 {
+		return fmt.Errorf("invalid port %d: must be between 0 and 65535", c.Port)
+	}
+
+	interval, err := time.ParseDuration(c.RefreshInterval)
+	if err != nil {
+		return fmt.Errorf("invalid refresh-interval %q: %w", c.RefreshInterval, err)
+	}
+	if interval <= 0 {
+		return fmt.Errorf("invalid refresh-interval %q: must be greater than zero", c.RefreshInterval)
+	}
+	c.refreshInterval = interval
+
+	if hasParentDirElement(c.RSSWorldPath) {
+		return fmt.Errorf("invalid rss-world path: directory traversal not allowed")
+	}
+	if hasParentDirElement(c.ModelPath) {
+		return fmt.Errorf("invalid model path: directory traversal not allowed")
+	}
+
+	return nil
+}
+
+// hasParentDirElement reports whether path, once cleaned, still contains a
+// ".." element. Comparing whole elements (rather than searching for the
+// substring "..") keeps ordinary file names such as "my..model.json" valid.
+func hasParentDirElement(path string) bool {
+	cleaned := filepath.ToSlash(filepath.Clean(path))
+	for _, element := range strings.Split(cleaned, "/") {
+		if element == ".." {
+			return true
+		}
+	}
+	return false
+}
+
+// Run loads the templates and the model, starts the background feed refresh,
+// and serves HTTP on c.Port until SIGINT or SIGTERM arrives.
+//
+// Handlers are registered on a private ServeMux so Run does not touch the
+// process-wide http.DefaultServeMux. On a shutdown signal the server is shut
+// down gracefully, and Run waits for that shutdown to finish before returning.
+// It returns an error if the refresh interval is not positive, if templates
+// or the model cannot be loaded, or if the listener fails for any reason
+// other than a requested shutdown. stdin and stdout are not used.
+func (c *ServeCommand) Run(stdin io.Reader, stdout io.Writer) error {
+	// time.NewTicker panics on a non-positive duration; fail cleanly instead.
+	if c.refreshInterval <= 0 {
+		return fmt.Errorf("refresh interval must be greater than zero, got %s", c.refreshInterval)
+	}
+
+	log.Printf("Starting scholscan server on port %d", c.Port)
+
+	// Until the first scan completes, the cache timestamp is the server start time.
+	c.resultsMu.Lock()
+	c.filteredResultsTime = time.Now()
+	c.resultsMu.Unlock()
+
+	// Load templates at startup
+	tmpl, err := template.ParseFS(templateFS, "templates/*.html")
+	if err != nil {
+		return fmt.Errorf("failed to parse templates: %w", err)
+	}
+	c.tmpl = tmpl
+	log.Printf("Templates loaded successfully")
+
+	// Load model at startup
+	model, err := c.loadModel()
+	if err != nil {
+		return fmt.Errorf("failed to load model at startup: %w", err)
+	}
+	c.modelMu.Lock()
+	c.model = model
+	c.modelMu.Unlock()
+
+	log.Printf("Model loaded successfully")
+
+	// Start background ticker for periodic refresh; always release it on return.
+	ticker := time.NewTicker(c.refreshInterval)
+	defer ticker.Stop()
+	go c.backgroundRefresh(ticker)
+
+	// Perform the initial scan asynchronously so the server can start right away.
+	go func() {
+		log.Println("Starting initial feed scan...")
+		if err := c.refreshFilteredResults(); err != nil {
+			log.Printf("Warning: initial scan failed: %v", err)
+			return
+		}
+		c.resultsMu.RLock()
+		count := len(c.filteredResults)
+		c.resultsMu.RUnlock()
+		log.Printf("Initial scan complete, %d articles filtered", count)
+	}()
+
+	// Register handlers on a private mux rather than the global default one.
+	mux := http.NewServeMux()
+	mux.HandleFunc("/", c.handleRoot)
+	mux.HandleFunc("/live-feed", c.handleLiveFeed)
+	mux.HandleFunc("/tools", c.handleTools)
+	mux.HandleFunc("/score", c.handleScore)
+	mux.HandleFunc("/scan", c.handleScan)
+	mux.HandleFunc("/api/filtered/feed", c.handleFilteredFeed)
+	mux.HandleFunc("/api/filtered/rss", c.handleFilteredRSS)
+	mux.HandleFunc("/api/health", c.handleHealth)
+
+	server := &http.Server{
+		Addr:         fmt.Sprintf(":%d", c.Port),
+		Handler:      mux,
+		ReadTimeout:  core.DefaultReadTimeout,
+		WriteTimeout: core.DefaultWriteTimeout,
+		IdleTimeout:  core.DefaultIdleTimeout,
+	}
+
+	// Handle shutdown signals
+	sigChan := make(chan os.Signal, 1)
+	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
+	defer signal.Stop(sigChan)
+
+	serveDone := make(chan struct{})    // closed once ListenAndServe has returned
+	shutdownDone := make(chan struct{}) // closed once the shutdown goroutine has finished
+	go func() {
+		defer close(shutdownDone)
+		select {
+		case <-sigChan:
+		case <-serveDone:
+			// The listener stopped on its own (e.g. port in use); nothing to shut down.
+			return
+		}
+		log.Println("Shutdown signal received")
+		ticker.Stop()
+		ctx, cancel := context.WithTimeout(context.Background(), core.DefaultShutdownTimeout)
+		defer cancel()
+		if err := server.Shutdown(ctx); err != nil {
+			log.Printf("Server shutdown error: %v", err)
+		}
+	}()
+
+	log.Printf("Server listening on http://localhost:%d", c.Port)
+	serveErr := server.ListenAndServe()
+	close(serveDone)
+	// ListenAndServe returns as soon as Shutdown begins; wait for it to complete
+	// so in-flight requests are not cut off by the process exiting.
+	<-shutdownDone
+
+	if serveErr != nil && serveErr != http.ErrServerClosed {
+		return fmt.Errorf("server error: %w", serveErr)
+	}
+
+	return nil
+}
+
+// ============================================================================
+// ┏━╸┏━┓┏━┓┏━╸ ╻ ┏━┓┏━╸╻┏━╸
+// ┃ ┃ ┃┣┳┛┣╸ ┃ ┃ ┃┃╺┓┃┃
+// ┗━╸┗━┛╹┗╸┗━╸ ┗━╸┗━┛┗━┛╹┗━╸
+// ============================================================================
+
+func (c *ServeCommand) loadModel() (*core.ModelEnvelope, error) {
+ f, err := os.Open(c.ModelPath)
+ if err != nil {
+ return nil, fmt.Errorf("failed to open model file %s: %w", c.ModelPath, err)
+ }
+ defer f.Close()
+
+ var model core.ModelEnvelope
+ if err := json.NewDecoder(f).Decode(&model); err != nil {
+ return nil, fmt.Errorf("failed to decode model: %w", err)
+ }
+
+ return &model, nil
+}
+
+// scoreArticle vectorizes the article's trimmed title and scores it against
+// the model weights. It returns 0.0 when the vectorizer yields no vector or an
+// empty one, and also when core.PredictScore fails (the failure is logged).
+func (c *ServeCommand) scoreArticle(article *core.Article, vectorizer *core.TFIDFVectorizer, model *core.ModelEnvelope) float64 {
+	title := strings.TrimSpace(article.Title)
+	vecs := vectorizer.Transform([]string{title})
+	if len(vecs) < 1 || len(vecs[0]) < 1 {
+		return 0.0
+	}
+
+	result, predictErr := core.PredictScore(vecs[0], model.Weights)
+	if predictErr == nil {
+		return result
+	}
+
+	// A zero score keeps a problematic article out of the display without
+	// failing the caller; the log line is there for diagnostics.
+	log.Printf("Error scoring article: %v", predictErr)
+	return 0.0
+}
+
+// getThreshold returns the model's "recommended_threshold" metadata entry when
+// it is present and holds a float64, and core.DefaultScoreThreshold otherwise.
+// The error result is always nil.
+func (c *ServeCommand) getThreshold(model *core.ModelEnvelope) (float64, error) {
+	meta := model.Meta
+	if meta == nil {
+		return core.DefaultScoreThreshold, nil
+	}
+	recommended, isFloat := meta["recommended_threshold"].(float64)
+	if !isFloat {
+		return core.DefaultScoreThreshold, nil
+	}
+	return recommended, nil
+}
+
+// scoreAndFormatArticles scores a list of articles and returns them formatted for templates.
+// Articles are scored using the model and vectorizer, then returned with human-readable ratings.
+func (c *ServeCommand) scoreAndFormatArticles(articles []*core.Article, vectorizer *core.TFIDFVectorizer, model *core.ModelEnvelope, threshold float64) []map[string]interface{} {
+ type ArticleResponse struct {
+ Title string `json:"title"`
+ URL string `json:"url"`
+ Source string `json:"source,omitempty"`
+ Rating int `json:"rating"`
+ Score float64 `json:"score"`
+ }
+
+ scored := make([]ArticleResponse, 0, len(articles))
+ for _, article := range articles {
+ score := c.scoreArticle(article, vectorizer, model)
+ rating := core.ScoreToScale(score, threshold)
+
+ scored = append(scored, ArticleResponse{
+ Title: article.Title,
+ URL: article.URL,
+ Source: article.Source,
+ Rating: rating,
+ Score: score,
+ })
+ }
+
+ result := make([]map[string]interface{}, len(scored))
+ for i, a := range scored {
+ result[i] = map[string]interface{}{
+ "Title": a.Title,
+ "URL": a.URL,
+ "Source": a.Source,
+ "Rating": a.Rating,
+ "Score": a.Score,
+ }
+ }
+ return result
+}
+
+// ============================================================================
+// ┏━┓┏━┓┏━┓ ┏━┓╺┳╸╻ ╻┏━╸┏━╸
+// ┣┳┛┗━┓┗━┓ ┗━┓ ┃ ┃ ┃┣╸ ┣╸
+// ╹┗╸┗━┛┗━┛ ┗━┛ ╹ ┗━┛╹ ╹
+// ============================================================================
+
+// readRSSWorldFeeds reads the feed list at c.RSSWorldPath and returns its
+// entries in file order. Each line is trimmed; blank lines and lines starting
+// with "#" are skipped. It returns a wrapped error if the file cannot be
+// opened or scanned.
+func (c *ServeCommand) readRSSWorldFeeds() ([]string, error) {
+	file, openErr := os.Open(c.RSSWorldPath)
+	if openErr != nil {
+		return nil, fmt.Errorf("failed to open rss_world file %s: %w", c.RSSWorldPath, openErr)
+	}
+	defer file.Close()
+
+	var urls []string
+	lines := bufio.NewScanner(file)
+	for lines.Scan() {
+		entry := strings.TrimSpace(lines.Text())
+		if entry == "" || strings.HasPrefix(entry, "#") {
+			continue
+		}
+		urls = append(urls, entry)
+	}
+
+	if scanErr := lines.Err(); scanErr != nil {
+		return nil, fmt.Errorf("error reading rss_world file: %w", scanErr)
+	}
+	return urls, nil
+}
+
+// refreshFilteredResults rescans every configured feed, scores the articles,
+// and replaces the cached results with those scoring at or above the threshold.
+//
+// A failing feed is logged and skipped so the remaining feeds are still
+// processed. If every configured feed fails (for example during a network
+// outage), the previous cache is left untouched and an error is returned,
+// instead of replacing good results with an empty list. Errors are also
+// returned when the feed list cannot be read or no model is loaded.
+func (c *ServeCommand) refreshFilteredResults() error {
+	feeds, err := c.readRSSWorldFeeds()
+	if err != nil {
+		return err
+	}
+
+	c.modelMu.RLock()
+	model := c.model
+	c.modelMu.RUnlock()
+
+	if model == nil {
+		return fmt.Errorf("model not loaded")
+	}
+
+	// Scan all feeds. Continue on individual feed failures to maximize results.
+	// RSS feeds are often flaky; one down shouldn't prevent others from being processed.
+	var allArticles []*core.Article
+	failed := 0
+	for _, feed := range feeds {
+		articles, err := c.fetchRSSFeed(feed)
+		if err != nil {
+			log.Printf("Warning: failed to fetch feed %s: %v", feed, err)
+			failed++
+			continue
+		}
+		allArticles = append(allArticles, articles...)
+	}
+
+	// A total failure says nothing about the feeds' content; keep the old cache.
+	if len(feeds) > 0 && failed == len(feeds) {
+		return fmt.Errorf("all %d feeds failed to fetch; keeping previous results", len(feeds))
+	}
+
+	// Score and filter articles
+	threshold, err := c.getThreshold(model)
+	if err != nil {
+		return err
+	}
+
+	vectorizer := core.CreateVectorizerFromModel(model)
+
+	filtered := make([]*core.Article, 0, len(allArticles))
+	for _, article := range allArticles {
+		score := c.scoreArticle(article, vectorizer, model)
+		if score < threshold {
+			continue
+		}
+		// Store a copy carrying its own score so the fetched article is not mutated.
+		articleCopy := *article
+		articleCopy.Score = &score
+		filtered = append(filtered, &articleCopy)
+	}
+
+	c.resultsMu.Lock()
+	c.filteredResults = filtered
+	c.filteredResultsTime = time.Now()
+	c.resultsMu.Unlock()
+
+	return nil
+}
+
+// backgroundRefresh is meant to run in its own goroutine. On every tick it
+// calls refreshFilteredResults and logs either the error or the number of
+// cached articles; an error never stops the loop.
+func (c *ServeCommand) backgroundRefresh(ticker *time.Ticker) {
+	for range ticker.C {
+		log.Println("Background refresh started")
+
+		refreshErr := c.refreshFilteredResults()
+		if refreshErr != nil {
+			log.Printf("Background refresh error (continuing): %v", refreshErr)
+			continue
+		}
+
+		c.resultsMu.RLock()
+		total := len(c.filteredResults)
+		c.resultsMu.RUnlock()
+		log.Printf("Background refresh complete, %d articles filtered", total)
+	}
+}
+
+// fetchRSSFeed downloads and parses the feed at url and returns one article
+// per item whose trimmed title has at least core.MinTitleLength bytes. Each
+// article carries the item link, the trimmed title, the feed title as Source,
+// and the parsed publication time when the feed provides one.
+//
+// The whole fetch, including reading the body, is bounded by
+// core.DefaultHTTPTimeout through the request context. The body is streamed
+// into the parser and capped at maxFeedBytes, because url may come from user
+// input (the /scan handler) and must not be able to exhaust memory. A feed
+// larger than the cap is cut off and will normally fail to parse.
+//
+// It returns an error when the request cannot be built or sent, when the
+// response status is not 200, or when the body is not a parseable feed.
+func (c *ServeCommand) fetchRSSFeed(url string) ([]*core.Article, error) {
+	const maxFeedBytes = 10 << 20 // 10 MiB
+
+	ctx, cancel := context.WithTimeout(context.Background(), core.DefaultHTTPTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("error building request: %w", err)
+	}
+	req.Header.Set("User-Agent", core.PoliteUserAgent)
+
+	// The context already carries the deadline, so the shared default client
+	// (and its connection pool) can be reused instead of building one per call.
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("error fetching %s: %w", url, err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url)
+	}
+
+	feed, err := gofeed.NewParser().Parse(io.LimitReader(resp.Body, maxFeedBytes))
+	if err != nil {
+		return nil, fmt.Errorf("error parsing feed from %s: %w", url, err)
+	}
+
+	articles := make([]*core.Article, 0, len(feed.Items))
+	for _, item := range feed.Items {
+		title := strings.TrimSpace(item.Title)
+		if len(title) < core.MinTitleLength {
+			continue
+		}
+		articles = append(articles, &core.Article{
+			URL:         item.Link,
+			Title:       title,
+			Source:      feed.Title,
+			PublishedAt: item.PublishedParsed, // nil when the feed gives no usable date
+		})
+	}
+
+	return articles, nil
+}
+
+// ============================================================================
+// ╻ ╻┏━╸┏┓ ╻ ╻╻
+// ┃╻┃┣╸ ┣┻┓ ┃ ┃┃
+// ┗┻┛┗━╸┗━┛ ┗━┛╹
+// ============================================================================
+
+// handleRoot redirects the exact path "/" to /live-feed with a 301 and answers
+// every other path routed here with 404.
+func (c *ServeCommand) handleRoot(w http.ResponseWriter, r *http.Request) {
+	if r.URL.Path == "/" {
+		http.Redirect(w, r, "/live-feed", http.StatusMovedPermanently)
+		return
+	}
+	http.NotFound(w, r)
+}
+
+// handleLiveFeed renders the "live-feed" template from the cached results.
+//
+// Only GET is accepted. The optional "filter" query parameter narrows the list
+// to articles published within the last day ("day") or week ("week"); any
+// other value applies no date filter. Articles without a publication time are
+// always kept. Rows are sorted by raw score, highest first.
+func (c *ServeCommand) handleLiveFeed(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	c.resultsMu.RLock()
+	cached, cachedAt := c.filteredResults, c.filteredResultsTime
+	c.resultsMu.RUnlock()
+
+	c.modelMu.RLock()
+	model := c.model
+	c.modelMu.RUnlock()
+
+	if model == nil {
+		http.Error(w, "Model not loaded", http.StatusInternalServerError)
+		return
+	}
+
+	threshold, _ := c.getThreshold(model)
+
+	filter := r.URL.Query().Get("filter")
+	if filter == "" {
+		filter = "all"
+	}
+
+	// Translate the filter name into a look-back window; zero means no date filter.
+	var window time.Duration
+	switch filter {
+	case "day":
+		window = 24 * time.Hour
+	case "week":
+		window = 7 * 24 * time.Hour
+	}
+
+	visible := cached
+	if window > 0 {
+		cutoff := time.Now().Add(-window)
+		visible = make([]*core.Article, 0, len(cached))
+		for _, entry := range cached {
+			// Drop only entries that have a date and are not newer than the cutoff.
+			if entry.PublishedAt != nil && !entry.PublishedAt.After(cutoff) {
+				continue
+			}
+			visible = append(visible, entry)
+		}
+	}
+
+	// Row shape consumed by the template; field names are part of that contract.
+	type feedRow struct {
+		Title       string
+		URL         string
+		Source      string
+		Rating      int
+		Score       float64
+		PublishedAt string
+	}
+
+	rows := make([]feedRow, 0, len(visible))
+	for _, entry := range visible {
+		var raw float64
+		if entry.Score != nil {
+			raw = *entry.Score
+		}
+
+		var published string
+		if entry.PublishedAt != nil {
+			published = entry.PublishedAt.Format("2006-01-02")
+		}
+
+		rows = append(rows, feedRow{
+			Title:       entry.Title,
+			URL:         entry.URL,
+			Source:      entry.Source,
+			Rating:      core.ScoreToScale(raw, threshold),
+			Score:       raw,
+			PublishedAt: published,
+		})
+	}
+
+	sort.Slice(rows, func(a, b int) bool {
+		return rows[a].Score > rows[b].Score
+	})
+
+	page := map[string]interface{}{
+		"Page":      "live-feed",
+		"Articles":  rows,
+		"Threshold": threshold,
+		"UpdatedAt": cachedAt.Format("2006-01-02 15:04:05"),
+		"Filter":    filter,
+		"Title":     displayTitle(c.Title),
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	renderErr := c.tmpl.ExecuteTemplate(w, "live-feed", page)
+	if renderErr != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", renderErr), http.StatusInternalServerError)
+	}
+}
+
+// handleTools renders the "tools" template. Only GET is accepted; any other
+// method gets 405.
+func (c *ServeCommand) handleTools(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	page := map[string]interface{}{
+		"Page":  "tools",
+		"Title": displayTitle(c.Title),
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	renderErr := c.tmpl.ExecuteTemplate(w, "tools", page)
+	if renderErr != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", renderErr), http.StatusInternalServerError)
+	}
+}
+
+// handleScore scores a single title and renders the "results" template.
+//
+// GET shows the tools page; POST reads the "title" and "url" form fields. When
+// "url" is non-empty the title is taken from that page via
+// extractTitleFromURL, replacing any submitted title. Extraction and
+// validation failures are shown on the results page rather than returned as
+// HTTP errors. Other methods get 405.
+func (c *ServeCommand) handleScore(w http.ResponseWriter, r *http.Request) {
+	switch r.Method {
+	case http.MethodGet:
+		c.handleTools(w, r)
+		return
+	case http.MethodPost:
+		// Scoring request; handled below.
+	default:
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	c.modelMu.RLock()
+	model := c.model
+	c.modelMu.RUnlock()
+
+	if model == nil {
+		http.Error(w, "Model not loaded", http.StatusInternalServerError)
+		return
+	}
+
+	if parseErr := r.ParseForm(); parseErr != nil {
+		http.Error(w, fmt.Sprintf("Failed to parse form: %v", parseErr), http.StatusBadRequest)
+		return
+	}
+
+	title := strings.TrimSpace(r.FormValue("title"))
+	if pageURL := strings.TrimSpace(r.FormValue("url")); pageURL != "" {
+		fetched, fetchErr := extractTitleFromURL(pageURL)
+		if fetchErr != nil {
+			c.renderResultsError(w, fmt.Sprintf("Failed to extract title from URL: %v", fetchErr), title)
+			return
+		}
+		title = fetched
+	}
+
+	if problem := c.validateTitle(title); problem != "" {
+		c.renderResultsError(w, problem, title)
+		return
+	}
+
+	vectorizer := core.CreateVectorizerFromModel(model)
+	score := c.scoreArticle(&core.Article{Title: title}, vectorizer, model)
+	threshold, _ := c.getThreshold(model)
+
+	page := map[string]interface{}{
+		"Page":          "tools",
+		"IsScoreResult": true,
+		"Title":         title,
+		"Rating":        core.ScoreToScale(score, threshold),
+		"Score":         score,
+		"Threshold":     threshold,
+		"PageTitle":     displayTitle(c.Title),
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	renderErr := c.tmpl.ExecuteTemplate(w, "results", page)
+	if renderErr != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", renderErr), http.StatusInternalServerError)
+	}
+}
+
+// handleScan fetches the feed named by the "feed_url" form field, scores every
+// article in it, and renders the "results" template with the rows sorted by
+// score, highest first.
+//
+// GET shows the tools page; POST performs the scan; other methods get 405.
+// Validation and fetch failures are shown on the results page rather than
+// returned as HTTP errors.
+func (c *ServeCommand) handleScan(w http.ResponseWriter, r *http.Request) {
+	switch r.Method {
+	case http.MethodGet:
+		c.handleTools(w, r)
+		return
+	case http.MethodPost:
+		// Scan request; handled below.
+	default:
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	c.modelMu.RLock()
+	model := c.model
+	c.modelMu.RUnlock()
+
+	if model == nil {
+		http.Error(w, "Model not loaded", http.StatusInternalServerError)
+		return
+	}
+
+	if parseErr := r.ParseForm(); parseErr != nil {
+		http.Error(w, fmt.Sprintf("Failed to parse form: %v", parseErr), http.StatusBadRequest)
+		return
+	}
+
+	feedURL := strings.TrimSpace(r.FormValue("feed_url"))
+	if problem := c.validateFeedURL(feedURL); problem != "" {
+		c.renderScanResultsError(w, problem, feedURL)
+		return
+	}
+
+	fetched, fetchErr := c.fetchRSSFeed(feedURL)
+	if fetchErr != nil {
+		c.renderScanResultsError(w, fmt.Sprintf("Failed to fetch feed: %v", fetchErr), feedURL)
+		return
+	}
+
+	threshold, _ := c.getThreshold(model)
+	rows := c.scoreAndFormatArticles(fetched, core.CreateVectorizerFromModel(model), model, threshold)
+
+	// scoreAndFormatArticles stores "Score" as a float64 in every row.
+	sort.Slice(rows, func(a, b int) bool {
+		return rows[a]["Score"].(float64) > rows[b]["Score"].(float64)
+	})
+
+	page := map[string]interface{}{
+		"Page":         "tools",
+		"IsScanResult": true,
+		"FeedURL":      feedURL,
+		"Articles":     rows,
+		"Threshold":    threshold,
+		"PageTitle":    displayTitle(c.Title),
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	renderErr := c.tmpl.ExecuteTemplate(w, "results", page)
+	if renderErr != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", renderErr), http.StatusInternalServerError)
+	}
+}
+
+// ============================================================================
+// ┏━┓┏━┓╻ ┏━╸┏┓╻╺┳┓┏━┓┏━┓╻┏┓╻╺┳╸┏━┓
+// ┣━┫┣━┛┃ ┣╸ ┃┗┫ ┃┃┣━┛┃ ┃┃┃┗┫ ┃ ┗━┓
+// ╹ ╹╹ ╹ ┗━╸╹ ╹╺┻┛╹ ┗━┛╹╹ ╹ ╹ ┗━┛
+// ============================================================================
+
+// handleFilteredFeed serves the cached filtered articles as JSON.
+//
+// Only GET is accepted. The response object has the fields "total" (number of
+// cached articles), "threshold", "updated_at" (time of the last cache update)
+// and "articles" (title, url, source, rating, score). It answers 500 when no
+// model is loaded, matching handleLiveFeed. An encoding failure is logged
+// only, because the status line and part of the body may already be sent.
+func (c *ServeCommand) handleFilteredFeed(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	c.resultsMu.RLock()
+	articles := c.filteredResults
+	resultsTime := c.filteredResultsTime
+	c.resultsMu.RUnlock()
+
+	c.modelMu.RLock()
+	model := c.model
+	c.modelMu.RUnlock()
+
+	// getThreshold dereferences the model, so a nil model must be rejected here.
+	if model == nil {
+		http.Error(w, "Model not loaded", http.StatusInternalServerError)
+		return
+	}
+
+	threshold, _ := c.getThreshold(model)
+
+	type ArticleResponse struct {
+		Title  string  `json:"title"`
+		URL    string  `json:"url"`
+		Source string  `json:"source,omitempty"`
+		Rating int     `json:"rating"`
+		Score  float64 `json:"score"`
+	}
+
+	scored := make([]ArticleResponse, 0, len(articles))
+	for _, article := range articles {
+		score := 0.0
+		if article.Score != nil {
+			score = *article.Score
+		}
+
+		scored = append(scored, ArticleResponse{
+			Title:  article.Title,
+			URL:    article.URL,
+			Source: article.Source,
+			Rating: core.ScoreToScale(score, threshold),
+			Score:  score,
+		})
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate")
+
+	if err := json.NewEncoder(w).Encode(map[string]interface{}{
+		"total":      len(articles),
+		"threshold":  threshold,
+		"updated_at": resultsTime,
+		"articles":   scored,
+	}); err != nil {
+		log.Printf("Error encoding filtered feed response: %v", err)
+	}
+}
+
+// handleFilteredRSS serves the cached filtered articles as an RSS 2.0 document.
+//
+// Only GET is accepted. Every item carries the article title, its link, and a
+// description holding the 1-10 scaled score plus the raw score. The channel
+// title, item titles and links are XML-escaped so text containing "&" or "<"
+// cannot produce a malformed document. It answers 500 when no model is loaded.
+func (c *ServeCommand) handleFilteredRSS(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	c.resultsMu.RLock()
+	articles := c.filteredResults
+	c.resultsMu.RUnlock()
+
+	c.modelMu.RLock()
+	model := c.model
+	c.modelMu.RUnlock()
+
+	// getThreshold dereferences the model, so a nil model must be rejected here.
+	if model == nil {
+		http.Error(w, "Model not loaded", http.StatusInternalServerError)
+		return
+	}
+
+	// The threshold is the same for every article; compute it once.
+	threshold, _ := c.getThreshold(model)
+
+	w.Header().Set("Content-Type", "application/rss+xml")
+	w.Header().Set("Cache-Control", "public, max-age=3600")
+
+	// Generate RSS feed
+	fmt.Fprintf(w, `<?xml version="1.0" encoding="UTF-8"?>
+ <rss version="2.0">
+ <channel>
+ <title>%s - Filtered Articles</title>
+ <link>http://scholscan.local</link>
+ <description>Articles filtered by your learned preferences (scored 1-10)</description>
+ `, escapeXML(displayTitle(c.Title)))
+
+	for _, article := range articles {
+		rawScore := 0.0
+		if article.Score != nil {
+			rawScore = *article.Score
+		}
+		scaledScore := core.ScoreToScale(rawScore, threshold)
+
+		title := escapeXML(article.Title)
+		url := escapeXML(article.URL)
+		description := fmt.Sprintf("SCHOLSCAN SCORE = %d/10 (raw: %.3f)", scaledScore, rawScore)
+
+		fmt.Fprintf(w, ` <item>
+ <title>%s</title>
+ <link>%s</link>
+ <description>%s</description>
+ </item>
+ `, title, url, description)
+	}
+
+	fmt.Fprint(w, ` </channel>
+ </rss>`)
+}
+
+// handleHealth reports server health as JSON with the fields "status",
+// "model_loaded" and "timestamp" (Unix seconds).
+//
+// Only GET is accepted. The status is "ok" with HTTP 200 when a model is
+// loaded, and "model_not_loaded" with HTTP 500 otherwise. Content-Type is set
+// before the status code is written, because headers changed after
+// WriteHeader are not sent. An encoding failure is logged only, since the
+// response has already started by then.
+func (c *ServeCommand) handleHealth(w http.ResponseWriter, r *http.Request) {
+	if r.Method != http.MethodGet {
+		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
+		return
+	}
+
+	c.modelMu.RLock()
+	modelLoaded := c.model != nil
+	c.modelMu.RUnlock()
+
+	w.Header().Set("Content-Type", "application/json")
+
+	status := "ok"
+	if !modelLoaded {
+		status = "model_not_loaded"
+		w.WriteHeader(http.StatusInternalServerError)
+	}
+
+	if err := json.NewEncoder(w).Encode(map[string]interface{}{
+		"status":       status,
+		"model_loaded": modelLoaded,
+		"timestamp":    time.Now().Unix(),
+	}); err != nil {
+		log.Printf("Error encoding health response: %v", err)
+	}
+}
+
+// ============================================================================
+// ╻ ╻┏━╸╻ ┏━┓┏━╸┏━┓┏━┓
+// ┣━┫┣╸ ┃ ┣━┛┣╸ ┣┳┛┗━┓
+// ╹ ╹┗━╸┗━╸╹ ┗━╸╹┗╸┗━┛
+// ============================================================================
+
+// displayTitle returns custom unchanged when it is non-empty and the default
+// title "ScholScan" otherwise.
+func displayTitle(custom string) string {
+	if custom == "" {
+		return "ScholScan"
+	}
+	return custom
+}
+
+// extractTitleFromURL fetches the content from a URL and extracts the title from the HTML.
+// Designed to be resilient: tries multiple title sources, handles various URL formats,
+// and provides meaningful error feedback if extraction fails.
+func extractTitleFromURL(rawURL string) (string, error) {
+ if rawURL == "" {
+ return "", fmt.Errorf("empty URL")
+ }
+
+ // Check if it's a DOI
+ if strings.HasPrefix(rawURL, "10.") {
+ // Convert DOI to URL
+ rawURL = fmt.Sprintf("https://doi.org/%s", rawURL)
+ } else if !strings.HasPrefix(rawURL, "http://") && !strings.HasPrefix(rawURL, "https://") {
+ rawURL = "https://" + rawURL
+ }
+
+ ctx, cancel := context.WithTimeout(context.Background(), core.DefaultContextTimeout)
+ defer cancel()
+
+ req, err := http.NewRequestWithContext(ctx, "GET", rawURL, nil)
+ if err != nil {
+ return "", fmt.Errorf("invalid URL: %w", err)
+ }
+ req.Header.Set("User-Agent", core.PoliteUserAgent)
+ req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
+
+ resp, err := core.DoRequestWithRetry(ctx, core.DefaultHTTPClient, req)
+ if err != nil {
+ return "", fmt.Errorf("failed to fetch URL: %w", err)
+ }
+ defer resp.Body.Close()
+
+ if resp.StatusCode != http.StatusOK {
+ return "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
+ }
+
+ doc, err := goquery.NewDocumentFromReader(resp.Body)
+ if err != nil {
+ return "", fmt.Errorf("failed to parse HTML: %w", err)
+ }
+
+ // Fallback chain: <title> → og:title → twitter:title → <h1>
+ // Different sites populate these differently; trying multiple increases success rate.
+ title := ""
+
+ if t := doc.Find("title").Text(); t != "" {
+ title = strings.TrimSpace(t)
+ }
+
+ if title == "" {
+ if t, exists := doc.Find(`meta[property="og:title"]`).Attr("content"); exists && t != "" {
+ title = strings.TrimSpace(t)
+ }
+ }
+
+ if title == "" {
+ if t, exists := doc.Find(`meta[name="twitter:title"]`).Attr("content"); exists && t != "" {
+ title = strings.TrimSpace(t)
+ }
+ }
+
+ if title == "" {
+ if t := doc.Find("h1").First().Text(); t != "" {
+ title = strings.TrimSpace(t)
+ }
+ }
+
+ if title == "" {
+ return "", fmt.Errorf("could not extract title from page")
+ }
+
+ // Clean up common title patterns
+ reClean := regexp.MustCompile(`\s*\|\s*`)
+ title = reClean.ReplaceAllString(title, "")
+
+ rePub := regexp.MustCompile(`^[^|]*\|\s*`)
+ title = rePub.ReplaceAllString(title, "")
+ title = strings.TrimSpace(title)
+
+ if len(title) < 10 {
+ return "", fmt.Errorf("extracted title too short: %q", title)
+ }
+
+ return title, nil
+}
+
+// validateTitle checks that a title is suitable for scoring.
+// Returns an error message string if invalid, empty string if valid.
+func (c *ServeCommand) validateTitle(title string) string {
+ if strings.TrimSpace(title) == "" {
+ return "Title cannot be empty"
+ }
+ if len(title) > 1000 {
+ return "Title too long (max 1000 characters)"
+ }
+ return ""
+}
+
+// renderResultsError renders the "results" template in score-result mode with
+// errMsg as the error and title as the submitted title. A template failure is
+// reported to the client via http.Error with status 500.
+func (c *ServeCommand) renderResultsError(w http.ResponseWriter, errMsg, title string) {
+	page := map[string]interface{}{
+		"Page":          "tools",
+		"IsScoreResult": true,
+		"Error":         errMsg,
+		"Title":         title,
+		"PageTitle":     displayTitle(c.Title),
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	renderErr := c.tmpl.ExecuteTemplate(w, "results", page)
+	if renderErr != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", renderErr), http.StatusInternalServerError)
+	}
+}
+
+// validateFeedURL checks that a feed URL is non-empty, parseable, uses the
+// http or https scheme, and names a host.
+// It returns an error message if the URL is invalid and an empty string if it
+// is valid. url.Parse alone accepts almost any string (including relative
+// paths and other schemes), so scheme and host are checked explicitly. The
+// rejected inputs are ones the HTTP fetch could not have served anyway.
+func (c *ServeCommand) validateFeedURL(feedURL string) string {
+	if feedURL == "" {
+		return "Feed URL cannot be empty"
+	}
+
+	parsed, err := url.Parse(feedURL)
+	if err != nil {
+		return "Invalid URL format"
+	}
+	if parsed.Scheme != "http" && parsed.Scheme != "https" {
+		return "Feed URL must start with http:// or https://"
+	}
+	if parsed.Host == "" {
+		return "Feed URL must include a host"
+	}
+	return ""
+}
+
+// renderScanResultsError renders the "results" template in scan-result mode
+// with errMsg as the error and feedURL as the submitted feed address. A
+// template failure is reported to the client via http.Error with status 500.
+func (c *ServeCommand) renderScanResultsError(w http.ResponseWriter, errMsg, feedURL string) {
+	page := map[string]interface{}{
+		"Page":         "tools",
+		"IsScanResult": true,
+		"Error":        errMsg,
+		"FeedURL":      feedURL,
+		"PageTitle":    displayTitle(c.Title),
+	}
+
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	renderErr := c.tmpl.ExecuteTemplate(w, "results", page)
+	if renderErr != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", renderErr), http.StatusInternalServerError)
+	}
+}
+
+func escapeXML(s string) string {
+ s = strings.ReplaceAll(s, "&", "&amp;")
+ s = strings.ReplaceAll(s, "<", "&lt;")
+ s = strings.ReplaceAll(s, ">", "&gt;")
+ s = strings.ReplaceAll(s, "\"", "&quot;")
+ s = strings.ReplaceAll(s, "'", "&apos;")
+ return s
+}