diff options
| author | Sam Scholten | 2025-12-15 19:34:17 +1000 |
|---|---|---|
| committer | Sam Scholten | 2025-12-15 19:34:59 +1000 |
| commit | 9f5978186ac3de07f4325975fecf4f538fe713b6 (patch) | |
| tree | 41440b703054fe59eb561ba81d80fd60380c1f7a | |
| download | scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.tar.gz scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.zip | |
Init v0.1.0
| -rw-r--r-- | .gitignore | 33 | ||||
| -rw-r--r-- | Containerfile | 11 | ||||
| -rw-r--r-- | DESIGN.md | 81 | ||||
| -rw-r--r-- | README.md | 37 | ||||
| -rw-r--r-- | cmds/scan.go | 416 | ||||
| -rw-r--r-- | cmds/serve.go | 1010 | ||||
| -rw-r--r-- | cmds/templates/live-feed.html | 158 | ||||
| -rw-r--r-- | cmds/templates/results.html | 279 | ||||
| -rw-r--r-- | cmds/templates/tools.html | 202 | ||||
| -rw-r--r-- | cmds/train.go | 841 | ||||
| -rw-r--r-- | cmds/train_test.go | 66 | ||||
| -rw-r--r-- | core/constants.go | 21 | ||||
| -rw-r--r-- | core/http.go | 196 | ||||
| -rw-r--r-- | core/ml.go | 427 | ||||
| -rw-r--r-- | core/model.go | 20 | ||||
| -rw-r--r-- | core/scoring.go | 14 | ||||
| -rw-r--r-- | core/text.go | 36 | ||||
| -rw-r--r-- | core/types.go | 84 | ||||
| -rw-r--r-- | go.mod | 19 | ||||
| -rw-r--r-- | go.sum | 96 | ||||
| -rw-r--r-- | justfile | 39 | ||||
| -rw-r--r-- | main.go | 83 |
22 files changed, 4169 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b57a04a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,33 @@
+scholscan
+
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+*.test
+
+*.out
+
+go.work
+
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+.DS_Store
+Thumbs.db
+
+data/
+!README.md
+
+*.log
+
+.env*
+config.*
+secrets.*
+# RSS world data file
+rss_world.txt
+*.kate-swp
diff --git a/Containerfile b/Containerfile
new file mode 100644
index 0000000..58f011f
--- /dev/null
+++ b/Containerfile
@@ -0,0 +1,11 @@
+# Copy & customize: mount model.json and rss_world.txt, set --title flag as needed
+FROM golang:1.25-alpine AS builder
+RUN apk add --no-cache git
+WORKDIR /build
+RUN git clone https://your-git-repo-url/scholscan.git .
+RUN go build -o scholscan .
+
+FROM alpine:latest
+COPY --from=builder /build/scholscan /app/scholscan
+WORKDIR /app
+ENTRYPOINT ["/app/scholscan"]
diff --git a/DESIGN.md b/DESIGN.md
new file mode 100644
index 0000000..dba3394
--- /dev/null
+++ b/DESIGN.md
@@ -0,0 +1,81 @@
+Scholscan Design
+=================
+
+Article filter that learns from positive examples, then filters RSS feeds automatically. Classifier uses TF-IDF on article titles plus logistic regression: fast, with no content scraping needed.
+
+Code Structure
+---------------
+
+main.go - Entry point, validates commands, dispatches
+
+cmds/
+  train.go - Load positive articles, fetch RSS as negatives, train model, output JSON
+  scan.go - Fetch articles from RSS, score with model, output filtered results
+  serve.go - HTTP server with background feed refresh, embedded web UI, RSS output
+
+core/
+  types.go - Article struct holds article data, Config struct for app settings, Command interface
+  ml.go - TF-IDF implementation with n-gram support, logistic regression classifier
+  model.go - ModelEnvelope for serialized models, model save/load functions
+  scoring.go - Score conversion from raw 0-1 to display 1-10 scale
+  text.go - HTML content extraction, word tokenization, text cleaning
+  http.go - HTTP client with retries, timeouts, user agents
+  constants.go - Default timeouts, thresholds, chunk sizes
+
+Training Flow
+-------------
+
+Command loads positive examples from JSONL file. Reads RSS URLs from text file (one per line, # comments allowed). Fetches RSS feeds in parallel, removes any articles matching positive URLs. Trains TF-IDF vectorizer then logistic regression on balanced dataset. Finds optimal threshold on validation split using Youden's J metric (sketched below, after Server Flow). Outputs complete model JSON to stdout.
+
+Scanning Flow
+-------------
+
+Command fetches specified RSS feed, scores each article using trained model. Articles scoring above threshold are output as JSON Lines (same format as input). Includes enrichment metadata if available. Verbose mode shows fetch and scoring progress to stderr.
+
+Server Flow
+-----------
+
+Server loads model and RSS world feed list on startup. Background goroutine refreshes all feeds at a configurable interval (default 24h). Results cached in memory with RWMutex. HTTP handlers serve both HTML UI and JSON/RSS API endpoints.
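+
+The threshold search lives in cmds/train.go (diff further down this page); as a rough sketch of the Youden's J selection, with illustrative names and grid step rather than the exact implementation:
+
+```
+import "math"
+
+// bestThreshold sweeps candidate thresholds over the validation split and
+// keeps the one maximizing Youden's J = sensitivity + specificity - 1.
+func bestThreshold(scores []float64, labels []bool) float64 {
+	best, bestJ := 0.5, -1.0
+	for t := 0.01; t < 1.0; t += 0.01 {
+		var tp, fp, tn, fn float64
+		for i, s := range scores {
+			switch {
+			case s >= t && labels[i]:
+				tp++
+			case s >= t && !labels[i]:
+				fp++
+			case s < t && labels[i]:
+				fn++
+			default:
+				tn++
+			}
+		}
+		sens := tp / math.Max(tp+fn, 1) // true positive rate, guarded against /0
+		spec := tn / math.Max(tn+fp, 1) // true negative rate, guarded against /0
+		if j := sens + spec - 1; j > bestJ {
+			bestJ, best = j, t
+		}
+	}
+	return best
+}
+```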
+
+API Endpoints
+-------------
+
+### HTML Pages
+- GET `/` - Redirect to /live-feed
+- GET `/live-feed` - Filtered articles web interface (server-rendered)
+- GET `/tools` - Manual article scoring interface (server-rendered)
+
+### JSON and Form APIs
+- GET `/api/filtered/feed` - Articles as JSON array (for external consumption)
+- GET `/api/health` - Health check returns {"status":"ok"}
+- POST `/score` - Score single article via form post
+- POST `/scan` - Scan RSS feed via form post
+
+### RSS Output
+- GET `/api/filtered/rss` - Scored articles as RSS feed
+
+Model Details
+-------------
+
+Vectorizer uses unigrams plus bigrams. Minimum document frequency 2 (removes typos), maximum 80% (removes stopwords). Vocabulary capped at 50000 terms. Logistic regression with L2 regularization lambda=0.001, learning rate 0.5, 500 iterations. Validation split 80/20 with seed 42 for reproducible results. Threshold selected using Youden's J to balance false positives against false negatives.
+
+Server Implementation
+---------------------
+
+HTML templates embedded in binary using embed.FS. All rendering is server-side with no JavaScript. Tools page uses standard HTML forms with POST submissions. Live feed displays cached background results with server-side rendering. Background refresh runs in a single goroutine that fetches feeds sequentially, skipping any feed that fails. Results cached in memory with a single last-update timestamp. RSS output repackages filtered articles into RSS format for consumption.
+
+Key Implementation Notes
+------------------------
+
+- Articles processed in 50-item chunks for memory efficiency
+- File paths validated against directory traversal attacks
+- HTTP requests use custom polite user agent with email contact
+- RSS parsing handles both RSS and Atom via gofeed library
+- TF-IDF vectorizer stores vocabulary as sorted string array for deterministic ordering
+- Model version field allows future format changes
+- Background refresh errors logged but don't crash server
+
+External Dependencies
+---------------------
+
+gofeed (github.com/mmcdole/gofeed) for RSS/Atom parsing, goquery (github.com/PuerkitoBio/goquery) for HTML title extraction. All other functionality uses the Go standard library.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..870bf34
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+# Scholscan
+
+Filters academic articles using TF-IDF on titles plus logistic regression.
+
+## Build
+```
+go build -o scholscan .
+```
+
+## Usage
+```
+# Train model from articles you like
+./scholscan train positives.jsonl --rss-feeds feeds.txt > model.json
+
+# Score new RSS feed
+./scholscan scan --url RSS_URL --model model.json > results.jsonl
+
+# Run web server
+./scholscan serve --port 8080 --model model.json --rss-world rss_world.txt
+```
+
+## Endpoints
+
+- GET `/` - redirect to live feed
+- GET `/live-feed` - filtered articles web UI
+- GET `/tools` - score individual articles
+- POST `/score` - API for scoring titles
+- POST `/scan` - API for scanning RSS
+- GET `/api/filtered/feed` - JSON feed
+- GET `/api/filtered/rss` - RSS feed
+- GET `/api/health` - health check
+
+## Model settings
+
+- TF-IDF: unigrams + bigrams, MinDF=2, MaxDF=0.8
+- Logistic regression: λ=0.001, L2 regularization
+- Class balancing: downsample majority to 1:1 ratio
\ No newline at end of file diff --git a/cmds/scan.go b/cmds/scan.go new file mode 100644 index 0000000..789157c --- /dev/null +++ b/cmds/scan.go @@ -0,0 +1,416 @@ +// Scan command: filters articles using trained model. +// +// takes articles from RSS feed, text, or JSONL. Scores & outputs those passing. +// Batches processing (default 50) to allow continuous streaming. +package cmds + +import ( + "bufio" + "context" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + + "github.com/mmcdole/gofeed" + "scholscan/core" +) + + +// ============================================================================ +// ┏━╸┏━┓┏┳┓┏┳┓┏━┓┏┓╻╺┳┓ +// ┃ ┃ ┃┃┃┃┃┃┃┣━┫┃┗┫ ┃┃ +// ┗━╸┗━┛╹ ╹╹ ╹╹ ╹╹ ╹╺┻┛ +// ============================================================================ + + +// scores articles with trained model and outputs filtered results above thresh +type ScanCommand struct { + URL string + FromText bool + FromArticles bool + + ModelPath string + Threshold string + + MinTitleLength int + ChunkSize int + + EventsOut string + MetricsOut string + Verbose bool +} + +func (c *ScanCommand) Name() string { return "scan" } + +func (c *ScanCommand) Init(args []string) error { + fs := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + fs.Usage = func() { + fmt.Fprint(fs.Output(), `Usage: scholscan scan [options] + +Fetches articles, scores with model, outputs matched (>thresh) ones. + +Source options (exactly one required): + --url <feed_url> Fetch articles from RSS/Atom feed + --from-text Extract URLs from text on stdin + --from-articles Use Article JSONL from stdin directly + +Model and filtering: + --model <path> Path to trained model JSON file (required) + --threshold <float> Score threshold (if not provided, uses model's recommended threshold) + +Enrichment options: +`) + fs.PrintDefaults() + fmt.Fprint(fs.Output(), ` +Examples: + scholscan scan --url "http://some.blog/rss.xml" --model model.json > interesting.jsonl + echo "see https://example.com" | scholscan scan --from-text --model model.json + cat articles.jsonl | scholscan scan --from-articles --model model.json +`) + } + + fs.StringVar(&c.URL, "url", "", "RSS/Atom feed URL to fetch") + fs.BoolVar(&c.FromText, "from-text", false, "Extract URLs from text on stdin") + fs.BoolVar(&c.FromArticles, "from-articles", false, "Use Article JSONL from stdin") + fs.StringVar(&c.ModelPath, "model", "", "Path to trained model JSON file (required)") + fs.StringVar(&c.Threshold, "threshold", "", "Score threshold for filtering (if not provided, uses model's recommended threshold)") + fs.IntVar(&c.MinTitleLength, "min-title-length", core.MinTitleLength, "Minimum title length to consider valid") + fs.IntVar(&c.ChunkSize, "chunk-size", core.DefaultChunkSize, "Number of articles to process in each batch") + fs.StringVar(&c.EventsOut, "events-out", "events.jsonl", "Write per-article events to a JSONL file") + fs.StringVar(&c.MetricsOut, "metrics-out", "metrics.json", "Write summary metrics to a JSON file") + fs.BoolVar(&c.Verbose, "verbose", false, "Show progress information") + + if err := fs.Parse(args); err != nil { + return err + } + + if fs.NArg() != 0 { + return fmt.Errorf("unexpected arguments provided: %v", fs.Args()) + } + + // one src opt required + sourceCount := 0 + if c.URL != "" { + sourceCount++ + } + if c.FromText { + sourceCount++ + } + if c.FromArticles { + sourceCount++ + } + + if sourceCount == 0 { + return fmt.Errorf("exactly one source option must be specified: --url, --from-text, or 
--from-articles") + } + if sourceCount > 1 { + return fmt.Errorf("only one source option may be specified: --url, --from-text, or --from-articles") + } + + if c.ModelPath == "" { + return fmt.Errorf("--model flag is required") + } + + // prevent dir traversal + if strings.Contains(filepath.Clean(c.ModelPath), "..") { + return fmt.Errorf("invalid model path: directory traversal not allowed") + } + + if c.URL != "" { + if _, err := url.Parse(c.URL); err != nil { + return fmt.Errorf("invalid URL format: %w", err) + } + } + + return nil +} + +// Run runs the scan: load the model, decide on a threshold, get articles, then score them in chunks. +// We bail out early on config problems but try to keep going even if some articles fail to fetch. +func (c *ScanCommand) Run(stdin io.Reader, stdout io.Writer) error { + if c.Verbose { + log.SetOutput(os.Stderr) + log.Println("Starting scan workflow...") + log.Printf("Source: %v", c.getSourceDescription()) + log.Printf("Model: %s", c.ModelPath) + } + + model, err := c.loadModel() + if err != nil { + return fmt.Errorf("failed to load model: %w", err) + } + + threshold, err := c.getThreshold(model) + if err != nil { + return fmt.Errorf("failed to determine threshold: %w", err) + } + + if c.Verbose { + log.Printf("Using threshold: %.3f", threshold) + } + + var articles []*core.Article + if c.FromArticles { + articles, err = c.readArticlesFromStdin(stdin) + } else { + articles, err = c.fetchArticles() + } + if err != nil { + return fmt.Errorf("failed to get articles: %w", err) + } + + if c.Verbose { + log.Printf("Processing %d articles", len(articles)) + } + + // process articles in chunks + return c.processArticles(articles, model, threshold, stdout, stdin) +} + + +// ============================================================================ +// ┏┳┓┏━┓╺┳┓┏━╸╻ ┏┓ ┏━╸┏━┓┏┓╻┏━╸╻┏━╸ +// ┃┃┃┃ ┃ ┃┃┣╸ ┃ ┃╺╋╸ ┃ ┃ ┃┃┗┫┣╸ ┃┃╺┓ +// ╹ ╹┗━┛╺┻┛┗━╸┗━╸ ┗━┛ ┗━╸┗━┛╹ ╹╹ ╹┗━┛ +// ============================================================================ + + + +func (c *ScanCommand) getSourceDescription() string { + if c.URL != "" { + return fmt.Sprintf("RSS feed: %s", c.URL) + } + if c.FromText { + return "text from stdin" + } + if c.FromArticles { + return "articles from stdin" + } + return "unknown" +} + +// loadModel reads and parses the model JSON file. +// The envelope contains weights, vocabulary, and optionally a recommended threshold. 
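+// Note: encoding/json ignores unknown fields, so model files carrying extra metadata still decode cleanly here.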
+func (c *ScanCommand) loadModel() (*core.ModelEnvelope, error) { + f, err := os.Open(c.ModelPath) + if err != nil { + return nil, fmt.Errorf("failed to open model file %s: %w", c.ModelPath, err) + } + defer f.Close() + + var model core.ModelEnvelope + if err := json.NewDecoder(f).Decode(&model); err != nil { + return nil, fmt.Errorf("failed to decode model: %w", err) + } + + return &model, nil +} + +func (c *ScanCommand) getThreshold(model *core.ModelEnvelope) (float64, error) { + if c.Threshold != "" { + var threshold float64 + _, err := fmt.Sscanf(c.Threshold, "%f", &threshold) + if err == nil { + return threshold, nil + } + } + + if model.Meta != nil { + if meta, ok := model.Meta["recommended_threshold"].(float64); ok { + return meta, nil + } + } + + return core.DefaultScoreThreshold, nil +} + +// ============================================================================ +// ┏━┓┏━┓╺┳╸╻┏━╸╻ ┏━╸ ┏━┓┏━┓┏━╸┏━┓ +// ┣━┫┣┳┛ ┃ ┃┃ ┃ ┣╸ ┗━┓┣┳┛┃ ┗━┓ +// ╹ ╹╹┗╸ ╹ ╹┗━╸┗━╸┗━╸ ┗━┛╹┗╸┗━╸┗━┛ +// ============================================================================ + + +func (c *ScanCommand) fetchArticles() ([]*core.Article, error) { + if c.FromText { + return c.extractURLsFromText(os.Stdin) + } + if c.URL != "" { + return c.fetchRSSFeed(c.URL) + } + return nil, fmt.Errorf("no valid source specified") +} + +// extractURLsFromText pulls URLs from plain text on stdin. +// We create minimal Article objects since only the URL is needed for scoring. +func (c *ScanCommand) extractURLsFromText(stdin io.Reader) ([]*core.Article, error) { + var urls []string + s := bufio.NewScanner(stdin) + for s.Scan() { + line := s.Text() + // url extraction + fields := strings.Fields(line) + for _, field := range fields { + if strings.HasPrefix(field, "http://") || strings.HasPrefix(field, "https://") { + urls = append(urls, field) + } + } + } + + // create Article objs for URLs + articles := make([]*core.Article, len(urls)) + for i, url := range urls { + articles[i] = &core.Article{ + URL: url, + Title: fmt.Sprintf("Article from %s", url), + Content: "", + } + } + + return articles, s.Err() +} + +// fetchRSSFeed fetches and parses a single RSS feed with a 30s timeout. +// We skip articles with short titles since they're usually noise or truncated. 
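+// Note: both the client timeout and the request context are bounded by core.DefaultHTTPTimeout; whichever expires first cancels the fetch.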
+func (c *ScanCommand) fetchRSSFeed(url string) ([]*core.Article, error) { + client := &http.Client{Timeout: core.DefaultHTTPTimeout} + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("error building request: %w", err) + } + req.Header.Set("User-Agent", core.PoliteUserAgent) + + ctx, cancel := context.WithTimeout(context.Background(), core.DefaultHTTPTimeout) + defer cancel() + + resp, err := client.Do(req.WithContext(ctx)) + if err != nil { + return nil, fmt.Errorf("error fetching %s: %w", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("error reading response from %s: %w", url, err) + } + + // parse feed + fp := gofeed.NewParser() + feed, err := fp.Parse(strings.NewReader(string(body))) + if err != nil { + return nil, fmt.Errorf("error parsing feed from %s: %w", url, err) + } + + var articles []*core.Article + for _, item := range feed.Items { + article := &core.Article{ + URL: item.Link, + Title: strings.TrimSpace(item.Title), + } + + if len(article.Title) >= c.MinTitleLength { + articles = append(articles, article) + } + } + + return articles, nil +} + +// readArticlesFromStdin reads Article objects from JSONL on stdin. +// Malformed lines are skipped to allow partial processing of corrupted input. +func (c *ScanCommand) readArticlesFromStdin(stdin io.Reader) ([]*core.Article, error) { + var articles []*core.Article + decoder := json.NewDecoder(stdin) + for { + var article core.Article + if err := decoder.Decode(&article); err != nil { + if err == io.EOF { + break + } + continue + } + + if len(article.Title) >= c.MinTitleLength { + articles = append(articles, &article) + } + } + return articles, nil +} + + + +// ============================================================================ +// ┏━┓┏━┓┏━┓┏━╸┏━╸┏━┓┏━┓ ┏━┓┏━┓╺┳╸╻┏━╸╻ ┏━╸┏━┓ +// ┣━┛┣┳┛┃ ┃┃ ┣╸ ┗━┓┗━┓ ┣━┫┣┳┛ ┃ ┃┃ ┃ ┣╸ ┗━┓ +// ╹ ╹┗╸┗━┛┗━╸┗━╸┗━┛┗━┛ ╹ ╹╹┗╸ ╹ ╹┗━╸┗━╸┗━╸┗━┛ +// ============================================================================ + + +// processArticles handles scoring and filtering in batches to keep memory usage predictable. +// Scoring errors don't crash the process - we log them and continue with the next article. 
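+// Matches are encoded to stdout as each chunk finishes, so downstream consumers start seeing results before the full scan completes.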
+func (c *ScanCommand) processArticles(articles []*core.Article, model *core.ModelEnvelope, threshold float64, stdout io.Writer, stdin io.Reader) error { + vectorizer := core.CreateVectorizerFromModel(model) + + encoder := json.NewEncoder(stdout) + + // process each batch + for i := 0; i < len(articles); i += c.ChunkSize { + end := i + c.ChunkSize + if end > len(articles) { + end = len(articles) + } + + chunk := articles[i:end] + if c.Verbose { + log.Printf("Processing chunk %d-%d of %d articles", i+1, end, len(articles)) + } + + // calc score for batch + docs := make([]string, len(chunk)) + for j, article := range chunk { + docs[j] = strings.TrimSpace(article.Title) + } + + vectors := vectorizer.Transform(docs) + scores := make([]float64, len(chunk)) + + for j, vector := range vectors { + score, err := core.PredictScore(vector, model.Weights) + if err != nil { + log.Printf("Error computing score for article %d: %v", i+j, err) + scores[j] = 0.0 + } else { + scores[j] = score + } + } + + for j, article := range chunk { + score := scores[j] + article.Score = &score + + if score >= threshold { + if err := encoder.Encode(article); err != nil { + log.Printf("Error encoding article: %v", err) + } + } + } + } + + if c.Verbose { + log.Println("Scan complete") + } + + return nil +} diff --git a/cmds/serve.go b/cmds/serve.go new file mode 100644 index 0000000..92aa64c --- /dev/null +++ b/cmds/serve.go @@ -0,0 +1,1010 @@ +// Serve command: HTTP server for web UI and APIs. +// +// Two main flows: live-feed (cached + background refresh) and tools (on-demand scoring). +// Live-feed rescans all configured RSS feeds on a timer (default 24h), caches results, +// serves filtered articles via web UI and JSON/RSS APIs. +// Tools provides real-time /score (single title) and /scan (ad-hoc feed) endpoints. +// Background refresh continues despite individual feed failures; RWMutex allows +// many concurrent readers with exclusive writer updates. +// Templates are embedded for single-binary deployment. +package cmds + +import ( + "bufio" + "context" + "embed" + "encoding/json" + "flag" + "fmt" + "html/template" + "io" + "log" + "net/http" + "net/url" + "os" + "os/signal" + "path/filepath" + "regexp" + "sort" + "strings" + "sync" + "syscall" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/mmcdole/gofeed" + "scholscan/core" +) + +//go:embed templates/*.html +var templateFS embed.FS + +// ============================================================================ +// ┏━╸┏┳┓╺┳┓ ┏━┓┏┓ ┏┓ +// ┃ ┃┃┃ ┃┃ ┃ ┃┣┻┓ ┃ +// ┗━╸╹ ╹╺┻┛ ┗━┛┗━┛┗━┛ +// ============================================================================ + +type ServeCommand struct { + Port int + RSSWorldPath string + RefreshInterval string + ModelPath string + Title string + + // Parsed interval + refreshInterval time.Duration + // Loaded model (cached) + model *core.ModelEnvelope + modelMu sync.RWMutex + // Cached filtered RSS results and timestamp. + // RWMutex allows many concurrent readers (HTTP handlers) with exclusive writer (background refresh). + filteredResults []*core.Article + filteredResultsTime time.Time + resultsMu sync.RWMutex + // Loaded templates + tmpl *template.Template +} + +func (c *ServeCommand) Name() string { return "serve" } + +// Init configures the serve command with robust input validation. +// Prevents directory traversal, validates paths, and sets sensible defaults. +// Ensures only one configuration is possible to reduce runtime complexity. 
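+// All flags have working defaults, so "scholscan serve" runs as-is when model.json and rss_world.txt are in the working directory.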
+func (c *ServeCommand) Init(args []string) error { + fs := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + fs.Usage = func() { + fmt.Fprint(fs.Output(), `Usage: scholscan serve [options] + + Start HTTP server for filtered RSS and scoring web UI. + + Flags: + `) + fs.PrintDefaults() + fmt.Fprint(fs.Output(), ` + Examples: + scholscan serve --port 8080 --rss-world rss_world.txt --model model.json + scholscan serve --refresh-interval 24h --model ./model.json --rss-world feeds.txt + `) + } + + fs.IntVar(&c.Port, "port", 8080, "Port to listen on") + fs.StringVar(&c.RSSWorldPath, "rss-world", "rss_world.txt", "Path to RSS world file (one feed URL per line)") + fs.StringVar(&c.RefreshInterval, "refresh-interval", "24h", "Interval for background rescans (e.g., 24h, 1h)") + fs.StringVar(&c.ModelPath, "model", "model.json", "Path to trained model JSON file") + fs.StringVar(&c.Title, "title", "", "Custom title for the web interface") + + if err := fs.Parse(args); err != nil { + return err + } + + if fs.NArg() != 0 { + return fmt.Errorf("unexpected arguments provided: %v", fs.Args()) + } + + // Parse refresh interval + interval, err := time.ParseDuration(c.RefreshInterval) + if err != nil { + return fmt.Errorf("invalid refresh-interval %q: %w", c.RefreshInterval, err) + } + c.refreshInterval = interval + + if strings.Contains(filepath.Clean(c.RSSWorldPath), "..") { + return fmt.Errorf("invalid rss-world path: directory traversal not allowed") + } + if strings.Contains(filepath.Clean(c.ModelPath), "..") { + return fmt.Errorf("invalid model path: directory traversal not allowed") + } + + return nil +} + +func (c *ServeCommand) Run(stdin io.Reader, stdout io.Writer) error { + log.Printf("Starting scholscan server on port %d", c.Port) + + // Initialize filteredResultsTime to server start time + c.resultsMu.Lock() + c.filteredResultsTime = time.Now() + c.resultsMu.Unlock() + + // Load templates at startup + tmpl, err := template.ParseFS(templateFS, "templates/*.html") + if err != nil { + return fmt.Errorf("failed to parse templates: %w", err) + } + c.tmpl = tmpl + log.Printf("Templates loaded successfully") + + // Load model at startup + model, err := c.loadModel() + if err != nil { + return fmt.Errorf("failed to load model at startup: %w", err) + } + c.modelMu.Lock() + c.model = model + c.modelMu.Unlock() + + log.Printf("Model loaded successfully") + + // Start background ticker for periodic refresh + ticker := time.NewTicker(c.refreshInterval) + go c.backgroundRefresh(ticker) + + // Perform initial scan asynchronously + go func() { + log.Println("Starting initial feed scan...") + if err := c.refreshFilteredResults(); err != nil { + log.Printf("Warning: initial scan failed: %v", err) + } else { + c.resultsMu.RLock() + count := len(c.filteredResults) + c.resultsMu.RUnlock() + log.Printf("Initial scan complete, %d articles filtered", count) + } + }() + + // Setup HTTP handlers + http.HandleFunc("/", c.handleRoot) + http.HandleFunc("/live-feed", c.handleLiveFeed) + http.HandleFunc("/tools", c.handleTools) + http.HandleFunc("/score", c.handleScore) + http.HandleFunc("/scan", c.handleScan) + http.HandleFunc("/api/filtered/feed", c.handleFilteredFeed) + http.HandleFunc("/api/filtered/rss", c.handleFilteredRSS) + http.HandleFunc("/api/health", c.handleHealth) + + // Setup server with graceful shutdown + server := &http.Server{ + Addr: fmt.Sprintf(":%d", c.Port), + Handler: http.DefaultServeMux, + ReadTimeout: core.DefaultReadTimeout, + WriteTimeout: core.DefaultWriteTimeout, + IdleTimeout: 
core.DefaultIdleTimeout, + } + + // Handle shutdown signals + sigChan := make(chan os.Signal, 1) + signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM) + + go func() { + <-sigChan + log.Println("Shutdown signal received") + ticker.Stop() + ctx, cancel := context.WithTimeout(context.Background(), core.DefaultShutdownTimeout) + defer cancel() + if err := server.Shutdown(ctx); err != nil { + log.Printf("Server shutdown error: %v", err) + } + }() + + log.Printf("Server listening on http://localhost:%d", c.Port) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + return fmt.Errorf("server error: %w", err) + } + + return nil +} + +// ============================================================================ +// ┏━╸┏━┓┏━┓┏━╸ ╻ ┏━┓┏━╸╻┏━╸ +// ┃ ┃ ┃┣┳┛┣╸ ┃ ┃ ┃┃╺┓┃┃ +// ┗━╸┗━┛╹┗╸┗━╸ ┗━╸┗━┛┗━┛╹┗━╸ +// ============================================================================ + +func (c *ServeCommand) loadModel() (*core.ModelEnvelope, error) { + f, err := os.Open(c.ModelPath) + if err != nil { + return nil, fmt.Errorf("failed to open model file %s: %w", c.ModelPath, err) + } + defer f.Close() + + var model core.ModelEnvelope + if err := json.NewDecoder(f).Decode(&model); err != nil { + return nil, fmt.Errorf("failed to decode model: %w", err) + } + + return &model, nil +} + +func (c *ServeCommand) scoreArticle(article *core.Article, vectorizer *core.TFIDFVectorizer, model *core.ModelEnvelope) float64 { + docs := []string{strings.TrimSpace(article.Title)} + vectors := vectorizer.Transform(docs) + + if len(vectors) == 0 || len(vectors[0]) == 0 { + return 0.0 + } + + score, err := core.PredictScore(vectors[0], model.Weights) + if err != nil { + // Return 0.0 on error (below threshold). Malformed articles don't break the display, + // they just get filtered out. Log the error for diagnostics. + log.Printf("Error scoring article: %v", err) + return 0.0 + } + + return score +} + +func (c *ServeCommand) getThreshold(model *core.ModelEnvelope) (float64, error) { + if model.Meta != nil { + if threshold, ok := model.Meta["recommended_threshold"].(float64); ok { + return threshold, nil + } + } + return core.DefaultScoreThreshold, nil +} + +// scoreAndFormatArticles scores a list of articles and returns them formatted for templates. +// Articles are scored using the model and vectorizer, then returned with human-readable ratings. 
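+// The maps use template-friendly keys ("Title", "URL", "Source", "Rating", "Score") shared by the HTML results page and the scan handler's sorting.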
+func (c *ServeCommand) scoreAndFormatArticles(articles []*core.Article, vectorizer *core.TFIDFVectorizer, model *core.ModelEnvelope, threshold float64) []map[string]interface{} { + type ArticleResponse struct { + Title string `json:"title"` + URL string `json:"url"` + Source string `json:"source,omitempty"` + Rating int `json:"rating"` + Score float64 `json:"score"` + } + + scored := make([]ArticleResponse, 0, len(articles)) + for _, article := range articles { + score := c.scoreArticle(article, vectorizer, model) + rating := core.ScoreToScale(score, threshold) + + scored = append(scored, ArticleResponse{ + Title: article.Title, + URL: article.URL, + Source: article.Source, + Rating: rating, + Score: score, + }) + } + + result := make([]map[string]interface{}, len(scored)) + for i, a := range scored { + result[i] = map[string]interface{}{ + "Title": a.Title, + "URL": a.URL, + "Source": a.Source, + "Rating": a.Rating, + "Score": a.Score, + } + } + return result +} + +// ============================================================================ +// ┏━┓┏━┓┏━┓ ┏━┓╺┳╸╻ ╻┏━╸┏━╸ +// ┣┳┛┗━┓┗━┓ ┗━┓ ┃ ┃ ┃┣╸ ┣╸ +// ╹┗╸┗━┛┗━┛ ┗━┛ ╹ ┗━┛╹ ╹ +// ============================================================================ + +func (c *ServeCommand) readRSSWorldFeeds() ([]string, error) { + f, err := os.Open(c.RSSWorldPath) + if err != nil { + return nil, fmt.Errorf("failed to open rss_world file %s: %w", c.RSSWorldPath, err) + } + defer f.Close() + + var feeds []string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line != "" && !strings.HasPrefix(line, "#") { + feeds = append(feeds, line) + } + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("error reading rss_world file: %w", err) + } + + return feeds, nil +} + +func (c *ServeCommand) refreshFilteredResults() error { + feeds, err := c.readRSSWorldFeeds() + if err != nil { + return err + } + + c.modelMu.RLock() + model := c.model + c.modelMu.RUnlock() + + if model == nil { + return fmt.Errorf("model not loaded") + } + + // Scan all feeds. Continue on individual feed failures to maximize results. + // RSS feeds are often flaky; one down shouldn't prevent others from being processed. + var allArticles []*core.Article + for _, feed := range feeds { + articles, err := c.fetchRSSFeed(feed) + if err != nil { + log.Printf("Warning: failed to fetch feed %s: %v", feed, err) + continue + } + allArticles = append(allArticles, articles...) + } + + // Score and filter articles + threshold, err := c.getThreshold(model) + if err != nil { + return err + } + + vectorizer := core.CreateVectorizerFromModel(model) + + filtered := make([]*core.Article, 0, len(allArticles)) + for _, article := range allArticles { + score := c.scoreArticle(article, vectorizer, model) + if score >= threshold { + // Create a copy with score to avoid reference issues + articleCopy := *article + articleCopy.Score = &score + filtered = append(filtered, &articleCopy) + } + } + + c.resultsMu.Lock() + c.filteredResults = filtered + c.filteredResultsTime = time.Now() + c.resultsMu.Unlock() + + return nil +} + +// backgroundRefresh runs in a goroutine, rescanning all RSS feeds on interval. +// Failures in individual feeds don't affect others - we log and continue. 
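+// Note: ticker.Stop (called at shutdown) does not close ticker.C, so this loop simply stops receiving ticks; the process exits shortly after.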
+func (c *ServeCommand) backgroundRefresh(ticker *time.Ticker) { + for range ticker.C { + log.Println("Background refresh started") + if err := c.refreshFilteredResults(); err != nil { + log.Printf("Background refresh error (continuing): %v", err) + } else { + c.resultsMu.RLock() + count := len(c.filteredResults) + c.resultsMu.RUnlock() + log.Printf("Background refresh complete, %d articles filtered", count) + } + } +} + +func (c *ServeCommand) fetchRSSFeed(url string) ([]*core.Article, error) { + client := &http.Client{Timeout: core.DefaultHTTPTimeout} + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("error building request: %w", err) + } + req.Header.Set("User-Agent", core.PoliteUserAgent) + + ctx, cancel := context.WithTimeout(context.Background(), core.DefaultHTTPTimeout) + defer cancel() + + resp, err := client.Do(req.WithContext(ctx)) + if err != nil { + return nil, fmt.Errorf("error fetching %s: %w", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, url) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("error reading response from %s: %w", url, err) + } + + fp := gofeed.NewParser() + feed, err := fp.Parse(strings.NewReader(string(body))) + if err != nil { + return nil, fmt.Errorf("error parsing feed from %s: %w", url, err) + } + + var articles []*core.Article + for _, item := range feed.Items { + article := &core.Article{ + URL: item.Link, + Title: strings.TrimSpace(item.Title), + Source: feed.Title, + } + + if item.PublishedParsed != nil { + article.PublishedAt = item.PublishedParsed + } + + if len(article.Title) >= core.MinTitleLength { + articles = append(articles, article) + } + } + + return articles, nil +} + +// ============================================================================ +// ╻ ╻┏━╸┏┓ ╻ ╻╻ +// ┃╻┃┣╸ ┣┻┓ ┃ ┃┃ +// ┗┻┛┗━╸┗━┛ ┗━┛╹ +// ============================================================================ + +func (c *ServeCommand) handleRoot(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + http.NotFound(w, r) + return + } + + // Redirect to live feed + http.Redirect(w, r, "/live-feed", http.StatusMovedPermanently) +} + +func (c *ServeCommand) handleLiveFeed(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + c.resultsMu.RLock() + articles := c.filteredResults + resultsTime := c.filteredResultsTime + c.resultsMu.RUnlock() + + c.modelMu.RLock() + model := c.model + c.modelMu.RUnlock() + + if model == nil { + http.Error(w, "Model not loaded", http.StatusInternalServerError) + return + } + + threshold, _ := c.getThreshold(model) + + // Parse filter parameter (day, week, or all) + filter := r.URL.Query().Get("filter") + if filter == "" { + filter = "all" + } + + // Filter articles by date if needed + now := time.Now() + filtered := articles + if filter == "day" || filter == "week" { + var cutoff time.Time + if filter == "day" { + cutoff = now.Add(-24 * time.Hour) + } else if filter == "week" { + cutoff = now.Add(-7 * 24 * time.Hour) + } + + filtered = make([]*core.Article, 0, len(articles)) + for _, article := range articles { + // Always include articles without PublishedAt + if article.PublishedAt == nil || article.PublishedAt.After(cutoff) { + filtered = append(filtered, article) + } + } + } + + // Convert articles to template format + type TemplateArticle struct { + 
Title string + URL string + Source string + Rating int + Score float64 + PublishedAt string + } + + templateArticles := make([]TemplateArticle, 0, len(filtered)) + for _, article := range filtered { + score := 0.0 + if article.Score != nil { + score = *article.Score + } + rating := core.ScoreToScale(score, threshold) + + publishedAt := "" + if article.PublishedAt != nil { + publishedAt = article.PublishedAt.Format("2006-01-02") + } + + templateArticles = append(templateArticles, TemplateArticle{ + Title: article.Title, + URL: article.URL, + Source: article.Source, + Rating: rating, + Score: score, + PublishedAt: publishedAt, + }) + } + + // Sort articles by score (highest first) + sort.Slice(templateArticles, func(i, j int) bool { + return templateArticles[i].Score > templateArticles[j].Score + }) + + data := map[string]interface{}{ + "Page": "live-feed", + "Articles": templateArticles, + "Threshold": threshold, + "UpdatedAt": resultsTime.Format("2006-01-02 15:04:05"), + "Filter": filter, + "Title": displayTitle(c.Title), + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := c.tmpl.ExecuteTemplate(w, "live-feed", data); err != nil { + http.Error(w, fmt.Sprintf("Template error: %v", err), http.StatusInternalServerError) + } +} + +func (c *ServeCommand) handleTools(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + data := map[string]interface{}{ + "Page": "tools", + "Title": displayTitle(c.Title), + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := c.tmpl.ExecuteTemplate(w, "tools", data); err != nil { + http.Error(w, fmt.Sprintf("Template error: %v", err), http.StatusInternalServerError) + } +} + +func (c *ServeCommand) handleScore(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet { + c.handleTools(w, r) + return + } + + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + c.modelMu.RLock() + model := c.model + c.modelMu.RUnlock() + + if model == nil { + http.Error(w, "Model not loaded", http.StatusInternalServerError) + return + } + + if err := r.ParseForm(); err != nil { + http.Error(w, fmt.Sprintf("Failed to parse form: %v", err), http.StatusBadRequest) + return + } + + title := strings.TrimSpace(r.FormValue("title")) + url := strings.TrimSpace(r.FormValue("url")) + + // If URL provided, fetch and extract title from it; otherwise use provided title. 
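+	// Extraction failures are rendered back to the form as errors rather than silently scoring an empty title.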
+ if url != "" { + extractedTitle, err := extractTitleFromURL(url) + if err != nil { + c.renderResultsError(w, fmt.Sprintf("Failed to extract title from URL: %v", err), title) + return + } + title = extractedTitle + } + + // Validate input before scoring + if valErr := c.validateTitle(title); valErr != "" { + c.renderResultsError(w, valErr, title) + return + } + + vectorizer := core.CreateVectorizerFromModel(model) + article := &core.Article{Title: title} + score := c.scoreArticle(article, vectorizer, model) + + threshold, _ := c.getThreshold(model) + rating := core.ScoreToScale(score, threshold) + + data := map[string]interface{}{ + "Page": "tools", + "IsScoreResult": true, + "Title": title, + "Rating": rating, + "Score": score, + "Threshold": threshold, + "PageTitle": displayTitle(c.Title), + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := c.tmpl.ExecuteTemplate(w, "results", data); err != nil { + http.Error(w, fmt.Sprintf("Template error: %v", err), http.StatusInternalServerError) + } +} + +func (c *ServeCommand) handleScan(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodGet { + c.handleTools(w, r) + return + } + + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + c.modelMu.RLock() + model := c.model + c.modelMu.RUnlock() + + if model == nil { + http.Error(w, "Model not loaded", http.StatusInternalServerError) + return + } + + if err := r.ParseForm(); err != nil { + http.Error(w, fmt.Sprintf("Failed to parse form: %v", err), http.StatusBadRequest) + return + } + + feedURL := strings.TrimSpace(r.FormValue("feed_url")) + + // Validate and fetch the feed + if valErr := c.validateFeedURL(feedURL); valErr != "" { + c.renderScanResultsError(w, valErr, feedURL) + return + } + + articles, err := c.fetchRSSFeed(feedURL) + if err != nil { + c.renderScanResultsError(w, fmt.Sprintf("Failed to fetch feed: %v", err), feedURL) + return + } + + // Score articles + threshold, _ := c.getThreshold(model) + vectorizer := core.CreateVectorizerFromModel(model) + scored := c.scoreAndFormatArticles(articles, vectorizer, model, threshold) + + sort.Slice(scored, func(i, j int) bool { + iScore := scored[i]["Score"].(float64) + jScore := scored[j]["Score"].(float64) + return iScore > jScore + }) + + data := map[string]interface{}{ + "Page": "tools", + "IsScanResult": true, + "FeedURL": feedURL, + "Articles": scored, + "Threshold": threshold, + "PageTitle": displayTitle(c.Title), + } + + w.Header().Set("Content-Type", "text/html; charset=utf-8") + if err := c.tmpl.ExecuteTemplate(w, "results", data); err != nil { + http.Error(w, fmt.Sprintf("Template error: %v", err), http.StatusInternalServerError) + } +} + +// ============================================================================ +// ┏━┓┏━┓╻ ┏━╸┏┓╻╺┳┓┏━┓┏━┓╻┏┓╻╺┳╸┏━┓ +// ┣━┫┣━┛┃ ┣╸ ┃┗┫ ┃┃┣━┛┃ ┃┃┃┗┫ ┃ ┗━┓ +// ╹ ╹╹ ╹ ┗━╸╹ ╹╺┻┛╹ ┗━┛╹╹ ╹ ╹ ┗━┛ +// ============================================================================ + +func (c *ServeCommand) handleFilteredFeed(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + c.resultsMu.RLock() + articles := c.filteredResults + resultsTime := c.filteredResultsTime + c.resultsMu.RUnlock() + + c.modelMu.RLock() + model := c.model + c.modelMu.RUnlock() + + threshold, _ := c.getThreshold(model) + + type ArticleResponse struct { + Title string `json:"title"` + URL string `json:"url"` + Source string 
`json:"source,omitempty"` + Rating int `json:"rating"` + Score float64 `json:"score"` + } + + scored := make([]ArticleResponse, 0, len(articles)) + for _, article := range articles { + score := 0.0 + if article.Score != nil { + score = *article.Score + } + rating := core.ScoreToScale(score, threshold) + + scored = append(scored, ArticleResponse{ + Title: article.Title, + URL: article.URL, + Source: article.Source, + Rating: rating, + Score: score, + }) + } + + w.Header().Set("Content-Type", "application/json") + w.Header().Set("Cache-Control", "no-cache, no-store, must-revalidate") + + if err := json.NewEncoder(w).Encode(map[string]interface{}{ + "total": len(articles), + "threshold": threshold, + "updated_at": resultsTime, + "articles": scored, + }); err != nil { + http.Error(w, "Failed to encode response", http.StatusInternalServerError) + } +} + +func (c *ServeCommand) handleFilteredRSS(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + c.resultsMu.RLock() + articles := c.filteredResults + c.resultsMu.RUnlock() + + c.modelMu.RLock() + model := c.model + c.modelMu.RUnlock() + + w.Header().Set("Content-Type", "application/rss+xml") + w.Header().Set("Cache-Control", "public, max-age=3600") + + // Generate RSS feed + fmt.Fprintf(w, `<?xml version="1.0" encoding="UTF-8"?> + <rss version="2.0"> + <channel> + <title>%s - Filtered Articles</title> + <link>http://scholscan.local</link> + <description>Articles filtered by your learned preferences (scored 1-10)</description> + `, displayTitle(c.Title)) + + for _, article := range articles { + rawScore := 0.0 + if article.Score != nil { + rawScore = *article.Score + } + + threshold, _ := c.getThreshold(model) + scaledScore := core.ScoreToScale(rawScore, threshold) + + title := escapeXML(article.Title) + url := escapeXML(article.URL) + description := fmt.Sprintf("SCHOLSCAN SCORE = %d/10 (raw: %.3f)", scaledScore, rawScore) + + fmt.Fprintf(w, ` <item> + <title>%s</title> + <link>%s</link> + <description>%s</description> + </item> + `, title, url, description) + } + + fmt.Fprint(w, ` </channel> + </rss>`) +} + +func (c *ServeCommand) handleHealth(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + c.modelMu.RLock() + modelLoaded := c.model != nil + c.modelMu.RUnlock() + + status := "ok" + if !modelLoaded { + status = "model_not_loaded" + w.WriteHeader(http.StatusInternalServerError) + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(map[string]interface{}{ + "status": status, + "model_loaded": modelLoaded, + "timestamp": time.Now().Unix(), + }); err != nil { + http.Error(w, "Failed to encode response", http.StatusInternalServerError) + } +} + +// ============================================================================ +// ╻ ╻┏━╸╻ ┏━┓┏━╸┏━┓┏━┓ +// ┣━┫┣╸ ┃ ┣━┛┣╸ ┣┳┛┗━┓ +// ╹ ╹┗━╸┗━╸╹ ┗━╸╹┗╸┗━┛ +// ============================================================================ + +func displayTitle(custom string) string { + if custom != "" { + return custom + } + return "ScholScan" +} + +// extractTitleFromURL fetches the content from a URL and extracts the title from the HTML. +// Designed to be resilient: tries multiple title sources, handles various URL formats, +// and provides meaningful error feedback if extraction fails. 
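+// Bare DOIs ("10.xxxx/...") are resolved via https://doi.org, and scheme-less URLs default to https.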
+func extractTitleFromURL(rawURL string) (string, error) {
+	if rawURL == "" {
+		return "", fmt.Errorf("empty URL")
+	}
+
+	// Check if it's a DOI
+	if strings.HasPrefix(rawURL, "10.") {
+		// Convert DOI to URL
+		rawURL = fmt.Sprintf("https://doi.org/%s", rawURL)
+	} else if !strings.HasPrefix(rawURL, "http://") && !strings.HasPrefix(rawURL, "https://") {
+		rawURL = "https://" + rawURL
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), core.DefaultContextTimeout)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, "GET", rawURL, nil)
+	if err != nil {
+		return "", fmt.Errorf("invalid URL: %w", err)
+	}
+	req.Header.Set("User-Agent", core.PoliteUserAgent)
+	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
+
+	resp, err := core.DoRequestWithRetry(ctx, core.DefaultHTTPClient, req)
+	if err != nil {
+		return "", fmt.Errorf("failed to fetch URL: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, resp.Status)
+	}
+
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("failed to parse HTML: %w", err)
+	}
+
+	// Fallback chain: <title> → og:title → twitter:title → <h1>
+	// Different sites populate these differently; trying multiple increases success rate.
+	title := ""
+
+	if t := doc.Find("title").Text(); t != "" {
+		title = strings.TrimSpace(t)
+	}
+
+	if title == "" {
+		if t, exists := doc.Find(`meta[property="og:title"]`).Attr("content"); exists && t != "" {
+			title = strings.TrimSpace(t)
+		}
+	}
+
+	if title == "" {
+		if t, exists := doc.Find(`meta[name="twitter:title"]`).Attr("content"); exists && t != "" {
+			title = strings.TrimSpace(t)
+		}
+	}
+
+	if title == "" {
+		if t := doc.Find("h1").First().Text(); t != "" {
+			title = strings.TrimSpace(t)
+		}
+	}
+
+	if title == "" {
+		return "", fmt.Errorf("could not extract title from page")
+	}
+
+	// Clean up common "Publisher | Title" patterns: collapse pipe separators to a
+	// single space rather than deleting them, which would fuse adjacent words.
+	reClean := regexp.MustCompile(`\s*\|\s*`)
+	title = reClean.ReplaceAllString(title, " ")
+	title = strings.TrimSpace(title)
+
+	if len(title) < 10 {
+		return "", fmt.Errorf("extracted title too short: %q", title)
+	}
+
+	return title, nil
+}
+
+// validateTitle checks that a title is suitable for scoring.
+// Returns an error message string if invalid, empty string if valid.
+func (c *ServeCommand) validateTitle(title string) string {
+	if strings.TrimSpace(title) == "" {
+		return "Title cannot be empty"
+	}
+	if len(title) > 1000 {
+		return "Title too long (max 1000 characters)"
+	}
+	return ""
+}
+
+// renderResultsError renders the results template with an error message.
+func (c *ServeCommand) renderResultsError(w http.ResponseWriter, errMsg, title string) {
+	data := map[string]interface{}{
+		"Page":          "tools",
+		"IsScoreResult": true,
+		"Error":         errMsg,
+		"Title":         title,
+		"PageTitle":     displayTitle(c.Title),
+	}
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	if err := c.tmpl.ExecuteTemplate(w, "results", data); err != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", err), http.StatusInternalServerError)
+	}
+}
+
+// validateFeedURL checks that a feed URL is non-empty and has valid format.
+// Returns an error message string if invalid, empty string if valid.
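+// Note: url.Parse accepts almost any string, so this check mainly rejects empty input; unreachable feeds surface later as fetch errors.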
+func (c *ServeCommand) validateFeedURL(feedURL string) string {
+	if feedURL == "" {
+		return "Feed URL cannot be empty"
+	}
+	if _, err := url.Parse(feedURL); err != nil {
+		return "Invalid URL format"
+	}
+	return ""
+}
+
+// renderScanResultsError renders the results template with an error for scan operation.
+func (c *ServeCommand) renderScanResultsError(w http.ResponseWriter, errMsg, feedURL string) {
+	data := map[string]interface{}{
+		"Page":         "tools",
+		"IsScanResult": true,
+		"Error":        errMsg,
+		"FeedURL":      feedURL,
+		"PageTitle":    displayTitle(c.Title),
+	}
+	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	if err := c.tmpl.ExecuteTemplate(w, "results", data); err != nil {
+		http.Error(w, fmt.Sprintf("Template error: %v", err), http.StatusInternalServerError)
+	}
+}
+
+// escapeXML escapes the five XML special characters. The "&" replacement must
+// run first so the entities introduced by the later replacements are not mangled.
+func escapeXML(s string) string {
+	s = strings.ReplaceAll(s, "&", "&amp;")
+	s = strings.ReplaceAll(s, "<", "&lt;")
+	s = strings.ReplaceAll(s, ">", "&gt;")
+	s = strings.ReplaceAll(s, "\"", "&quot;")
+	s = strings.ReplaceAll(s, "'", "&apos;")
+	return s
+}
diff --git a/cmds/templates/live-feed.html b/cmds/templates/live-feed.html
new file mode 100644
index 0000000..1529ee1
--- /dev/null
+++ b/cmds/templates/live-feed.html
@@ -0,0 +1,158 @@
+{{define "live-feed"}}
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{{.Title}} - Live Feed</title>
+    <style>
+        /* ========================================
+           BASE STYLE
+           ======================================== */
+        * { margin: 0; padding: 0; box-sizing: border-box; }
+        body {
+            font-family: monospace;
+            background: #fff;
+            color: #000;
+            padding: 20px;
+            line-height: 1.6;
+        }
+        h1 {
+            font-size: 1.2em;
+            font-weight: bold;
+            margin-bottom: 20px;
+        }
+
+        /* ========================================
+           NAV (live-feed | score-scan)
+           ======================================== */
+        .nav {
+            margin-bottom: 30px;
+            display: flex;
+            gap: 30px;
+            border-bottom: 1px solid #000;
+            padding-bottom: 10px;
+        }
+        .nav a {
+            text-decoration: none;
+            color: #000;
+            font-family: monospace;
+        }
+        .nav a.active {
+            border-bottom: 2px solid #000;
+            padding-bottom: 5px;
+        }
+
+        /* ========================================
+           ARTICLE LIST
+           ======================================== */
+        .article {
+            margin-bottom: 15px;
+            padding: 10px;
+            border: 1px solid #ccc;
+        }
+        .article a {
+            color: #00f;
+            text-decoration: underline;
+        }
+        .article-meta {
+            margin-top: 8px;
+            color: #666;
+            font-size: 0.9em;
+        }
+
+        /* ========================================
+           ARTICLE LIST STUFF
+           ======================================== */
+        .summary {
+            margin-bottom: 15px;
+            padding: 10px;
+            border: 1px solid #000;
+            background: #f9f9f9;
+        }
+        .rss-link {
+            background: #f9f9f9;
+            padding: 15px;
+            border: 1px solid #000;
+            margin-bottom: 20px;
+        }
+        .rss-link a {
+            color: #00f;
+            text-decoration: underline;
+        }
+        .feed-list {
+            max-height: 600px;
+            overflow-y: auto;
+            border: 1px solid #000;
+            padding: 10px;
+        }
+
+        .error {
+            color: #f00;
+            margin-top: 10px;
+            padding: 10px;
+            border: 1px solid #f00;
+        }
+    </style>
+</head>
+<body>
+    <h1><a href="/live-feed" style="color: inherit; text-decoration: none;">{{.Title}}</a></h1>
+    <div class="nav">
+        <a href="/live-feed" class="active">Live Feed</a>
+        <a href="/tools">Score &amp; Scan</a>
+    </div>
+
+    <div class="rss-link">
+        <strong>Filtered RSS Feed:</strong>
+        <a href="/api/filtered/rss" target="_blank">Subscribe to filtered articles</a>
+        <span style="margin-left: 10px; color: #666;
font-size: 0.9em;">(rss link for feed readers)</span> + <div style="margin-top: 10px; padding-top: 10px; border-top: 1px solid #ccc; color: #666; font-size: 0.9em;"> + Last updated: <span id="feedTimestamp">{{if .UpdatedAt}}{{.UpdatedAt}}{{else}}—{{end}}</span> + </div> + </div> + + <div style="margin-bottom: 20px;"> + <strong>Filter by date:</strong> + <div style="margin-top: 8px; display: flex; gap: 10px;"> + <a href="/live-feed?filter=day" style="padding: 6px 12px; text-decoration: none; {{if eq .Filter "day"}}background: #000; color: #fff;{{else}}border: 1px solid #000; color: #000;{{end}}">Last 24h</a> + <a href="/live-feed?filter=week" style="padding: 6px 12px; text-decoration: none; {{if eq .Filter "week"}}background: #000; color: #fff;{{else}}border: 1px solid #000; color: #000;{{end}}">Last 7 days</a> + <a href="/live-feed?filter=all" style="padding: 6px 12px; text-decoration: none; {{if eq .Filter "all"}}background: #000; color: #fff;{{else}}border: 1px solid #000; color: #000;{{end}}">All</a> + </div> + </div> + + <div class="feed-list"> + {{if .Error}} + <div class="error">{{.Error}}</div> + {{else if .Articles}} + <div class="summary"> + <strong>{{len .Articles}}</strong> articles (threshold: {{printf "%.2f" .Threshold}}) + </div> + {{$threshold := .Threshold}} + {{range .Articles}} + {{$isGood := ge .Score $threshold}} + {{$bgColor := "white"}} + {{if $isGood}} + {{$bgColor = "#e8f5e9"}} + {{else}} + {{$bgColor = "#ffebee"}} + {{end}} + {{$indicator := "✗"}} + {{if $isGood}} + {{$indicator = "✓"}} + {{end}} + <div class="article" style="background-color: {{$bgColor}};"> + <div style="font-weight: bold;"> + <a href="{{.URL}}" target="_blank">{{.Title}}</a> + </div> + <div class="article-meta"> + Rating: {{$indicator}} {{.Rating}}/10 (raw: {{printf "%.3f" .Score}}) · {{.Source}}{{if .PublishedAt}} · {{.PublishedAt}}{{end}} + </div> + </div> + {{end}} + {{else}} + <p>No articles to display</p> + {{end}} + </div> +</body> +</html> +{{end}} diff --git a/cmds/templates/results.html b/cmds/templates/results.html new file mode 100644 index 0000000..13f68e0 --- /dev/null +++ b/cmds/templates/results.html @@ -0,0 +1,279 @@ +{{define "results"}} +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>{{.PageTitle}} - Results</title> + <style> + /* ======================================== + BASE STYLE + ======================================== */ + * { margin: 0; padding: 0; box-sizing: border-box; } + body { + font-family: monospace; + background: #fff; + color: #000; + padding: 20px; + line-height: 1.6; + } + h1 { + font-size: 1.2em; + font-weight: bold; + margin-bottom: 20px; + } + h2 { + font-size: 1em; + font-weight: bold; + margin-bottom: 15px; + border-bottom: 1px solid #000; + padding-bottom: 10px; + } + + /* ======================================== + NAV (live-feed | score-scan) + ======================================== */ + .nav { + margin-bottom: 30px; + display: flex; + gap: 30px; + border-bottom: 1px solid #000; + padding-bottom: 10px; + } + .nav a { + text-decoration: none; + color: #000; + font-family: monospace; + } + .nav a.active { + border-bottom: 2px solid #000; + padding-bottom: 5px; + } + + /* ======================================== + LAYOUT (2-column grid for score-scan) + ======================================== */ + .container { + max-width: 1200px; + margin: 0 auto; + display: grid; + grid-template-columns: 1fr 1fr; + gap: 30px; + } + .section { + border: 1px solid 
#000; + padding: 20px; + } + + /* ======================================== + FORMS (input, textarea, button) + ======================================== */ + label { + display: block; + margin-top: 15px; + font-weight: bold; + } + input, textarea { + display: block; + width: 100%; + margin-top: 5px; + padding: 5px; + border: 1px solid #000; + font-family: monospace; + } + textarea { + resize: vertical; + min-height: 80px; + } + button { + margin-top: 15px; + padding: 5px 15px; + border: 1px solid #000; + background: #fff; + cursor: pointer; + font-family: monospace; + } + button:hover { + background: #000; + color: #fff; + } + button:active { + opacity: 0.8; + } + + /* ======================================== + RESULT BOXES + ======================================== */ + .result { + margin-top: 20px; + padding: 15px; + border: 1px solid #000; + background: #f5f5f5; + } + .score { + font-size: 3em; + font-weight: bold; + text-align: center; + margin: 20px 0; + } + .error { + color: #f00; + margin-top: 10px; + padding: 10px; + border: 1px solid #f00; + } + + /* ======================================== + ARTICLE LIST + ======================================== */ + .article { + margin-bottom: 15px; + padding: 10px; + border: 1px solid #ccc; + } + .article a { + color: #00f; + text-decoration: underline; + } + .article-meta { + margin-top: 8px; + color: #666; + font-size: 0.9em; + } + + /* ======================================== + ARTICLE LIST STUFF + ======================================== */ + .summary { + margin-bottom: 15px; + padding: 10px; + border: 1px solid #000; + background: #f9f9f9; + } + + small { + display: block; + margin-top: 5px; + color: #666; + } + + /* ======================================== + MOBILE + ======================================== */ + @media (max-width: 960px) { + .container { + grid-template-columns: 1fr; + gap: 20px; + } + } + </style> +</head> +<body> + <h1><a href="/live-feed" style="color: inherit; text-decoration: none;">{{.PageTitle}}</a></h1> + <div class="nav"> + <a href="/live-feed">Live Feed</a> + <a href="/tools" class="active">Score & Scan</a> + </div> + + <div class="container"> + {{if .IsScoreResult}} + <div class="section"> + <h2>Score Article</h2> + {{if .Error}} + <div class="error">{{.Error}}</div> + <form method="POST" action="/score" style="margin-top: 20px;"> + <label for="scoreTitle">Title:</label> + <input type="text" id="scoreTitle" name="title" placeholder="Enter article title" value="{{.Title}}" /> + <label for="scoreURL">URL or DOI:</label> + <input type="text" id="scoreURL" name="url" placeholder="https://example.com/article or 10.xxxx/doi" /> + <small>If URL is provided, title will be automatically extracted</small> + <button type="submit">Score</button> + </form> + {{else}} + <div class="result"> + <div class="score">{{.Rating}}/10</div> + <p style="text-align: center; color: #666;">Score: {{printf "%.3f" .Score}}</p> + <p style="text-align: center; margin-top: 10px; font-size: 0.9em;">{{.Title}}</p> + </div> + <form method="POST" action="/score" style="margin-top: 20px;"> + <label for="scoreTitle">Title:</label> + <input type="text" id="scoreTitle" name="title" placeholder="Enter article title" /> + <label for="scoreURL">URL or DOI:</label> + <input type="text" id="scoreURL" name="url" placeholder="https://example.com/article or 10.xxxx/doi" /> + <small>If URL is provided, title will be automatically extracted</small> + <button type="submit">Score Another</button> + </form> + {{end}} + </div> + + <div class="section"> + <h2>Scan 
Feed</h2> + <form method="POST" action="/scan"> + <label for="feedURL">RSS Feed URL:</label> + <input type="text" id="feedURL" name="feed_url" placeholder="https://example.com/rss.xml" required /> + <button type="submit">Scan</button> + </form> + </div> + + {{else if .IsScanResult}} + <div class="section"> + <h2>Score Article</h2> + <form method="POST" action="/score"> + <label for="scoreTitle">Title:</label> + <input type="text" id="scoreTitle" name="title" placeholder="Enter article title" /> + <label for="scoreURL">URL or DOI:</label> + <input type="text" id="scoreURL" name="url" placeholder="https://example.com/article or 10.xxxx/doi" /> + <small>If URL is provided, title will be automatically extracted</small> + <button type="submit">Score</button> + </form> + </div> + + <div class="section"> + <h2>Scan Feed</h2> + {{if .Error}} + <div class="error">{{.Error}}</div> + <form method="POST" action="/scan" style="margin-top: 20px;"> + <label for="feedURL">RSS Feed URL:</label> + <input type="text" id="feedURL" name="feed_url" placeholder="https://example.com/rss.xml" value="{{.FeedURL}}" required /> + <button type="submit">Try Again</button> + </form> + {{else}} + <div class="summary"> + <strong>{{len .Articles}}</strong> articles from {{.FeedURL}} (threshold: {{printf "%.2f" .Threshold}}) + </div> + <div style="max-height: 500px; overflow-y: auto; border: 1px solid #ccc; padding: 10px;"> + {{$threshold := .Threshold}} + {{range .Articles}} + {{$isGood := ge .Score $threshold}} + {{$bgColor := "white"}} + {{if $isGood}} + {{$bgColor = "#e8f5e9"}} + {{else}} + {{$bgColor = "#ffebee"}} + {{end}} + {{$indicator := "✗"}} + {{if $isGood}} + {{$indicator = "✓"}} + {{end}} + <div class="article" style="background-color: {{$bgColor}};"> + <div style="font-weight: bold;"> + <a href="{{.URL}}" target="_blank">{{.Title}}</a> + </div> + <div class="article-meta"> + Rating: {{$indicator}} {{.Rating}}/10 (raw: {{printf "%.3f" .Score}}) · {{.Source}} + </div> + </div> + {{end}} + </div> + <form method="POST" action="/scan" style="margin-top: 20px;"> + <label for="feedURL">RSS Feed URL:</label> + <input type="text" id="feedURL" name="feed_url" placeholder="https://example.com/rss.xml" required /> + <button type="submit">Scan Another</button> + </form> + {{end}} + </div> + {{end}} + </div> +</body> +</html> +{{end}} diff --git a/cmds/templates/tools.html b/cmds/templates/tools.html new file mode 100644 index 0000000..def04fe --- /dev/null +++ b/cmds/templates/tools.html @@ -0,0 +1,202 @@ +{{define "tools"}} +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>{{.Title}} - Score & Scan</title> + <style> + /* ======================================== + BASE STYLE + ======================================== */ + * { margin: 0; padding: 0; box-sizing: border-box; } + body { + font-family: monospace; + background: #fff; + color: #000; + padding: 20px; + line-height: 1.6; + } + h1 { + font-size: 1.2em; + font-weight: bold; + margin-bottom: 20px; + } + h2 { + font-size: 1em; + font-weight: bold; + margin-bottom: 15px; + border-bottom: 1px solid #000; + padding-bottom: 10px; + } + + /* ======================================== + NAV (live-feed | score-scan) + ======================================== */ + .nav { + margin-bottom: 30px; + display: flex; + gap: 30px; + border-bottom: 1px solid #000; + padding-bottom: 10px; + } + .nav a { + text-decoration: none; + color: #000; + font-family: monospace; + } + .nav a.active { 
+ border-bottom: 2px solid #000; + padding-bottom: 5px; + } + + /* ======================================== + LAYOUT (2-column grid for score-scan) + ======================================== */ + .container { + max-width: 1200px; + margin: 0 auto; + display: grid; + grid-template-columns: 1fr 1fr; + gap: 30px; + } + .section { + border: 1px solid #000; + padding: 20px; + } + + /* ======================================== + FORMS (input, textarea, button) + ======================================== */ + label { + display: block; + margin-top: 15px; + font-weight: bold; + } + input, textarea { + display: block; + width: 100%; + margin-top: 5px; + padding: 5px; + border: 1px solid #000; + font-family: monospace; + } + textarea { + resize: vertical; + min-height: 80px; + } + button { + margin-top: 15px; + padding: 5px 15px; + border: 1px solid #000; + background: #fff; + cursor: pointer; + font-family: monospace; + } + button:hover { + background: #000; + color: #fff; + } + button:active { + opacity: 0.8; + } + + /* ======================================== + RESULT BOXES + ======================================== */ + .result { + margin-top: 20px; + padding: 15px; + border: 1px solid #000; + background: #f5f5f5; + } + .score { + font-size: 3em; + font-weight: bold; + text-align: center; + margin: 20px 0; + } + .error { + color: #f00; + margin-top: 10px; + padding: 10px; + border: 1px solid #f00; + } + + /* ======================================== + ARTICLE LIST + ======================================== */ + .article { + margin-bottom: 15px; + padding: 10px; + border: 1px solid #ccc; + } + .article a { + color: #00f; + text-decoration: underline; + } + .article-meta { + margin-top: 8px; + color: #666; + font-size: 0.9em; + } + + /* ======================================== + ARTICLE LIST STUFF + ======================================== */ + .summary { + margin-bottom: 15px; + padding: 10px; + border: 1px solid #000; + background: #f9f9f9; + } + + small { + display: block; + margin-top: 5px; + color: #666; + } + + /* ======================================== + MOBILE + ======================================== */ + @media (max-width: 960px) { + .container { + grid-template-columns: 1fr; + gap: 20px; + } + } + </style> +</head> +<body> + <h1><a href="/live-feed" style="color: inherit; text-decoration: none;">{{.Title}}</a></h1> + <div class="nav"> + <a href="/live-feed">Live Feed</a> + <a href="/tools" class="active">Score & Scan</a> + </div> + + <div class="container"> + <div class="section"> + <h2>Score Article</h2> + <form method="POST" action="/score"> + <label for="scoreTitle">Title:</label> + <input type="text" id="scoreTitle" name="title" placeholder="Enter article title" /> + <label for="scoreURL" style="margin-top: 10px;">URL or DOI:</label> + <input type="text" id="scoreURL" name="url" placeholder="https://example.com/article or 10.xxxx/doi" /> + <small>If URL is provided, title will be automatically extracted</small> + <button type="submit">Score</button> + </form> + </div> + + <div class="section"> + <h2>Scan Feed</h2> + <form method="POST" action="/scan"> + <label for="feedURL">RSS Feed URL:</label> + <input type="text" id="feedURL" name="feed_url" placeholder="https://example.com/rss.xml" required /> + <button type="submit">Scan</button> + </form> + </div> + </div> +</body> +</html> +{{end}} diff --git a/cmds/train.go b/cmds/train.go new file mode 100644 index 0000000..e7e8915 --- /dev/null +++ b/cmds/train.go @@ -0,0 +1,841 @@ +// Train command learns model from positive examples and 
RSS feeds. +// Loads positives, fetches RSS feeds as negatives, excludes overlap, +// trains TF-IDF + logistic regression with 1:1 class balancing. +// Outputs model with validation threshold to stdout. +package cmds + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "math" + "math/rand" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "time" + + "github.com/mmcdole/gofeed" + "scholscan/core" +) + +// ============================================================================ +// ┏━╸┏┳┓╺┳┓ ┏━┓┏┓ ┏┓ +// ┃ ┃┃┃ ┃┃ ┃ ┃┣┻┓ ┃ +// ┗━╸╹ ╹╺┻┛ ┗━┛┗━┛┗━┛ +// ============================================================================ + +// Learns model from positive examples and RSS feeds +// Outputs trained model JSON to stdout +type TrainCommand struct { + positivesFile string + rssFeedsFile string + verboseOutput bool + lambda float64 + minDF int + maxDF float64 + ngramMax int +} + +func (c *TrainCommand) Name() string { return "train" } + +func (c *TrainCommand) Init(args []string) error { + fs := flag.NewFlagSet(c.Name(), flag.ContinueOnError) + fs.Usage = func() { + fmt.Fprint(fs.Output(), `Usage: scholscan train POSITIVES_FILE --rss-feeds RSS_FEEDS_FILE > model.json + +Train a TF-IDF + logistic regression model from positive examples and RSS feeds. + +The training workflow: + 1. Load positive examples from POSITIVES_FILE + 2. Fetch articles from RSS feeds list + 3. Exclude any positive examples from RSS feed articles + 4. Train model with balanced classes + 5. Output trained model to stdout as JSON + +Flags: +`) + fs.PrintDefaults() + fmt.Fprint(fs.Output(), ` +Arguments: + POSITIVES_FILE Path to JSONL file with positive examples (required) + +Example: + scholscan train positives.jsonl --rss-feeds rss_world.txt > model.json +`) + } + + fs.StringVar(&c.rssFeedsFile, "rss-feeds", "", "Path to text file with RSS feed URLs (required)") + fs.BoolVar(&c.verboseOutput, "verbose", false, "Show progress information") + fs.Float64Var(&c.lambda, "lambda", 0.001, "L2 regularization parameter for logistic regression") + fs.IntVar(&c.minDF, "min-df", 2, "Minimum document frequency (absolute count)") + fs.Float64Var(&c.maxDF, "max-df", 0.8, "Maximum document frequency (ratio, 0-1)") + fs.IntVar(&c.ngramMax, "ngram-max", 2, "Maximum n-gram size (e.g., 1=unigrams, 2=unigrams+bigrams)") + + // Check for help flag first + for _, arg := range args { + if arg == "--help" || arg == "-h" { + fs.Usage() + return flag.ErrHelp + } + } + + // Extract positional argument (POSITIVES_FILE) before parsing flags + if len(args) == 0 { + return fmt.Errorf("POSITIVES_FILE argument is required") + } + // The first argument should be the positives file, the rest are flags + c.positivesFile = args[0] + flagArgs := args[1:] + + if err := fs.Parse(flagArgs); err != nil { + return err + } + + if c.rssFeedsFile == "" { + return fmt.Errorf("--rss-feeds flag is required") + } + + // Validate paths are safe (prevent directory traversal) + if strings.Contains(filepath.Clean(c.positivesFile), "..") { + return fmt.Errorf("invalid positives file path: directory traversal not allowed") + } + if strings.Contains(filepath.Clean(c.rssFeedsFile), "..") { + return fmt.Errorf("invalid RSS feeds file path: directory traversal not allowed") + } + + return nil +} + +func (c *TrainCommand) Run(stdin io.Reader, stdout io.Writer) error { + if c.verboseOutput { + log.SetOutput(os.Stderr) + log.Println("Starting training workflow...") + log.Printf("Positives: %s", c.positivesFile) + 
log.Printf("RSS feeds: %s", c.rssFeedsFile) + } + + if c.verboseOutput { + log.Printf("Loading positives from %s...", c.positivesFile) + } + positives, err := c.loadArticles(c.positivesFile) + if err != nil { + return fmt.Errorf("failed to load positives: %w", err) + } + if c.verboseOutput { + log.Printf("Loaded %d positive examples", len(positives)) + } + + if c.verboseOutput { + log.Printf("Loading RSS feeds from %s...", c.rssFeedsFile) + } + rssURLs, err := c.loadRSSURLs(c.rssFeedsFile) + if err != nil { + return fmt.Errorf("failed to load RSS feeds: %w", err) + } + if c.verboseOutput { + log.Printf("Found %d RSS feeds to fetch", len(rssURLs)) + } + + negatives, err := c.fetchFromRSSFeeds(rssURLs) + if err != nil { + return fmt.Errorf("failed to fetch from RSS feeds: %w", err) + } + if c.verboseOutput { + log.Printf("Fetched %d articles from RSS feeds", len(negatives)) + } + + negatives = c.excludePositives(negatives, positives) + if c.verboseOutput { + log.Printf("After exclusion: %d negative examples", len(negatives)) + } + + if len(positives) == 0 || len(negatives) == 0 { + return fmt.Errorf("need both positive (%d) and negative (%d) examples for training", len(positives), len(negatives)) + } + + if c.verboseOutput { + log.Println("Training model...") + } + model, err := c.trainModel(positives, negatives) + if err != nil { + return fmt.Errorf("failed to train model: %w", err) + } + + // Output model + encoder := json.NewEncoder(stdout) + encoder.SetIndent("", " ") + if err := encoder.Encode(model); err != nil { + return fmt.Errorf("failed to write model: %w", err) + } + + return nil +} + +// ============================================================================ +// ╺┳┓┏━┓╺┳╸┏━┓ ╻ ┏━┓┏━┓╺┳┓╻┏┓╻┏━╸ +// ┃┃┣━┫ ┃ ┣━┫ ┃ ┃ ┃┣━┫ ┃┃┃┃┗┫┃╺┓ +// ╺┻┛╹ ╹ ╹ ╹ ╹ ┗━╸┗━┛╹ ╹╺┻┛╹╹ ╹┗━┛ +// ============================================================================ + +func (c *TrainCommand) loadArticles(filename string) ([]*core.Article, error) { + file, err := os.Open(filename) + if err != nil { + return nil, err + } + defer file.Close() + + var articles []*core.Article + decoder := json.NewDecoder(file) + lineCount := 0 + for { + var article core.Article + if err := decoder.Decode(&article); err != nil { + if err == io.EOF { + break + } + // Skip malformed json lines, don't fail on bad input. 
+ lineCount++ + continue + } + articles = append(articles, &article) + lineCount++ + if lineCount%500 == 0 && c.verboseOutput { + log.Printf(" Loaded %d articles so far", len(articles)) + } + } + return articles, nil +} + +// loadRSSURLs loads RSS feed URLs from a text file +func (c *TrainCommand) loadRSSURLs(filename string) ([]string, error) { + file, err := os.Open(filename) + if err != nil { + return nil, err + } + defer file.Close() + + var urls []string + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if line != "" && !strings.HasPrefix(line, "#") { + urls = append(urls, line) + } + } + return urls, scanner.Err() +} + +// fetchFromRSSFeeds fetches articles from multiple RSS feeds in parallel +func (c *TrainCommand) fetchFromRSSFeeds(rssURLs []string) ([]*core.Article, error) { + client := core.DefaultHTTPClient + type result struct { + url string + articles []*core.Article + err error + } + resultChan := make(chan result, len(rssURLs)) + + for _, rssURL := range rssURLs { + go func(url string) { + articles, err := c.fetchRSSFeed(client, url) + resultChan <- result{url: url, articles: articles, err: err} + }(rssURL) + } + + var allArticles []*core.Article + for i := 0; i < len(rssURLs); i++ { + res := <-resultChan + if res.err != nil { + if c.verboseOutput { + log.Printf("%s: failed to fetch", shortURL(res.url)) + } + } else { + if c.verboseOutput { + log.Printf("%s: %d articles", shortURL(res.url), len(res.articles)) + } + allArticles = append(allArticles, res.articles...) + } + } + + return allArticles, nil +} + +// ParseRSSFeed parses an RSS/Atom feed from the provided body into a slice of Articles. +func ParseRSSFeed(body []byte, baseURL string) ([]*core.Article, error) { + fp := gofeed.NewParser() + feed, err := fp.Parse(bytes.NewReader(body)) + if err != nil { + return nil, err + } + + var articles []*core.Article + for _, item := range feed.Items { + // Prefer explicit content; fall back to description. 
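+		// (gofeed maps RSS <content:encoded> and Atom <content> into
+		// item.Content; plain RSS 2.0 items often carry only <description>.)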
+ content := strings.TrimSpace(item.Content) + if content == "" { + content = item.Description + } + // Also check custom content field (for <content> tags in RSS) + if content == "" && item.Custom != nil { + if c, ok := item.Custom["content"]; ok && c != "" { + content = c + } + } + + // Clean and limit content length + content = core.CleanFeedContent(content) + + articles = append(articles, &core.Article{ + URL: item.Link, + Title: item.Title, + Content: content, + }) + } + return articles, nil +} + +// fetchRSSFeed fetches and parses a single RSS feed +func (c *TrainCommand) fetchRSSFeed(client *http.Client, rssURL string) ([]*core.Article, error) { + var body []byte + var err error + + // Handle file:// URLs locally + if strings.HasPrefix(rssURL, "file://") { + // Remove file:// prefix + filePath := strings.TrimPrefix(rssURL, "file://") + body, err = os.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("error reading file %s: %w", filePath, err) + } + } else { + // Handle HTTP/HTTPS URLs normally + req, err := http.NewRequest("GET", rssURL, nil) + if err != nil { + return nil, fmt.Errorf("error building request: %w", err) + } + req.Header.Set("User-Agent", core.PoliteUserAgent) + + // Make request with retry logic + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + resp, err := core.DoRequestWithRetry(ctx, client, req) + if err != nil { + return nil, fmt.Errorf("error fetching %s: %w", rssURL, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d from %s", resp.StatusCode, rssURL) + } + + // Read response body + body, err = io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("error reading response from %s: %w", rssURL, err) + } + } + + // Parse RSS/Atom feed + return ParseRSSFeed(body, rssURL) +} + +// ============================================================================ +// ╺┳┓┏━┓╺┳╸┏━┓ ┏━┓┏━┓┏━╸┏━┓ +// ┃┃┣━┫ ┃ ┣━┫ ┣━┛┣┳┛┣╸ ┣━┛ +// ╺┻┛╹ ╹ ╹ ╹ ╹ ╹ ╹┗╸┗━╸╹ +// ============================================================================ + +func (c *TrainCommand) excludePositives(negatives, positives []*core.Article) []*core.Article { + // Build set of positive URLs for O(1) lookup + positiveURLs := make(map[string]bool) + for _, pos := range positives { + positiveURLs[pos.URL] = true + } + + // Filter out positives + var filtered []*core.Article + for _, neg := range negatives { + if !positiveURLs[neg.URL] { + filtered = append(filtered, neg) + } + } + + return filtered +} + +// splitTrainingData performs a deterministic 80/20 split (seed=42). +// Deterministic ensures reproducible model training across runs. +func (c *TrainCommand) splitTrainingData(documents []string, labels []float64) ( + trainDocs, valDocs []string, + trainLabels, valLabels []float64, +) { + const validationSplitRatio = 0.2 + const splitSeed = 42 + + if len(documents) < 3 { + // Not enough data to split, use all for training. + // A split requires at least 2 training documents to avoid MaxDF issues + // and at least 1 validation document. + return documents, nil, labels, nil + } + + // Create a reproducible random source and shuffle indices. 
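+	// The fixed seed yields the same 80/20 partition on every run; e.g. with
+	// 1,000 documents, splitIndex below is 800, so indices[:800] become the
+	// training set and indices[800:] the validation set.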
+ rng := rand.New(rand.NewSource(splitSeed)) + indices := make([]int, len(documents)) + for i := range indices { + indices[i] = i + } + rng.Shuffle(len(indices), func(i, j int) { + indices[i], indices[j] = indices[j], indices[i] + }) + + splitIndex := int(float64(len(documents)) * (1.0 - validationSplitRatio)) + trainIndices := indices[:splitIndex] + valIndices := indices[splitIndex:] + + trainDocs = make([]string, len(trainIndices)) + trainLabels = make([]float64, len(trainIndices)) + for i, idx := range trainIndices { + trainDocs[i] = documents[idx] + trainLabels[i] = labels[idx] + } + + valDocs = make([]string, len(valIndices)) + valLabels = make([]float64, len(valIndices)) + for i, idx := range valIndices { + valDocs[i] = documents[idx] + valLabels[i] = labels[idx] + } + + return trainDocs, valDocs, trainLabels, valLabels +} + +// Downsample majority class to 1:1 ratio AFTER vectorizer.Fit() to preserve IDF values. +func (c *TrainCommand) downsampleToBalance(docs []string, labels []float64) ([]string, []float64) { + // Count positives and negatives + var posDocs, negDocs []string + var posLabels, negLabels []float64 + + for i, label := range labels { + if label == 1.0 { + posDocs = append(posDocs, docs[i]) + posLabels = append(posLabels, label) + } else { + negDocs = append(negDocs, docs[i]) + negLabels = append(negLabels, label) + } + } + + // If already balanced, return as-is + if len(posDocs) == len(negDocs) { + return docs, labels + } + + // Determine which class is majority + var majorityDocs, minorityDocs []string + var majorityLabels, minorityLabels []float64 + + if len(negDocs) > len(posDocs) { + // Negatives are majority + majorityDocs, minorityDocs = negDocs, posDocs + majorityLabels, minorityLabels = negLabels, posLabels + } else { + // Positives are majority (unlikely but handle) + majorityDocs, minorityDocs = posDocs, negDocs + majorityLabels, minorityLabels = posLabels, negLabels + } + + // Downsample majority to match minority size + minoritySize := len(minorityDocs) + rng := rand.New(rand.NewSource(42)) // Use fixed seed for reproducibility + + // Create random indices for downsampling + indices := make([]int, len(majorityDocs)) + for i := range indices { + indices[i] = i + } + rng.Shuffle(len(indices), func(i, j int) { + indices[i], indices[j] = indices[j], indices[i] + }) + + // Select downsampled majority + downsampledDocs := make([]string, 0, minoritySize*2) + downsampledLabels := make([]float64, 0, minoritySize*2) + + // Add all minority samples + downsampledDocs = append(downsampledDocs, minorityDocs...) + downsampledLabels = append(downsampledLabels, minorityLabels...) 
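+	// At this point every minority sample is kept; the loop below appends an
+	// equal number of randomly chosen majority samples, so e.g. 200 positives
+	// and 3,000 negatives train as 200 vs 200.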
+ + // Add downsampled majority + for i := 0; i < minoritySize; i++ { + idx := indices[i] + downsampledDocs = append(downsampledDocs, majorityDocs[idx]) + downsampledLabels = append(downsampledLabels, majorityLabels[idx]) + } + + return downsampledDocs, downsampledLabels +} + +// ============================================================================ +// ╺┳╸┏━┓┏━┓╻┏┓╻ ┏┳┓┏━┓╺┳┓┏━╸╻ +// ┃ ┣┳┛┣━┫┃┃┗┫ ┃┃┃┃ ┃ ┃┃┣╸ ┃ +// ╹ ╹┗╸╹ ╹╹╹ ╹ ╹ ╹┗━┛╺┻┛┗━╸┗━╸ +// ============================================================================ + +// trainModel trains a TF-IDF + logistic regression model +func (c *TrainCommand) trainModel(positives, negatives []*core.Article) (*core.ModelEnvelope, error) { + // Combine datasets and create labels + var documents []string + var labels []float64 + + // Process positives + for _, article := range positives { + // Skip articles with titles that are too short + if len(article.Title) < 15 { + continue + } + documents = append(documents, article.Title) + labels = append(labels, 1.0) + } + + // Process negatives + for _, article := range negatives { + // Skip articles with titles that are too short + if len(article.Title) < 15 { + continue + } + documents = append(documents, article.Title) + labels = append(labels, 0.0) + } + + // Use parameters from CLI flags (with defaults matching Julia implementation) + const vocabCap = 50000 + + // Deterministic 80/20 split for train/validation + trainDocs, valDocs, trainLabels, valLabels := c.splitTrainingData(documents, labels) + + // Create TF-IDF vectorizer with the specified parameters + vectorizer := &core.TFIDFVectorizer{ + NgramMin: 1, + NgramMax: c.ngramMax, + MinDF: c.minDF, + MaxDF: c.maxDF, + VocabCap: vocabCap, + Vocabulary: make(map[string]float64), + } + // Fit vectorizer on UNBALANCED training data to match Julia implementation + // This preserves document frequencies properly + vectorizer.Fit(trainDocs) + + // Downsample negatives to 1:1 ratio AFTER fitting (match Julia approach) + balancedTrainDocs, balancedTrainLabels := c.downsampleToBalance(trainDocs, trainLabels) + + // Transform both training and validation sets + trainVectors := vectorizer.Transform(balancedTrainDocs) + valVectors := vectorizer.Transform(valDocs) + + // Use uniform class weights since we've balanced the dataset + classWeights := map[float64]float64{ + 1.0: 1.0, + 0.0: 1.0, + } + + // Train logistic regression with the specified lambda parameter + lr := &core.LogisticRegression{ + LearningRate: 0.5, + Lambda: c.lambda, + Iterations: 500, + Tolerance: 0.000001, + } + lr.Validate() + weights, err := lr.Fit(trainVectors, balancedTrainLabels, classWeights) + if err != nil { + return nil, fmt.Errorf("failed to train logistic regression model: %w", err) + } + + // Find the best threshold on the validation set + recommendedThreshold, scoreDistributions := c.findBestThreshold(valVectors, valLabels, weights) + + // Count classes for metadata + var posCount, negCount float64 + for _, label := range labels { + if label == 1.0 { + posCount++ + } else { + negCount++ + } + } + + // Create model envelope + model := &core.ModelEnvelope{ + Algorithm: "tfidf-go", + Impl: "go", + Version: "1", + CreatedAt: time.Now().UTC(), + Meta: map[string]any{ + "positives": len(positives), + "negatives": len(negatives), + "class_counts": map[string]int{ + "pos": int(posCount), + "neg": int(negCount), + }, + "vectorizer_params": map[string]any{ + "ngram_min": vectorizer.NgramMin, + "ngram_max": vectorizer.NgramMax, + "min_df": vectorizer.MinDF, + "max_df": 
vectorizer.MaxDF,
+				"vocab_cap": vectorizer.VocabCap,
+			},
+			"model_params": map[string]any{
+				"learning_rate": lr.LearningRate,
+				"lambda": lr.Lambda,
+				"iterations": lr.Iterations,
+				"tolerance": lr.Tolerance,
+			},
+			"recommended_threshold": recommendedThreshold,
+			"score_distributions": scoreDistributions,
+		},
+		Vectorizer: vectorizer.Vocabulary,
+		OrderedVocab: vectorizer.OrderedVocab,
+		Weights: weights,
+	}
+
+	return model, nil
+}
+
+// ============================================================================
+// ┏┳┓┏━╸╺┳╸┏━┓╻┏━╸┏━┓
+// ┃┃┃┣╸ ┃ ┣┳┛┃┃ ┗━┓
+// ╹ ╹┗━╸ ╹ ╹┗╸╹┗━╸┗━┛
+// ============================================================================
+
+// ClassificationMetrics holds the evaluation metrics
+type ClassificationMetrics struct {
+	TruePositives int
+	TrueNegatives int
+	FalsePositives int
+	FalseNegatives int
+	Accuracy float64
+	Precision float64
+	Recall float64
+	F1Score float64
+}
+
+// Calculate computes the metrics from raw counts
+func (m *ClassificationMetrics) Calculate() {
+	total := m.TruePositives + m.TrueNegatives + m.FalsePositives + m.FalseNegatives
+
+	if total > 0 {
+		m.Accuracy = float64(m.TruePositives+m.TrueNegatives) / float64(total)
+	}
+
+	if m.TruePositives+m.FalsePositives > 0 {
+		m.Precision = float64(m.TruePositives) / float64(m.TruePositives+m.FalsePositives)
+	}
+
+	if m.TruePositives+m.FalseNegatives > 0 {
+		m.Recall = float64(m.TruePositives) / float64(m.TruePositives+m.FalseNegatives)
+	}
+
+	if m.Precision+m.Recall > 0 {
+		m.F1Score = 2 * (m.Precision * m.Recall) / (m.Precision + m.Recall)
+	}
+}
+
+// findBestThreshold sweeps candidate thresholds over the validation set and
+// returns the one that maximizes Youden's J (sensitivity + specificity - 1),
+// along with score-distribution statistics for the model metadata.
+func (c *TrainCommand) findBestThreshold(
+	validationVectors [][]float64,
+	validationLabels []float64,
+	weights []float64,
+) (float64, map[string]any) {
+	if len(validationVectors) == 0 {
+		return 0.5, nil // Default if no validation data
+	}
+
+	scores := make([]float64, len(validationVectors))
+	for i, vector := range validationVectors {
+		score, err := core.PredictScore(vector, weights)
+		if err != nil {
+			// This should not happen with valid data, but as a fallback:
+			return 0.5, nil
+		}
+		scores[i] = score
+	}
+
+	// Collect score distributions by label
+	var posScores, negScores []float64
+	for i, score := range scores {
+		if validationLabels[i] == 1.0 {
+			posScores = append(posScores, score)
+		} else {
+			negScores = append(negScores, score)
+		}
+	}
+
+	// Compute stats for each class
+	posStats := computeScoreStats(posScores)
+	negStats := computeScoreStats(negScores)
+
+	// Calculate Cohen's d (effect size) to measure class separation in the learned space
+	posMean := posStats["mean"]
+	negMean := negStats["mean"]
+	posStd := posStats["std"]
+	negStd := negStats["std"]
+
+	var cohensD float64
+	if posStd > 0 && negStd > 0 {
+		pooledStd := math.Sqrt((posStd*posStd + negStd*negStd) / 2)
+		cohensD = math.Abs(posMean-negMean) / pooledStd
+	}
+
+	// Calculate separation ratio to understand how much the classes overlap on the score scale
+	totalRange := math.Max(posStats["max"], negStats["max"]) - math.Min(posStats["min"], negStats["min"])
+	overlapStart := math.Max(posStats["min"], negStats["min"])
+	overlapEnd := math.Min(posStats["max"], negStats["max"])
+	overlapRange := math.Max(0, overlapEnd-overlapStart)
+	separationRatio := 0.0
+	if totalRange > 0 {
+		separationRatio = (totalRange - overlapRange) / totalRange
+	}
+
+	// Find threshold that balances false positives and false
negatives using Youden's J. + // This metric (Sensitivity + Specificity - 1) equally weights both false positive + // and false negative rates. Why not F1? F1 biases toward precision when classes + // are imbalanced; a validation set of 10 positives and 1000 negatives would push + // the threshold too high. Youden's J treats both types of error equally, which + // better reflects real use: missing a relevant article (false negative) is as bad + // as showing an irrelevant one (false positive). + bestCombinedScore := -1.0 + bestThreshold := 0.5 + var bestMetrics ClassificationMetrics + + boolLabels := make([]bool, len(validationLabels)) + for i, l := range validationLabels { + boolLabels[i] = l == 1.0 + } + + for i := 5; i <= 95; i++ { + threshold := float64(i) / 100.0 + metrics := computeMetrics(scores, boolLabels, threshold) + + sensitivity := metrics.Recall // TPR: TP / (TP + FN) + specificity := 0.0 + if metrics.TrueNegatives+metrics.FalsePositives > 0 { + specificity = float64(metrics.TrueNegatives) / float64(metrics.TrueNegatives+metrics.FalsePositives) + } + youdenJ := sensitivity + specificity - 1.0 + + if youdenJ > bestCombinedScore { + bestCombinedScore = youdenJ + bestThreshold = threshold + bestMetrics = metrics + } + } + + distributions := map[string]any{ + "positive": posStats, + "negative": negStats, + "cohens_d": cohensD, + "separation_ratio": separationRatio, + "best_f1": bestMetrics.F1Score, + "best_precision": bestMetrics.Precision, + "best_recall": bestMetrics.Recall, + } + + return bestThreshold, distributions +} + +// computeScoreStats computes min, max, mean, and std for a slice of scores +func computeScoreStats(scores []float64) map[string]float64 { + if len(scores) == 0 { + return map[string]float64{ + "min": 0.0, + "max": 0.0, + "mean": 0.0, + "std": 0.0, + } + } + + min, max := scores[0], scores[0] + sum := 0.0 + + for _, score := range scores { + if score < min { + min = score + } + if score > max { + max = score + } + sum += score + } + + mean := sum / float64(len(scores)) + + // Calculate standard deviation + variance := 0.0 + for _, score := range scores { + diff := score - mean + variance += diff * diff + } + variance /= float64(len(scores)) + std := math.Sqrt(variance) + + return map[string]float64{ + "min": min, + "max": max, + "mean": mean, + "std": std, + } +} + +// computeMetrics calculates classification metrics +func computeMetrics(scores []float64, labels []bool, threshold float64) ClassificationMetrics { + var metrics ClassificationMetrics + for i, score := range scores { + predicted := score > threshold + actual := labels[i] + + if predicted && actual { + metrics.TruePositives++ + } else if predicted && !actual { + metrics.FalsePositives++ + } else if !predicted && actual { + metrics.FalseNegatives++ + } else { + metrics.TrueNegatives++ + } + } + metrics.Calculate() + return metrics +} + +// ============================================================================ +// ╻ ╻┏━╸╻ ┏━┓┏━╸┏━┓┏━┓ +// ┣━┫┣╸ ┃ ┣━┛┣╸ ┣┳┛┗━┓ +// ╹ ╹┗━╸┗━╸╹ ┗━╸╹┗╸┗━┛ +// ============================================================================ + +// shortURL formats a URL to be human-readable and not too long +func shortURL(urlStr string) string { + u, err := url.Parse(urlStr) + if err != nil { + return urlStr + } + + path := u.Path + if len(path) > 30 { + path = path[:30] + "..." 
+ } + + return u.Host + path +} diff --git a/cmds/train_test.go b/cmds/train_test.go new file mode 100644 index 0000000..8298494 --- /dev/null +++ b/cmds/train_test.go @@ -0,0 +1,66 @@ +package cmds + +import ( + "scholscan/core" + "strings" + "testing" +) + +// test RSS parsing +func TestParseRSSFeed(t *testing.T) { + rssXML := `<?xml version="1.0" encoding="UTF-8"?> +<rss version="2.0"> +<channel> +<title>Test Feed</title> +<item> +<title>Test Article 1</title> +<link>https://example.com/article1</link> +<description>This is a test article with some content.</description> +</item> +<item> +<title>Test Article 2</title> +<link>https://example.com/article2</link> +<content><![CDATA[<p>This is content with <b>HTML</b> tags.</p>]]></content> +</item> +</channel> +</rss>` + + articles, err := ParseRSSFeed([]byte(rssXML), "https://example.com/feed") + if err != nil { + t.Fatalf("Failed to parse RSS feed: %v", err) + } + + if len(articles) != 2 { + t.Fatalf("Expected 2 articles, got %d", len(articles)) + } + + if articles[0].Title != "Test Article 1" { + t.Errorf("Expected title 'Test Article 1', got '%s'", articles[0].Title) + } + if articles[0].URL != "https://example.com/article1" { + t.Errorf("Expected URL 'https://example.com/article1', got '%s'", articles[0].URL) + } + if articles[0].Content != "This is a test article with some content." { + t.Errorf("Expected content 'This is a test article with some content.', got '%s'", articles[0].Content) + } + + if articles[1].Title != "Test Article 2" { + t.Errorf("Expected title 'Test Article 2', got '%s'", articles[1].Title) + } + if articles[1].Content != "This is content with HTML tags." { + t.Errorf("Expected 'This is content with HTML tags.', got '%s'", articles[1].Content) + } +} + +func TestCleanFeedContent(t *testing.T) { + longInput := strings.Repeat("test content ", 500) // 6000+ bytes + result := core.CleanFeedContent(longInput) + + if len(result) <= 5000 { + t.Errorf("Expected content to be truncated to >5000 chars, got %d", len(result)) + } + + if !strings.HasSuffix(result, "...") { + t.Errorf("Expected truncated content to end with '...', got '%s'", result[len(result)-3:]) + } +} diff --git a/core/constants.go b/core/constants.go new file mode 100644 index 0000000..2dadac4 --- /dev/null +++ b/core/constants.go @@ -0,0 +1,21 @@ +// Default configuration constants. +// +// Timeouts are defensive: 30s for HTTP requests, 5s for graceful shutdown. +// Score threshold 0.5 is neutral; models should learn their own. +// MinTitleLength filters junk/broken titles (<15 chars rarely meaningful). +// ChunkSize 50 balances memory usage vs batch efficiency. +package core + +import "time" + +const ( + DefaultHTTPTimeout = 30 * time.Second + DefaultContextTimeout = 10 * time.Second + DefaultReadTimeout = 30 * time.Second + DefaultWriteTimeout = 30 * time.Second + DefaultIdleTimeout = 120 * time.Second + DefaultShutdownTimeout = 5 * time.Second + DefaultScoreThreshold = 0.5 + MinTitleLength = 15 + DefaultChunkSize = 50 +) diff --git a/core/http.go b/core/http.go new file mode 100644 index 0000000..8629676 --- /dev/null +++ b/core/http.go @@ -0,0 +1,196 @@ +// HTTP client with exponential backoff retry. +// +// Handles transient network failures, timeouts, and rate limiting. 
+// - Backoff: 500ms → 1s → 2s → 4s max +// - Jitter prevents thundering herd +// - Respects 429 Retry-After header +package core + +import ( + "context" + "errors" + "fmt" + "math/rand" + "net" + "net/http" + "os" + "strconv" + "strings" + "time" +) + + +// ============================================================================ +// ╻ ╻╺┳╸╺┳╸┏━┓ ┏━┓┏━╸╺┳╸┏━┓╻ ╻ +// ┣━┫ ┃ ┃ ┣━┛ ┣┳┛┣╸ ┃ ┣┳┛┗┳┛ +// ╹ ╹ ╹ ╹ ╹ ╹┗╸┗━╸ ╹ ╹┗╸ ╹ +// ============================================================================ + + +const PoliteUserAgent = "scholscan/1.0 (https://github.com/mrichman/scholscan; mailto:matt@mrichman.net)" + +var DefaultHTTPClient = &http.Client{ + Timeout: 30 * time.Second, +} + +var ( + retryMaxAttempts = 4 + retryInitialBackoff = 500 * time.Millisecond + retryMaxBackoff = 5 * time.Second +) + +// Makes HTTP request with exponential backoff retry +func DoRequestWithRetry( + ctx context.Context, + client *http.Client, + req *http.Request, +) (*http.Response, error) { + if client == nil { + client = DefaultHTTPClient + } + var lastErr error + backoff := retryInitialBackoff + + for attempt := 1; attempt <= retryMaxAttempts; attempt++ { + // Make the request cancellable + reqWithCtx := req.WithContext(ctx) + resp, err := client.Do(reqWithCtx) + if err == nil { + if isRetriableStatus(resp.StatusCode) { + retryAfter := parseRetryAfter(resp.Header.Get("Retry-After")) + _ = resp.Body.Close() + sleep := backoff + if retryAfter > sleep { + sleep = retryAfter + } + + // Add jitter to avoid thundering herd. + jitter := time.Duration(rand.Intn(int(backoff / 2))) + sleep += jitter + + // Make sleep cancellable + timer := time.NewTimer(sleep) + select { + case <-ctx.Done(): + timer.Stop() + return nil, ctx.Err() + case <-timer.C: + } + + backoff = minDuration(backoff*2, retryMaxBackoff) + continue + } + return resp, nil + } + // Check for context cancellation + if ctx.Err() != nil { + return nil, ctx.Err() + } + // Network error: retry on timeouts, context deadline, transient net errors, and HTTP/2 stream errors + if os.IsTimeout(err) || errors.Is(err, context.DeadlineExceeded) || isTransientNetError(err) || isHTTP2StreamErr(err) { + lastErr = err + + // Add jitter to avoid thundering herd. 
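+			// e.g. with backoff=2s the jitter lands in [0s, 1s), so the
+			// actual sleep is 2-3s rather than an exact doubling step.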
+ jitter := time.Duration(rand.Intn(int(backoff / 2))) + sleep := backoff + jitter + + // Make sleep cancellable + timer := time.NewTimer(sleep) + select { + case <-ctx.Done(): + timer.Stop() + return nil, ctx.Err() + case <-timer.C: + } + + backoff = minDuration(backoff*2, retryMaxBackoff) + continue + } + // Non-retriable error + return nil, err + } + if lastErr == nil { + lastErr = fmt.Errorf("request retries exhausted") + } + return nil, lastErr +} + + +// ============================================================================ +// ╻ ╻┏━╸╻ ┏━┓┏━╸┏━┓┏━┓ +// ┣━┫┣╸ ┃ ┣━┛┣╸ ┣┳┛┗━┓ +// ╹ ╹┗━╸┗━╸╹ ┗━╸╹┗╸┗━┛ +// ============================================================================ + + +func isRetriableStatus(code int) bool { + if code == http.StatusTooManyRequests { + return true + } + return code >= 500 && code != http.StatusNotImplemented +} + +func parseRetryAfter(v string) time.Duration { + if v == "" { + return 0 + } + if secs, err := strconv.Atoi(strings.TrimSpace(v)); err == nil && secs > 0 { + return time.Duration(secs) * time.Second + } + if t, err := http.ParseTime(v); err == nil { + if d := time.Until(t); d > 0 { + return d + } + } + return 0 +} + +func minDuration(a, b time.Duration) time.Duration { + if a < b { + return a + } + return b +} + +// isTransientNetError returns true for network errors which are commonly transient, +// such as timeouts and common connection reset/closed cases. +func isTransientNetError(err error) bool { + if err == nil { + return false + } + var ne net.Error + if errors.As(err, &ne) { + if ne.Timeout() { + return true + } + } + msg := strings.ToLower(err.Error()) + switch { + case strings.Contains(msg, "use of closed network connection"): + return true + case strings.Contains(msg, "connection reset by peer"): + return true + case strings.Contains(msg, "connection aborted"): + return true + case strings.Contains(msg, "broken pipe"): + return true + case strings.Contains(msg, "eof"): + // Treat unexpected EOFs as transient when occurring at transport level. + return true + default: + return false + } +} + +// isHTTP2StreamErr detects HTTP/2 stream-level errors which are often transient. +func isHTTP2StreamErr(err error) bool { + if err == nil { + return false + } + msg := strings.ToLower(err.Error()) + return strings.Contains(msg, "stream error") || + strings.Contains(msg, "internal_error") || + strings.Contains(msg, "rst_stream") || + strings.Contains(msg, "goaway") || + strings.Contains(msg, "http2:") +} diff --git a/core/ml.go b/core/ml.go new file mode 100644 index 0000000..afdd2f3 --- /dev/null +++ b/core/ml.go @@ -0,0 +1,427 @@ +// ML implementation: TF-IDF + Logistic Regression for article filtering. +// +// Why title-only: Avoids content scraping overhead, titles are already informative. +// MinDF=2: Removes typos and rare terms that don't generalize. +// MaxDF=0.8: Removes common words that appear in >80% of documents. +// λ=0.001: Light L2 regularization to prevent overfitting on small datasets. 
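+//
+// Example: with 1,000 training titles, MaxDF=0.8 drops any term appearing in
+// more than 800 of them, and MinDF=2 drops terms that appear in only one.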
+// +// Public API: +// - TFIDFVectorizer.Fit(): Learn vocabulary from documents +// - TFIDFVectorizer.Transform(): Convert documents to TF-IDF vectors +// - LogisticRegression.Fit(): Train classifier on vectors +// - CreateVectorizerFromModel(): Reconstruct vectorizer from saved model +// - PredictScore(): Score article using trained weights +package core + +import ( + "fmt" + "math" + "regexp" + "sort" + "strings" +) + + +// ============================================================================ +// ╻ ╻┏━╸┏━╸╺┳╸┏━┓┏━┓╻┏━┓┏━╸┏━┓ +// ┃┏┛┣╸ ┃ ┃ ┃ ┃┣┳┛┃┗━┓┣╸ ┣┳┛ +// ┗┛ ┗━╸┗━╸ ╹ ┗━┛╹┗╸╹┗━┛┗━╸╹┗╸ +// ============================================================================ + + +var wordHyphenRegex = regexp.MustCompile("[^a-zA-Z0-9-]+") + +// StopWords: Common words that don't help distinguish articles. +// Why: Reduces noise and improves model generalization. +var stopWords = map[string]struct{}{ + // Single letters and symbols + "s": {}, "-": {}, "0": {}, "1": {}, "2": {}, "3": {}, "4": {}, "5": {}, "6": {}, "7": {}, "8": {}, "9": {}, + + // Common English stop words + "a": {}, "about": {}, "above": {}, "after": {}, "again": {}, "against": {}, "al": {}, "all": {}, "am": {}, "an": {}, "and": {}, "any": {}, "are": {}, "aren't": {}, "as": {}, "at": {}, "be": {}, "because": {}, "been": {}, "before": {}, "being": {}, "below": {}, "between": {}, "both": {}, "but": {}, "by": {}, "can't": {}, "cannot": {}, "could": {}, "couldn't": {}, "did": {}, "didn't": {}, "do": {}, "does": {}, "doesn't": {}, "doing": {}, "don't": {}, "down": {}, "during": {}, "each": {}, "et": {}, "few": {}, "for": {}, "from": {}, "further": {}, "had": {}, "hadn't": {}, "has": {}, "hasn't": {}, "have": {}, "haven't": {}, "having": {}, "he": {}, "he'd": {}, "he'll": {}, "he's": {}, "her": {}, "here": {}, "here's": {}, "hers": {}, "herself": {}, "him": {}, "himself": {}, "his": {}, "how": {}, "how's": {}, "i": {}, "i'd": {}, "i'll": {}, "i'm": {}, "i've": {}, "if": {}, "in": {}, "into": {}, "is": {}, "isn't": {}, "it": {}, "it's": {}, "its": {}, "itself": {}, "let's": {}, "me": {}, "more": {}, "most": {}, "mustn't": {}, "my": {}, "myself": {}, "no": {}, "nor": {}, "not": {}, "of": {}, "off": {}, "on": {}, "once": {}, "only": {}, "or": {}, "other": {}, "ought": {}, "our": {}, "ours": {}, "ourselves": {}, "out": {}, "over": {}, "own": {}, "same": {}, "shan't": {}, "she": {}, "she'd": {}, "she'll": {}, "she's": {}, "should": {}, "shouldn't": {}, "so": {}, "some": {}, "such": {}, "than": {}, "that": {}, "that's": {}, "the": {}, "their": {}, "theirs": {}, "them": {}, "themselves": {}, "then": {}, "there": {}, "there's": {}, "these": {}, "they": {}, "they'd": {}, "they'll": {}, "they're": {}, "they've": {}, "this": {}, "those": {}, "through": {}, "to": {}, "too": {}, "under": {}, "until": {}, "up": {}, "very": {}, "was": {}, "wasn't": {}, "we": {}, "we'd": {}, "we'll": {}, "we're": {}, "we've": {}, "were": {}, "weren't": {}, "what": {}, "what's": {}, "when": {}, "when's": {}, "where": {}, "where's": {}, "which": {}, "while": {}, "who": {}, "who's": {}, "whom": {}, "why": {}, "why's": {}, "with": {}, "won't": {}, "would": {}, "wouldn't": {}, "you": {}, "you'd": {}, "you'll": {}, "you're": {}, "you've": {}, "your": {}, "yours": {}, "yourself": {}, "yourselves": {}, +} + +type TFIDFVectorizer struct { + Vocabulary map[string]float64 + OrderedVocab []string + NgramMin int + NgramMax int + MinDF int // Minimum document frequency (absolute) + MaxDF float64 // Maximum document frequency (ratio) + VocabCap int +} + +func 
CreateVectorizerFromModel(model *ModelEnvelope) *TFIDFVectorizer { + return &TFIDFVectorizer{ + Vocabulary: model.Vectorizer, + OrderedVocab: model.OrderedVocab, + } +} + + +// Learns vocabulary and IDF from documents +func (v *TFIDFVectorizer) Fit(documents []string) { + numDocs := len(documents) + docFreqs := make(map[string]int) + + for _, doc := range documents { + unigrams := Tokenize(doc) + ngrams := generateNgrams(unigrams, v.NgramMin, v.NgramMax) + seenInDoc := make(map[string]struct{}) + for _, ngram := range ngrams { + if _, seen := seenInDoc[ngram]; !seen { + docFreqs[ngram]++ + seenInDoc[ngram] = struct{}{} + } + } + } + + maxDocs := int(v.MaxDF * float64(numDocs)) + filteredVocab := make(map[string]int) + for term, freq := range docFreqs { + if freq >= v.MinDF && freq <= maxDocs { + filteredVocab[term] = freq + } + } + + if v.VocabCap > 0 && len(filteredVocab) > v.VocabCap { + type termFreq struct { + term string + freq int + } + terms := make([]termFreq, 0, len(filteredVocab)) + for term, freq := range filteredVocab { + terms = append(terms, termFreq{term, freq}) + } + sort.Slice(terms, func(i, j int) bool { + return terms[i].freq > terms[j].freq + }) + + cappedTerms := terms[:v.VocabCap] + filteredVocab = make(map[string]int, v.VocabCap) + for _, tf := range cappedTerms { + filteredVocab[tf.term] = tf.freq + } + } + + v.OrderedVocab = make([]string, 0, len(filteredVocab)) + for term := range filteredVocab { + v.OrderedVocab = append(v.OrderedVocab, term) + } + sort.Strings(v.OrderedVocab) // deterministic order + + v.Vocabulary = make(map[string]float64, len(v.OrderedVocab)) + for _, term := range v.OrderedVocab { + // IDF = log(total num of docs / num of docs with term) + idf := math.Log(float64(numDocs) / float64(filteredVocab[term])) + v.Vocabulary[term] = idf + } +} + +// Converts documents to TF-IDF vectors using learned vocabulary +func (v *TFIDFVectorizer) Transform(documents []string) [][]float64 { + vectors := make([][]float64, len(documents)) + + for i, doc := range documents { + unigrams := Tokenize(doc) + ngrams := generateNgrams(unigrams, v.NgramMin, v.NgramMax) + vector := make([]float64, len(v.OrderedVocab)) + + if len(ngrams) > 0 { + // tf: term frequency (normalized count of each n-gram in document) + tf := make(map[string]float64) + for _, ngram := range ngrams { + tf[ngram]++ + } + numNgrams := float64(len(ngrams)) + for ngram, count := range tf { + tf[ngram] = count / numNgrams + } + + for j, term := range v.OrderedVocab { + if tfValue, ok := tf[term]; ok { + // only score terms that were in our training vocabulary + if idfValue, inVocab := v.Vocabulary[term]; inVocab { + vector[j] = tfValue * idfValue + } + } + } + } + vectors[i] = vector + } + + return vectors +} + +func Tokenize(text string) []string { + text = strings.ToLower(text) + words := wordHyphenRegex.Split(text, -1) + tokens := make([]string, 0, len(words)) + for _, word := range words { + if word == "" { + continue + } + if _, isStopWord := stopWords[word]; isStopWord { + continue + } + tokens = append(tokens, word) + } + return tokens +} + +func generateNgrams(tokens []string, minN, maxN int) []string { + if minN <= 0 { + minN = 1 + } + if maxN < minN { + maxN = minN + } + + numTokens := len(tokens) + + estimatedCap := 0 + for n := minN; n <= maxN; n++ { + if numTokens >= n { + estimatedCap += numTokens - n + 1 + } + } + ngrams := make([]string, 0, estimatedCap) + + for n := minN; n <= maxN; n++ { + if numTokens < n { + continue + } + for i := 0; i <= numTokens-n; i++ { + ngrams = 
append(ngrams, strings.Join(tokens[i:i+n], " ")) + } + } + return ngrams +} + + +// ============================================================================ +// ┏━╸╻ ┏━┓┏━┓┏━┓╻┏━╸╻┏━╸┏━┓ +// ┃ ┃ ┣━┫┗━┓┗━┓┃┣╸ ┃┣╸ ┣┳┛ +// ┗━╸┗━╸╹ ╹┗━┛┗━┛╹╹ ╹┗━╸╹┗╸ +// ============================================================================ + + +// Binary logistic regression with L2 regularization +// Bias term stored separately (not regularized) +type LogisticRegression struct { + LearningRate float64 + Lambda float64 // L2 regularization parameter + Iterations int + Tolerance float64 // Convergence tolerance on loss improvement +} + +// validate checks and clamps hyperparams to reasonable bounds. +func (lr *LogisticRegression) Validate() *LogisticRegression { + const ( + defaultLearningRate = 0.5 + defaultIterations = 500 + defaultTolerance = 0.000001 + ) + + if lr.LearningRate <= 0 { + lr.LearningRate = defaultLearningRate + } + if lr.LearningRate > 10 { + lr.LearningRate = 10.0 + } + if lr.Lambda < 0 { + lr.Lambda = 0.0 + } + if lr.Iterations <= 0 { + lr.Iterations = defaultIterations + } + if lr.Tolerance <= 0 { + lr.Tolerance = defaultTolerance + } + return lr +} + +// Fit trains via SGD with L2 regularization on feature weights (not bias). +// Class weights reweight samples; unused in our pipeline (we downsample instead). +// Returns weights with bias as last element. +func (lr *LogisticRegression) Fit(vectors [][]float64, labels []float64, classWeights map[float64]float64) ([]float64, error) { + if len(vectors) == 0 { + return nil, fmt.Errorf("cannot train on empty dataset") + } + if len(vectors) != len(labels) { + return nil, fmt.Errorf( + "mismatch between number of vectors (%d) and labels (%d)", + len(vectors), + len(labels), + ) + } + + for i, y := range labels { + if y != 0 && y != 1 { + return nil, fmt.Errorf("invalid label at %d: %v (expected 0 or 1)", i, y) + } + } + + numFeatures := len(vectors[0]) + if numFeatures == 0 { + return nil, fmt.Errorf("cannot train with zero-length feature vectors") + } + for i := 1; i < len(vectors); i++ { + if len(vectors[i]) != numFeatures { + return nil, fmt.Errorf( + "inconsistent feature vector length at index %d: got %d, expected %d", + i, + len(vectors[i]), + numFeatures, + ) + } + } + useUniformWeights := classWeights == nil + if useUniformWeights { + classWeights = map[float64]float64{0.0: 1.0, 1.0: 1.0} + } + + numSamples := float64(len(vectors)) + var totalWeight float64 + if useUniformWeights { + totalWeight = numSamples + } else { + for _, y := range labels { + totalWeight += classWeights[y] + } + } + if totalWeight == 0 { + totalWeight = numSamples // Fallback + } + + weights := make([]float64, numFeatures) + var bias float64 + + prevLoss := math.MaxFloat64 + + for i := 0; i < lr.Iterations; i++ { + gradWeights := make([]float64, numFeatures) + var gradBias float64 + var currentLoss float64 + + for j, x := range vectors { + y := labels[j] + sampleWeight := classWeights[y] + + z, err := dot(weights, x) + if err != nil { + return nil, fmt.Errorf("error calculating dot product for vector %d: %w", j, err) + } + p := Sigmoid(z + bias) + + // Compute prediction error. This term gets multiplied by each feature value + // to accumulate gradients (higher error pushes weights harder). 
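+			// For the sigmoid + cross-entropy pairing this is simply
+			//   dL/dw_k = (p - y) * x_k,  dL/db = (p - y)
+			// per sample, before class weighting and averaging.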
+ errTerm := p - y + for k := 0; k < numFeatures; k++ { + gradWeights[k] += sampleWeight * errTerm * x[k] + } + gradBias += sampleWeight * errTerm + + cp := clamp(p) + currentLoss += sampleWeight * (-(y*math.Log(cp) + (1-y)*math.Log(1-cp))) + } + + // Update weights with L2 regularization (only on feature weights, not bias). + // This pulls weights toward zero, preventing overfitting on small datasets. + for k := 0; k < numFeatures; k++ { + regularizedGrad := (gradWeights[k] / totalWeight) + (lr.Lambda * weights[k]) + weights[k] -= lr.LearningRate * regularizedGrad + } + gradBias /= totalWeight + bias -= lr.LearningRate * gradBias + + // Check convergence: if loss change is below tolerance, we're done. + // We include the L2 penalty in total loss to assess true convergence. + avgLoss := currentLoss / totalWeight + var l2Penalty float64 + for _, w := range weights { + l2Penalty += w * w + } + totalLoss := avgLoss + 0.5*lr.Lambda*l2Penalty + if math.Abs(prevLoss-totalLoss) < lr.Tolerance { + break + } + prevLoss = totalLoss + } + + // bias is stored as the last element + return append(weights, bias), nil +} + +// PredictScore computes the probability for a single vec given weights. +// the last element of weights is the bias. +func PredictScore(vector []float64, weights []float64) (float64, error) { + if len(weights) == 0 { + return 0, fmt.Errorf("weights cannot be empty") + } + if len(vector) != len(weights)-1 { + return 0, fmt.Errorf( + "vector length mismatch: expected %d features, got %d", + len(weights)-1, + len(vector), + ) + } + + for i, v := range vector { + if math.IsNaN(v) || math.IsInf(v, 0) { + return 0, fmt.Errorf("invalid value at vector[%d]: %v", i, v) + } + } + for i, w := range weights { + if math.IsNaN(w) || math.IsInf(w, 0) { + return 0, fmt.Errorf("invalid value at weights[%d]: %v", i, w) + } + } + + featureWeights := weights[:len(weights)-1] + bias := weights[len(weights)-1] + + z, err := dot(featureWeights, vector) + if err != nil { + return 0, fmt.Errorf("failed to compute dot product: %w", err) + } + return Sigmoid(z + bias), nil +} + + +// ============================================================================ +// ┏┳┓┏━┓╺┳╸╻ ╻┏━┓ +// ┃┃┃┣━┫ ┃ ┣━┫┗━┓ +// ╹ ╹╹ ╹ ╹ ╹ ╹┗━┛ +// ============================================================================ + + +func Sigmoid(z float64) float64 { + if z >= 0 { + return 1.0 / (1.0 + math.Exp(-z)) + } + ez := math.Exp(z) + return ez / (1.0 + ez) +} + +func dot(a, b []float64) (float64, error) { + if len(a) != len(b) { + return 0, fmt.Errorf("vector length mismatch: %d != %d", len(a), len(b)) + } + var sum float64 + for i := range a { + sum += a[i] * b[i] + } + return sum, nil +} + +func clamp(p float64) float64 { + const probabilityClamp = 1e-15 + if p < probabilityClamp { + return probabilityClamp + } + if p > 1.0-probabilityClamp { + return 1.0 - probabilityClamp + } + return p +} diff --git a/core/model.go b/core/model.go new file mode 100644 index 0000000..28f4045 --- /dev/null +++ b/core/model.go @@ -0,0 +1,20 @@ +// Model envelope persists trained model to JSON. Contains Vectorizer for IDF values, +// OrderedVocab for feature ordering, and Weights for logistic regression. +// To score: recreate TFIDFVectorizer, transform, then PredictScore. 
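+//
+// A minimal scoring sketch (assumes the model was already decoded from JSON):
+//
+//	vec := CreateVectorizerFromModel(model)
+//	score, err := PredictScore(vec.Transform([]string{title})[0], model.Weights)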
+package core + +import ( + "time" +) + +// ModelEnvelope - complete trained model for scoring articles +type ModelEnvelope struct { + Algorithm string `json:"algorithm"` + Impl string `json:"impl"` + Version string `json:"version"` + CreatedAt time.Time `json:"created_at"` + Meta map[string]any `json:"meta"` + Vectorizer map[string]float64 `json:"vectorizer"` + OrderedVocab []string `json:"ordered_vocab"` + Weights []float64 `json:"weights"` +} diff --git a/core/scoring.go b/core/scoring.go new file mode 100644 index 0000000..9896c80 --- /dev/null +++ b/core/scoring.go @@ -0,0 +1,14 @@ +// Score conversion utilities. +// +// ScoreToScale: Maps probability (0-1) to user-friendly 1-10 scale. +// Why: Users understand "8/10" better than "0.82 probability". +package core + +import "math" + +// ScoreToScale turns probability into 1-10 display score +func ScoreToScale(rawScore, threshold float64) int { + k := 10.0 + adjustedScore := 1.0 / (1.0 + math.Exp(-k*(rawScore-threshold))) + return int(math.Round(1.0 + (adjustedScore * 9.0))) +} diff --git a/core/text.go b/core/text.go new file mode 100644 index 0000000..ef4f861 --- /dev/null +++ b/core/text.go @@ -0,0 +1,36 @@ +// Text processing for RSS feed content. +// Used for web UI previews and search indexing - not ML (title-only scoring). +package core + +import ( + "regexp" + "strings" +) + +// CleanFeedContent strips HTML, normalizes whitespace, truncates to 5KB +func CleanFeedContent(content string) string { + if content == "" { + return "" + } + + content = StripHTMLTags(content) + content = NormalizeSpace(content) + + maxLength := 5000 + if len(content) > maxLength { + content = content[:maxLength] + "..." + } + + return content +} + +// StripHTMLTags removes HTML tags +func StripHTMLTags(content string) string { + re := regexp.MustCompile(`<[^>]*>`) + return re.ReplaceAllString(content, "") +} + +// NormalizeSpace collapses whitespace and trims +func NormalizeSpace(s string) string { + return strings.Join(strings.Fields(strings.TrimSpace(s)), " ") +} diff --git a/core/types.go b/core/types.go new file mode 100644 index 0000000..3bfa311 --- /dev/null +++ b/core/types.go @@ -0,0 +1,84 @@ +// Core type definitions for article filtering. +// +// Article: Represents paper with metadata, URL, title, optional content. +// +// Score, LabelPositive, Classification for ML pipeline state. +// +// Config: Application settings (timeouts, user agent, enrich). +// Command: Interface for CLI subcommands (train, scan, serve). +package core + +import ( + "io" + "time" +) + +// Article represents a single article with enriched metadata and scoring. 
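+// Serialized as JSON; training input is JSONL, one object per line, e.g.:
+//
+//	{"title": "Example article title", "url": "https://example.com/paper"}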
+type Article struct { + // Basic article information + Title string `json:"title"` + Content string `json:"content,omitempty"` + URL string `json:"url"` + + // Enrichment metadata + FetchedAt *time.Time `json:"fetched_at,omitempty"` + PublishedAt *time.Time `json:"published_at,omitempty"` + Source string `json:"source,omitempty"` + + // Machine learning fields + Score *float64 `json:"score,omitempty"` + LabelPositive *bool `json:"label_positive,omitempty"` + Classification string `json:"classification,omitempty"` + + // Additional metadata + Authors []string `json:"authors,omitempty"` + Journal string `json:"journal,omitempty"` + Year *int `json:"year,omitempty"` + DOI string `json:"doi,omitempty"` + + // Raw extracted text from APIs or HTML + // Fields that may populate Title/Content + RawTitle string `json:"raw_title,omitempty"` + RawContent string `json:"raw_content,omitempty"` +} + +// Config represents the application configuration. +type Config struct { + // Default model and threshold + Defaults struct { + Model string `json:"model"` + Threshold *float64 `json:"threshold"` + EventsOut string `json:"events_out"` + } `json:"defaults"` + + // HTTP behavior + UserAgent string `json:"user_agent"` + ContactEmail string `json:"contact_email"` + + // Enrichment settings + Enrich struct { + MinTitleLength int `json:"min_title_length"` + ChunkSize int `json:"chunk_size"` + } `json:"enrich"` + + // API provider settings + Providers struct { + SemanticScholar struct { + APIKey string `json:"api_key"` + } `json:"semantic_scholar"` + } `json:"providers"` +} + +// Command defines the interface that all CLI subcommands must implement. +type Command interface { + // Name returns the command name (e.g., "train", "scan", "clean"). + Name() string + + // Init parses command-line arguments and initializes the command. + // It should return flag.ErrHelp if --help was requested. + Init(args []string) error + + // Run executes the command, reading from stdin and writing to stdout. + // The command should handle its own error reporting to stderr. 
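+	//
+	// A dispatch sketch (the real wiring lives in main.go; pickCommand and
+	// the argument slicing are illustrative only):
+	//
+	//	cmd := pickCommand(os.Args[1]) // hypothetical helper
+	//	if err := cmd.Init(os.Args[2:]); err == nil {
+	//		err = cmd.Run(os.Stdin, os.Stdout)
+	//	}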
+ Run(stdin io.Reader, stdout io.Writer) error +} @@ -0,0 +1,19 @@ +module scholscan + +go 1.25.1 + +require ( + github.com/PuerkitoBio/goquery v1.10.3 + github.com/mmcdole/gofeed v1.3.0 +) + +require ( + github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/stretchr/testify v1.10.0 // indirect + golang.org/x/net v0.43.0 // indirect + golang.org/x/text v0.28.0 // indirect +) @@ -0,0 +1,96 @@ +github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo= +github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/mmcdole/gofeed v1.3.0 h1:5yn+HeqlcvjMeAI4gu6T+crm7d0anY85+M+v6fIFNG4= +github.com/mmcdole/gofeed v1.3.0/go.mod h1:9TGv2LcJhdXePDzxiuMnukhV2/zb6VtnZt1mS+SjkLE= +github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23 h1:Zr92CAlFhy2gL+V1F+EyIuzbQNbSgP4xhTODZtrXUtk= +github.com/mmcdole/goxpp v1.1.1-0.20240225020742-a0c311522b23/go.mod h1:v+25+lT2ViuQ7mVxcncQ8ch1URund48oH+jhjiwEgS8= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod 
h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= +golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod 
h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= +golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/justfile b/justfile new file mode 100644 index 0000000..eabf06c --- /dev/null +++ b/justfile @@ -0,0 +1,39 @@ +# ScholScan Go Implementation + +# Default recipe +default: + @just --list + +# Build the binary +build: + go build -o scholscan . + +# Install to system (optional) +install: + go install . + +# Run tests +test: + go test ./... + +# Clean cache (only works if running from project directory) +clean-cache: + ./scholscan clean + +# Format Go code +fmt: + go fmt ./... 
+
+# Run linter (requires golangci-lint)
+lint:
+	golangci-lint run
+
+# Example: Train model from articles and RSS feeds (provide your own paths)
+example-train articles feeds:
+	@mkdir -p /tmp/scholscan
+	./scholscan train {{articles}} --rss-feeds {{feeds}} > /tmp/scholscan/model.json
+	@echo "Model saved to /tmp/scholscan/model.json"
+
+# Example: Scan with trained model (provide your own paths)
+example-scan model url:
+	./scholscan scan --model {{model}} --url {{url}}
diff --git a/main.go b/main.go
new file mode 100644
--- /dev/null
+++ b/main.go
@@ -0,0 +1,83 @@
+// scholscan command-line tool.
+// This is the main entry point; commands are implemented in cmds/
+// and shared logic in core/.
+package main
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"os"
+	"scholscan/cmds"
+	"scholscan/core"
+)
+
+func main() {
+	if len(os.Args) < 2 {
+		printHelp()
+		os.Exit(1)
+	}
+
+	cmdName := os.Args[1]
+	args := os.Args[2:]
+
+	// handle explicit help requests
+	if cmdName == "help" || cmdName == "--help" || cmdName == "-h" {
+		printHelp()
+		return
+	}
+
+	// map the subcommand name to its implementation
+	var cmd core.Command
+	switch cmdName {
+	case "train":
+		cmd = &cmds.TrainCommand{}
+	case "scan":
+		cmd = &cmds.ScanCommand{}
+	case "serve":
+		cmd = &cmds.ServeCommand{}
+	default:
+		fmt.Fprintf(os.Stderr, "Unknown command: %s\n\n", cmdName)
+		printHelp()
+		os.Exit(1)
+	}
+
+	// initialize the command, then run it
+	if err := cmd.Init(args); err != nil {
+		if errors.Is(err, flag.ErrHelp) {
+			os.Exit(0)
+		}
+		fmt.Fprintf(os.Stderr, "Error initializing %s command: %v\n", cmdName, err)
+		os.Exit(1)
+	}
+
+	if err := cmd.Run(os.Stdin, os.Stdout); err != nil {
+		fmt.Fprintf(os.Stderr, "Error running %s command: %v\n", cmdName, err)
+		os.Exit(1)
+	}
+}
+
+func printHelp() {
+	fmt.Printf(`scholscan <command> [arguments]
+
+A command-line tool for filtering articles based on learned user preferences.
+
+Commands:
+  train    Train a model from positives and RSS feeds
+  scan     Filter articles using a trained model
+  serve    Start HTTP server with filtered RSS and scoring API
+
+Usage:
+  scholscan train POSITIVES_FILE --rss-feeds RSS_FEEDS_FILE > model.json
+  scholscan scan --url RSS_URL --model MODEL > results.jsonl
+  scholscan serve --model MODEL --rss-world RSS_FEEDS_FILE  # Start server
+  scholscan serve --title "My Custom ScholScan"             # Custom title for web interface
+  scholscan help                                            # Show this help message
+
+Examples:
+  scholscan train positives.jsonl --rss-feeds rss_world.txt > model.json
+  scholscan scan --url "https://feeds.reuters.com/reuters/topNews" --model model.json
+  scholscan serve --port 8080 --model model.json --rss-world rss_world.txt
+
+`)
+}
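Side note on extending the CLI: a new subcommand only has to satisfy the three-method core.Command interface defined in core/types.go above and be registered in main()'s switch. A minimal sketch, using a hypothetical "version" command that is not part of this commit:

package cmds

import (
	"flag"
	"fmt"
	"io"
)

// VersionCommand is an illustrative subcommand showing the core.Command contract.
type VersionCommand struct {
	verbose bool
}

// Name returns the subcommand name used by main()'s dispatch switch.
func (c *VersionCommand) Name() string { return "version" }

// Init parses flags; flag.ContinueOnError makes Parse return flag.ErrHelp
// on --help, which main() treats as a clean exit.
func (c *VersionCommand) Init(args []string) error {
	fs := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
	fs.BoolVar(&c.verbose, "verbose", false, "print extra build details")
	return fs.Parse(args)
}

// Run does the work, honoring the stdin/stdout contract of core.Command.
func (c *VersionCommand) Run(stdin io.Reader, stdout io.Writer) error {
	fmt.Fprintln(stdout, "scholscan v0.1.0")
	if c.verbose {
		fmt.Fprintln(stdout, "built from source; see git log for details")
	}
	return nil
}

main() would then gain a matching case in its switch: case "version": cmd = &cmds.VersionCommand{}.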

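For orientation before reading core/ml.go (not shown on this page): given a ModelEnvelope, scan-time scoring under the standard TF-IDF-plus-logistic-regression formulation reduces to a weighted dot product passed through a sigmoid. A rough sketch with illustrative names that do not match the actual core/ml.go API (n-gram handling and any bias term omitted):

package main

import (
	"fmt"
	"math"
	"strings"
)

// scoreTitle is an illustrative helper, not the real core/ml.go code:
// term frequency over lowercased title tokens, scaled by the stored IDF,
// dotted with the trained weights, squashed to a probability in (0, 1).
func scoreTitle(title string, idf map[string]float64, vocab []string, weights []float64) float64 {
	tf := make(map[string]float64)
	for _, tok := range strings.Fields(strings.ToLower(title)) {
		tf[tok]++
	}
	var z float64
	for i, term := range vocab { // weights[i] pairs with vocab[i], as in ModelEnvelope
		z += weights[i] * tf[term] * idf[term]
	}
	return 1.0 / (1.0 + math.Exp(-z)) // logistic link
}

func main() {
	// toy model, invented values
	vocab := []string{"quantum", "sensing"}
	idf := map[string]float64{"quantum": 1.2, "sensing": 1.5}
	weights := []float64{0.9, 0.4}
	fmt.Printf("p = %.3f\n", scoreTitle("Quantum sensing advances", idf, vocab, weights))
}

With these toy numbers the probability comes out around 0.84; ScoreToScale in core/scoring.go would then map that onto the 1-10 display scale.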