// Core type definitions for article filtering. // // Article: Represents paper with metadata, URL, title, optional content. // // Score, LabelPositive, Classification for ML pipeline state. // // Config: Application settings (timeouts, user agent, enrich). // Command: Interface for CLI subcommands (train, scan, serve). package core import ( "io" "time" ) // Article represents a single article with enriched metadata and scoring. type Article struct { // Basic article information Title string `json:"title"` Content string `json:"content,omitempty"` URL string `json:"url"` // Enrichment metadata FetchedAt *time.Time `json:"fetched_at,omitempty"` PublishedAt *time.Time `json:"published_at,omitempty"` Source string `json:"source,omitempty"` // Machine learning fields Score *float64 `json:"score,omitempty"` LabelPositive *bool `json:"label_positive,omitempty"` Classification string `json:"classification,omitempty"` // Additional metadata Authors []string `json:"authors,omitempty"` Journal string `json:"journal,omitempty"` Year *int `json:"year,omitempty"` DOI string `json:"doi,omitempty"` // Raw extracted text from APIs or HTML // Fields that may populate Title/Content RawTitle string `json:"raw_title,omitempty"` RawContent string `json:"raw_content,omitempty"` } // Config represents the application configuration. type Config struct { // Default model and threshold Defaults struct { Model string `json:"model"` Threshold *float64 `json:"threshold"` EventsOut string `json:"events_out"` } `json:"defaults"` // HTTP behavior UserAgent string `json:"user_agent"` ContactEmail string `json:"contact_email"` // Enrichment settings Enrich struct { MinTitleLength int `json:"min_title_length"` ChunkSize int `json:"chunk_size"` } `json:"enrich"` // API provider settings Providers struct { SemanticScholar struct { APIKey string `json:"api_key"` } `json:"semantic_scholar"` } `json:"providers"` } // Command defines the interface that all CLI subcommands must implement. type Command interface { // Name returns the command name (e.g., "train", "scan", "clean"). Name() string // Init parses command-line arguments and initializes the command. // It should return flag.ErrHelp if --help was requested. Init(args []string) error // Run executes the command, reading from stdin and writing to stdout. // The command should handle its own error reporting to stderr. Run(stdin io.Reader, stdout io.Writer) error }