1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
// Package core holds the core type definitions for article filtering.
//
// Article: Represents a paper with its metadata: URL, title, and optional content.
//
// Score, LabelPositive, and Classification are the Article fields that carry
// ML pipeline state.
//
// Config: Application settings (defaults, user agent, enrichment, providers).
// Command: Interface for CLI subcommands (train, scan, serve).
package core
import (
"io"
"time"
)
// Article represents a single article with enriched metadata and scoring.
//
// Every field carries a JSON tag. Title and URL are always emitted when the
// struct is marshaled; every other field uses omitempty and is dropped from
// the output when empty. The optional values (FetchedAt, PublishedAt, Score,
// LabelPositive, Year) are pointers, so a nil pointer marks "not set" as
// distinct from the type's zero value.
type Article struct {
// Basic article information. Title and URL have no omitempty, so they are
// written to JSON even when empty.
Title string `json:"title"`
Content string `json:"content,omitempty"`
URL string `json:"url"`
// Enrichment metadata. The timestamps are pointers; nil values are omitted
// from JSON.
// NOTE(review): the time zone of these timestamps is not fixed by this
// file — confirm with the code that sets them.
FetchedAt *time.Time `json:"fetched_at,omitempty"`
PublishedAt *time.Time `json:"published_at,omitempty"`
Source string `json:"source,omitempty"`
// Machine learning fields. Score and LabelPositive are pointers so that a
// missing score or label is distinguishable from 0 and false.
// NOTE(review): the score range and the set of Classification values are
// not defined in this file — confirm with the scoring code.
Score *float64 `json:"score,omitempty"`
LabelPositive *bool `json:"label_positive,omitempty"`
Classification string `json:"classification,omitempty"`
// Additional bibliographic metadata. Year is a pointer; nil is omitted
// from JSON.
Authors []string `json:"authors,omitempty"`
Journal string `json:"journal,omitempty"`
Year *int `json:"year,omitempty"`
DOI string `json:"doi,omitempty"`
// Raw extracted text from APIs or HTML.
// These fields may be used to populate Title/Content.
RawTitle string `json:"raw_title,omitempty"`
RawContent string `json:"raw_content,omitempty"`
}
// Config represents the application configuration.
//
// Settings are grouped into nested anonymous structs, each mapped to its own
// JSON object ("defaults", "enrich", "providers"); UserAgent and ContactEmail
// sit at the top level. No field uses omitempty, so every key is written when
// the struct is marshaled.
type Config struct {
// Default model and threshold.
// Threshold is a pointer without omitempty: a nil value marshals as JSON
// null rather than being dropped.
// NOTE(review): EventsOut is presumably an output destination for events —
// confirm against the code that reads it.
Defaults struct {
Model string `json:"model"`
Threshold *float64 `json:"threshold"`
EventsOut string `json:"events_out"`
} `json:"defaults"`
// HTTP behavior: identification values for outgoing requests.
// NOTE(review): how ContactEmail is sent (header, query parameter, or as
// part of the user agent) is not shown in this file — confirm.
UserAgent string `json:"user_agent"`
ContactEmail string `json:"contact_email"`
// Enrichment settings.
// NOTE(review): the units of MinTitleLength and ChunkSize (characters,
// bytes, items) are not established here — confirm against the enrichment
// code.
Enrich struct {
MinTitleLength int `json:"min_title_length"`
ChunkSize int `json:"chunk_size"`
} `json:"enrich"`
// API provider settings, one nested struct per provider.
Providers struct {
// SemanticScholar holds the credentials for the Semantic Scholar provider.
SemanticScholar struct {
APIKey string `json:"api_key"`
} `json:"semantic_scholar"`
} `json:"providers"`
}
// Command defines the interface that all CLI subcommands must implement.
//
// The methods describe a three-step contract: Name identifies the
// subcommand, Init receives its arguments, and Run performs the work against
// the supplied input and output streams.
type Command interface {
// Name returns the command name (e.g., "train", "scan", "clean").
Name() string
// Init parses command-line arguments and initializes the command.
// It should return flag.ErrHelp (from the standard library flag package)
// if --help was requested.
Init(args []string) error
// Run executes the command, reading from stdin and writing to stdout.
// The streams are passed in as io.Reader and io.Writer.
// The command should handle its own error reporting to stderr; the
// returned error reports whether the run failed.
Run(stdin io.Reader, stdout io.Writer) error
}
|