aboutsummaryrefslogtreecommitdiff
path: root/core/types.go
blob: 3bfa3113554e54b9cb5091d6d0bed5718eeb48be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// Package core holds the core type definitions for article filtering.
//
// Article represents a paper: title, URL, optional content, and metadata,
// plus Score, LabelPositive, and Classification for ML pipeline state.
//
// Config holds the application settings (defaults, user agent, contact
// email, enrichment, and API providers).
//
// Command is the interface implemented by CLI subcommands (e.g. train, scan).
package core

import (
	"io"
	"time"
)

// Article represents a single article with enriched metadata and scoring.
//
// The struct is tagged for encoding/json. Title and URL are always written
// when marshaling; every other field is tagged omitempty and is left out of
// the output when it holds its zero value (empty string, nil pointer, or
// empty slice).
type Article struct {
	// Basic article information. Content is optional and is omitted from the
	// JSON output when empty.
	Title   string `json:"title"`
	Content string `json:"content,omitempty"`
	URL     string `json:"url"`

	// Enrichment metadata. The timestamps are pointers so that a missing
	// value is nil (and omitted from JSON) rather than the zero time.
	// NOTE(review): Source presumably names where the article came from;
	// confirm against the code that sets it.
	FetchedAt   *time.Time `json:"fetched_at,omitempty"`
	PublishedAt *time.Time `json:"published_at,omitempty"`
	Source      string     `json:"source,omitempty"`

	// Machine learning fields. Score and LabelPositive are pointers so that
	// an unset value (nil) stays distinguishable from a real 0 or false.
	// NOTE(review): the score scale and the set of Classification values are
	// not defined in this file; confirm against the training/scoring code.
	Score          *float64 `json:"score,omitempty"`
	LabelPositive  *bool    `json:"label_positive,omitempty"`
	Classification string   `json:"classification,omitempty"`

	// Additional metadata. Year is a pointer so that a missing year is
	// omitted from JSON instead of being serialized as 0.
	Authors []string `json:"authors,omitempty"`
	Journal string   `json:"journal,omitempty"`
	Year    *int     `json:"year,omitempty"`
	DOI     string   `json:"doi,omitempty"`

	// Raw extracted text from APIs or HTML.
	// Fields that may populate Title/Content.
	RawTitle   string `json:"raw_title,omitempty"`
	RawContent string `json:"raw_content,omitempty"`
}

// Config represents the application configuration.
//
// The struct is tagged for encoding/json; the nested anonymous structs map
// to nested JSON objects under the "defaults", "enrich", and "providers"
// keys. No field uses omitempty, so every key is written when marshaling.
type Config struct {
	// Default model and threshold. Threshold is a pointer, so nil (JSON null
	// or a missing key) is distinguishable from an explicit 0.
	// NOTE(review): the meaning of Model and EventsOut (path, name, ...) is
	// not defined in this file; confirm against the code that reads them.
	Defaults struct {
		Model     string   `json:"model"`
		Threshold *float64 `json:"threshold"`
		EventsOut string   `json:"events_out"`
	} `json:"defaults"`

	// HTTP behavior.
	// NOTE(review): presumably both values identify this client on outgoing
	// requests; confirm against the HTTP code that consumes them.
	UserAgent    string `json:"user_agent"`
	ContactEmail string `json:"contact_email"`

	// Enrichment settings.
	// NOTE(review): the units of MinTitleLength and ChunkSize (bytes, runes,
	// items, ...) are not specified here; confirm against the enrich code.
	Enrich struct {
		MinTitleLength int `json:"min_title_length"`
		ChunkSize      int `json:"chunk_size"`
	} `json:"enrich"`

	// API provider settings. APIKey is a credential and is serialized in
	// clear text under providers.semantic_scholar.api_key whenever a Config
	// is marshaled, so take care when writing a Config to logs or files.
	Providers struct {
		SemanticScholar struct {
			APIKey string `json:"api_key"`
		} `json:"semantic_scholar"`
	} `json:"providers"`
}

// Command defines the interface that all CLI subcommands must implement.
//
// NOTE(review): the method comments imply a lifecycle of Name for lookup,
// Init to parse arguments, then Run to do the work. The caller that enforces
// this order is not part of this file; confirm against the dispatcher.
type Command interface {
	// Name returns the command name (e.g., "train", "scan", "clean").
	Name() string

	// Init parses command-line arguments and initializes the command.
	// It should return flag.ErrHelp if --help was requested.
	// NOTE(review): args presumably excludes the subcommand name itself;
	// confirm against the caller.
	Init(args []string) error

	// Run executes the command, reading from stdin and writing to stdout.
	// The command should handle its own error reporting to stderr.
	// The returned error reports whether the command failed.
	Run(stdin io.Reader, stdout io.Writer) error
}