diff options
| author | Sam Scholten | 2025-12-15 19:35:46 +1000 |
|---|---|---|
| committer | Sam Scholten | 2025-12-15 19:35:57 +1000 |
| commit | 3562d2fd34bb98d29c7cf6e4d4130129a7bb24f2 (patch) | |
| tree | 42b1f0e0a346a1cf087df90e29a100edbd66b3eb /client.go | |
| download | scholfetch-main.tar.gz scholfetch-main.zip | |
Diffstat (limited to 'client.go')
| -rw-r--r-- | client.go | 133 |
1 files changed, 133 insertions, 0 deletions
// CLIENT LAYER - HTTP AND RATE LIMITING
//
// manages HTTP requests with retry logic and API-specific rate limits.
//
// RATE LIMITS:
//   - arXiv: 1 second between requests (enforced to be safe)
//   - Semantic Scholar: 100ms between requests (the original note says this is
//     configurable via API key; this file only sets the fixed default)
//
// STRATEGY:
//   - retries on network failures and HTTP 429
//   - exponential backoff between attempts: 1s, 2s, 4s, ... (the default of
//     3 attempts waits 1s and then 2s)
//   - all delays respect context cancellation
package main

import (
	"context"
	"net/http"
	"os"
	"time"
)

// HTTPClient wraps an http.Client with common behavior: a default user
// agent, per-API rate-limit delays, and retry logic.
type HTTPClient struct {
	client     *http.Client
	userAgent  string        // sent when a request carries no User-Agent of its own
	arxivDelay time.Duration // pause applied by RateLimitArxiv
	s2Delay    time.Duration // pause applied by RateLimitS2
	maxRetries int           // total number of attempts made by Do, including the first
}

// NewHTTPClient creates a new HTTP client wrapper with defaults: a 30s
// request timeout, a 1s arXiv delay, a 100ms Semantic Scholar delay and
// three attempts per request.
func NewHTTPClient() *HTTPClient {
	return &HTTPClient{
		client: &http.Client{
			Timeout: 30 * time.Second,
		},
		userAgent:  "scholfetch/1.0 (+https://samsci.com)",
		arxivDelay: 1 * time.Second,
		s2Delay:    100 * time.Millisecond,
		maxRetries: 3,
	}
}

// Do sends req, retrying on transport errors and on HTTP 429 responses.
//
// A default User-Agent is added when the request does not carry one. Up to
// maxRetries attempts are made in total (always at least one), separated by
// an exponential backoff of 1s, 2s, 4s, ... that is abandoned as soon as the
// request's context is done.
//
// Any response other than a retried 429 is returned as-is, so callers still
// have to check the status code. If the final attempt is also answered with
// 429, that response is returned (body open) rather than a nil response with
// a nil error. If the final attempt fails at the transport level, its error
// is returned. On a nil error the caller must close the response body.
func (c *HTTPClient) Do(req *http.Request) (*http.Response, error) {
	// A hand-built request may have a nil header map; Set would panic on it.
	if req.Header == nil {
		req.Header = make(http.Header)
	}
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", c.userAgent)
	}

	attempts := c.maxRetries
	if attempts < 1 {
		// Always try once so the caller never sees (nil, nil).
		attempts = 1
	}
	if req.Body != nil && req.Body != http.NoBody && req.GetBody == nil {
		// The body is consumed by the first attempt and cannot be replayed,
		// so a retry would send a truncated request.
		attempts = 1
	}

	var lastErr error
	for attempt := 0; attempt < attempts; attempt++ {
		if attempt > 0 {
			// Exponential backoff: 1s, 2s, 4s, ...
			backoff := time.Duration(1<<uint(attempt-1)) * time.Second
			if err := c.wait(req.Context(), backoff); err != nil {
				return nil, err
			}

			// Give the retry a fresh copy of the request body.
			if req.GetBody != nil {
				body, err := req.GetBody()
				if err != nil {
					return nil, err
				}
				req.Body = body
			}
		}

		resp, err := c.client.Do(req)
		if err != nil {
			lastErr = err
			continue
		}

		// Retry on 429 (rate limit) while attempts remain; every other
		// status, and a 429 on the last attempt, goes back to the caller.
		if resp.StatusCode == http.StatusTooManyRequests && attempt < attempts-1 {
			resp.Body.Close()
			continue
		}

		return resp, nil
	}

	// Only reachable when the final attempt failed at the transport level,
	// so lastErr is non-nil here.
	return nil, lastErr
}

// wait blocks for d or until ctx is done, whichever comes first. It returns
// nil after a full wait and ctx.Err() when the context ended the wait.
func (c *HTTPClient) wait(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-timer.C:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}

// RateLimitArxiv adds a delay for arXiv API requests. It returns ctx.Err()
// if the context is done before the delay elapses.
func (c *HTTPClient) RateLimitArxiv(ctx context.Context) error {
	return c.wait(ctx, c.arxivDelay)
}

// RateLimitS2 adds a delay for Semantic Scholar API requests. It returns
// ctx.Err() if the context is done before the delay elapses.
func (c *HTTPClient) RateLimitS2(ctx context.Context) error {
	return c.wait(ctx, c.s2Delay)
}

// Config holds the settings for scholfetch.
type Config struct {
	WithContent bool
	Verbose     bool
	Logger      Logger      // nil unless set, e.g. via NewConfigWithLogger
	HTTP        *HTTPClient // shared HTTP client wrapper
	ArxivBatch  int         // NewConfig sets 50; presumably items per arXiv request - confirm against caller
	S2APIKey    string      // read from the S2_API_KEY environment variable by NewConfig
}

// Logger interface for dependency injection.
type Logger interface {
	Printf(format string, v ...interface{})
}

// NewConfig returns a Config with default values: content fetching and
// verbose output off, a fresh HTTPClient, an arXiv batch of 50 and the
// Semantic Scholar key taken from the S2_API_KEY environment variable.
// Logger is left nil.
func NewConfig() *Config {
	return &Config{
		WithContent: false,
		Verbose:     false,
		HTTP:        NewHTTPClient(),
		ArxivBatch:  50,
		S2APIKey:    os.Getenv("S2_API_KEY"),
	}
}

// NewConfigWithLogger returns the default Config with Logger set to logger.
func NewConfigWithLogger(logger Logger) *Config {
	cfg := NewConfig()
	cfg.Logger = logger
	return cfg
}
