aboutsummaryrefslogtreecommitdiff
path: root/client.go
diff options
context:
space:
mode:
Diffstat (limited to 'client.go')
-rw-r--r--client.go133
1 files changed, 133 insertions, 0 deletions
diff --git a/client.go b/client.go
new file mode 100644
index 0000000..39a3e34
--- /dev/null
+++ b/client.go
@@ -0,0 +1,133 @@
+// CLIENT LAYER - HTTP AND RATE LIMITING
+//
+// manages HTTP requests with retry logic and API-specific rate limits.
+//
+// RATE LIMITS:
+// - arXiv: 1 second between requests (enforced to be safe)
+// - Semantic Scholar: 100ms between requests (configurable via API key)
+//
+// STRATEGY:
+// - retries on network failures and HTTP 429
+// - exponential backoff between attempts: 1s, then 2s (doubling per retry; 4s would need a 4th attempt)
+// - all delays respect context cancellation
+package main
+
import (
	"context"
	"fmt"
	"io"
	"net/http"
	"os"
	"time"
)
+
// HTTPClient bundles a net/http client with the settings shared by every
// outgoing request: a default User-Agent, the per-API pacing delays, and the
// attempt budget used by Do.
type HTTPClient struct {
	// client performs the actual HTTP round trips.
	client *http.Client
	// userAgent is applied by Do to requests that carry no User-Agent header.
	userAgent string
	// arxivDelay is how long RateLimitArxiv pauses before returning.
	arxivDelay time.Duration
	// s2Delay is how long RateLimitS2 pauses before returning.
	s2Delay time.Duration
	// maxRetries bounds the number of attempts Do makes for a single request.
	maxRetries int
}
+
+// NewHTTPClient creates a new HTTP client wrapper with defaults.
+func NewHTTPClient() *HTTPClient {
+ return &HTTPClient{
+ client: &http.Client{
+ Timeout: 30 * time.Second,
+ },
+ userAgent: "scholfetch/1.0 (+https://samsci.com)",
+ arxivDelay: 1 * time.Second,
+ s2Delay: 100 * time.Millisecond,
+ maxRetries: 3,
+ }
+}
+
+// Do performs an HTTP request with retry logic.
+// retries on network errors and 429 (rate limit) responses.
+func (c *HTTPClient) Do(req *http.Request) (*http.Response, error) {
+ // Set user agent if not already set
+ if req.Header.Get("User-Agent") == "" {
+ req.Header.Set("User-Agent", c.userAgent)
+ }
+
+ var lastErr error
+ for attempt := 0; attempt < c.maxRetries; attempt++ {
+ if attempt > 0 {
+ // Exponential backoff: 1s, 2s, 4s
+ backoff := time.Duration(1<<uint(attempt-1)) * time.Second
+ select {
+ case <-time.After(backoff):
+ case <-req.Context().Done():
+ return nil, req.Context().Err()
+ }
+ }
+
+ resp, err := c.client.Do(req)
+ if err != nil {
+ lastErr = err
+ continue
+ }
+
+ // Retry on 429 (rate limit) but not other errors
+ if resp.StatusCode == http.StatusTooManyRequests {
+ resp.Body.Close()
+ lastErr = nil // Reset error for retryable status code
+ continue
+ }
+
+ return resp, nil
+ }
+
+ return nil, lastErr
+}
+
+// RateLimitArxiv adds a delay for arXiv API requests.
+func (c *HTTPClient) RateLimitArxiv(ctx context.Context) error {
+ select {
+ case <-time.After(c.arxivDelay):
+ return nil
+ case <-ctx.Done():
+ return ctx.Err()
+ }
+}
+
+// RateLimitS2 adds a delay for Semantic Scholar API requests.
+func (c *HTTPClient) RateLimitS2(ctx context.Context) error {
+ select {
+ case <-time.After(c.s2Delay):
+ return nil
+ case <-ctx.Done():
+ return ctx.Err()
+ }
+}
+
+// config for scholfetch.
+type Config struct {
+ WithContent bool
+ Verbose bool
+ Logger Logger
+ HTTP *HTTPClient
+ ArxivBatch int
+ S2APIKey string
+}
+
// Logger is the minimal logging dependency that can be injected into Config.
// Any type with a printf-style Printf method of this shape satisfies it.
type Logger interface {
	// Printf receives a format string and its arguments.
	Printf(msg string, args ...interface{})
}
+
+func NewConfig() *Config {
+ return &Config{
+ WithContent: false,
+ Verbose: false,
+ HTTP: NewHTTPClient(),
+ ArxivBatch: 50,
+ S2APIKey: os.Getenv("S2_API_KEY"),
+ }
+}
+
+func NewConfigWithLogger(logger Logger) *Config {
+ cfg := NewConfig()
+ cfg.Logger = logger
+ return cfg
+}