// CLIENT LAYER - HTTP AND RATE LIMITING
//
// manages HTTP requests with retry logic and API-specific rate limits.
//
// RATE LIMITS:
// - arXiv: 1 second between requests (enforced to be safe)
// - Semantic Scholar: 100ms between requests (configurable via API key)
//
// STRATEGY:
// - retries on network failures and HTTP 429
// - exponential backoff: 1s, 2s, 4s
// - all delays respect context cancellation

package main

import (
	"context"
	"net/http"
	"os"
	"time"
)

// HTTPClient wraps an HTTP client with common behavior like user agent,
// rate limiting, and retry logic.
//
// Fields:
//   - client: the underlying *http.Client.
//   - userAgent: value Do writes into the User-Agent header when the
//     request does not already carry one.
//   - arxivDelay, s2Delay: per-API delay durations; presumably the minimum
//     spacing between arXiv and Semantic Scholar requests respectively.
//     NOTE(review): their use is not visible in this part of the file — confirm.
//   - maxRetries: upper bound of the attempt loop in Do (the loop runs while
//     attempt < maxRetries, so this caps the number of loop iterations).
type HTTPClient struct {
	client     *http.Client
	userAgent  string
	arxivDelay time.Duration
	s2Delay    time.Duration
	maxRetries int
}

// NewHTTPClient creates a new HTTP client wrapper with defaults.
//
// Defaults: a 30-second timeout on the underlying http.Client, the
// "scholfetch/1.0 (+https://samsci.com)" user agent, a 1-second arxivDelay,
// a 100-millisecond s2Delay, and maxRetries of 3.
func NewHTTPClient() *HTTPClient {
	return &HTTPClient{
		client: &http.Client{
			Timeout: 30 * time.Second,
		},
		userAgent:  "scholfetch/1.0 (+https://samsci.com)",
		arxivDelay: 1 * time.Second,
		s2Delay:    100 * time.Millisecond,
		maxRetries: 3,
	}
}

// Do performs an HTTP request with retry logic.
// retries on network errors and 429 (rate limit) responses.
func (c *HTTPClient) Do(req *http.Request) (*http.Response, error) {
	// Set user agent if not already set
	if req.Header.Get("User-Agent") == "" {
		req.Header.Set("User-Agent", c.userAgent)
	}

	var lastErr error
	for attempt := 0; attempt < c.maxRetries; attempt++ {
		if attempt > 0 {
			// Exponential backoff: 1s, 2s, 4s
			backoff := time.Duration(1<