Init v0.1.0HEAD main

author: Sam Scholten 2025-12-15 19:35:46 +1000
committer: Sam Scholten 2025-12-15 19:35:57 +1000
commit: 3562d2fd34bb98d29c7cf6e4d4130129a7bb24f2 (patch)
tree: 42b1f0e0a346a1cf087df90e29a100edbd66b3eb /client.go
download: scholfetch-main.tar.gz
scholfetch-main.zip
1 files changed, 133 insertions, 0 deletions
diff --git a/client.go b/client.go
new file mode 100644
index 0000000..39a3e34
--- /dev/null
+++ b/client.go
@@ -0,0 +1,133 @@
+// CLIENT LAYER - HTTP AND RATE LIMITING
+//
+// manages HTTP requests with retry logic and API-specific rate limits.
+//
+// RATE LIMITS:
+// - arXiv: 1 second between requests (enforced to be safe)
+// - Semantic Scholar: 100ms between requests (configurable via API key)
+//
+// STRATEGY:
+// - retries on network failures and HTTP 429
+// - exponential backoff between attempts: 1s, then 2s (3 attempts by default)
+// - all delays respect context cancellation
+package main
+
+import (
+	"context"
+	"net/http"
+	"os"
+	"time"
+)
+
+// HTTPClient wraps an HTTP client with common behavior like user agent,
+// rate limiting, and retry logic.
+type HTTPClient struct {
+	client     *http.Client  // underlying client used by Do to send requests
+	userAgent  string        // User-Agent applied by Do when the request has none
+	arxivDelay time.Duration // wait applied by RateLimitArxiv
+	s2Delay    time.Duration // wait applied by RateLimitS2
+	maxRetries int           // upper bound on the attempts made by Do
+}
+
+// NewHTTPClient returns an HTTPClient populated with the default settings:
+// a 30 second request timeout, the scholfetch user agent, a 1 second arXiv
+// delay, a 100ms Semantic Scholar delay, and a retry bound of 3.
+func NewHTTPClient() *HTTPClient {
+	inner := &http.Client{Timeout: 30 * time.Second}
+
+	hc := new(HTTPClient)
+	hc.client = inner
+	hc.userAgent = "scholfetch/1.0 (+https://samsci.com)"
+	hc.arxivDelay = time.Second
+	hc.s2Delay = 100 * time.Millisecond
+	hc.maxRetries = 3
+	return hc
+}
+
+// Do sends req and returns the response, retrying on transport errors and
+// HTTP 429 (rate limit) responses.
+//
+// The User-Agent header is set to c.userAgent when req does not carry one.
+// At most c.maxRetries attempts are made (always at least one), sleeping
+// 1s, 2s, 4s, ... between consecutive attempts. If req's context is done
+// during a sleep, the wait is abandoned and the context error is returned.
+//
+// When the final attempt fails at the transport level, that error is
+// returned. When the final attempt is answered with 429, that response is
+// returned as-is so the caller can inspect the status; Do never returns
+// (nil, nil). The caller must close the body of any returned response.
+func (c *HTTPClient) Do(req *http.Request) (*http.Response, error) {
+	if req.Header.Get("User-Agent") == "" {
+		req.Header.Set("User-Agent", c.userAgent)
+	}
+
+	// Always try at least once so a zero maxRetries cannot yield (nil, nil).
+	attempts := c.maxRetries
+	if attempts < 1 {
+		attempts = 1
+	}
+
+	var lastErr error
+	for attempt := 0; attempt < attempts; attempt++ {
+		if attempt > 0 {
+			// Exponential backoff before each retry: 1s, 2s, 4s, ...
+			backoff := time.Duration(1<<uint(attempt-1)) * time.Second
+			select {
+			case <-time.After(backoff):
+			case <-req.Context().Done():
+				return nil, req.Context().Err()
+			}
+
+			// The previous attempt consumed the request body; rewind it
+			// when the request knows how to recreate it.
+			if req.GetBody != nil {
+				body, err := req.GetBody()
+				if err != nil {
+					return nil, err
+				}
+				req.Body = body
+			}
+		}
+
+		resp, err := c.client.Do(req)
+		if err != nil {
+			lastErr = err
+			continue
+		}
+
+		// Retry on 429 while attempts remain. Any other status, or a 429 on
+		// the last attempt, goes back to the caller untouched.
+		if resp.StatusCode == http.StatusTooManyRequests && attempt < attempts-1 {
+			resp.Body.Close()
+			continue
+		}
+
+		return resp, nil
+	}
+
+	return nil, lastErr
+}
+
+// RateLimitArxiv blocks for c.arxivDelay, the pause inserted for arXiv API
+// requests. It returns nil once the delay has elapsed, or ctx.Err() if ctx
+// is done first.
+func (c *HTTPClient) RateLimitArxiv(ctx context.Context) error {
+	// A stoppable timer (rather than time.After) is released immediately
+	// when ctx wins the race instead of lingering until it fires.
+	timer := time.NewTimer(c.arxivDelay)
+	defer timer.Stop()
+
+	select {
+	case <-timer.C:
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}
+
+// RateLimitS2 blocks for c.s2Delay, the pause inserted for Semantic Scholar
+// API requests. It returns nil once the delay has elapsed, or ctx.Err() if
+// ctx is done first.
+func (c *HTTPClient) RateLimitS2(ctx context.Context) error {
+	// A stoppable timer (rather than time.After) is released immediately
+	// when ctx wins the race instead of lingering until it fires.
+	timer := time.NewTimer(c.s2Delay)
+	defer timer.Stop()
+
+	select {
+	case <-timer.C:
+		return nil
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+}
+
+// Config holds the settings and shared dependencies for scholfetch.
+type Config struct {
+	// WithContent defaults to false in NewConfig.
+	// NOTE(review): presumably toggles fetching content in addition to
+	// metadata; confirm against callers.
+	WithContent bool
+
+	// Verbose defaults to false in NewConfig.
+	// NOTE(review): presumably enables extra logging; confirm against callers.
+	Verbose bool
+
+	// Logger is the injected logger. NewConfig leaves it nil;
+	// NewConfigWithLogger sets it.
+	Logger Logger
+
+	// HTTP is the HTTP client wrapper; NewConfig populates it via
+	// NewHTTPClient.
+	HTTP *HTTPClient
+
+	// ArxivBatch defaults to 50 in NewConfig.
+	// NOTE(review): presumably the arXiv request batch size; confirm against
+	// callers.
+	ArxivBatch int
+
+	// S2APIKey is read from the S2_API_KEY environment variable by NewConfig
+	// and is empty when that variable is unset.
+	S2APIKey string
+}
+
+// Logger is the minimal logging interface stored in Config, so callers can
+// inject their own implementation (dependency injection).
+type Logger interface {
+	// Printf takes a format string and its arguments; the signature matches
+	// the Printf method of the standard library's *log.Logger.
+	Printf(format string, v ...interface{})
+}
+
+// NewConfig returns a Config with default values: content fetching and
+// verbose output off, a fresh default HTTP client, an arXiv batch of 50, and
+// the Semantic Scholar API key taken from the S2_API_KEY environment
+// variable. The Logger field is left nil.
+func NewConfig() *Config {
+	// WithContent and Verbose keep their zero value (false).
+	defaults := new(Config)
+	defaults.HTTP = NewHTTPClient()
+	defaults.ArxivBatch = 50
+	defaults.S2APIKey = os.Getenv("S2_API_KEY")
+	return defaults
+}
+
+// NewConfigWithLogger returns the same defaults as NewConfig with the Logger
+// field set to logger.
+func NewConfigWithLogger(logger Logger) *Config {
+	config := NewConfig()
+	config.Logger = logger
+	return config
+}
author	Sam Scholten	2025-12-15 19:35:46 +1000
committer	Sam Scholten	2025-12-15 19:35:57 +1000
commit	3562d2fd34bb98d29c7cf6e4d4130129a7bb24f2 (patch)
tree	42b1f0e0a346a1cf087df90e29a100edbd66b3eb /client.go
download	scholfetch-main.tar.gz scholfetch-main.zip