diff options
| author | Sam Scholten | 2025-12-15 19:34:17 +1000 |
|---|---|---|
| committer | Sam Scholten | 2025-12-15 19:34:59 +1000 |
| commit | 9f5978186ac3de07f4325975fecf4f538fe713b6 (patch) | |
| tree | 41440b703054fe59eb561ba81d80fd60380c1f7a /cmds/train_test.go | |
| download | scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.tar.gz scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.zip | |
Init v0.1.0
Diffstat (limited to 'cmds/train_test.go')
| -rw-r--r-- | cmds/train_test.go | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/cmds/train_test.go b/cmds/train_test.go new file mode 100644 index 0000000..8298494 --- /dev/null +++ b/cmds/train_test.go @@ -0,0 +1,66 @@ +package cmds + +import ( + "scholscan/core" + "strings" + "testing" +) + +// test RSS parsing +func TestParseRSSFeed(t *testing.T) { + rssXML := `<?xml version="1.0" encoding="UTF-8"?> +<rss version="2.0"> +<channel> +<title>Test Feed</title> +<item> +<title>Test Article 1</title> +<link>https://example.com/article1</link> +<description>This is a test article with some content.</description> +</item> +<item> +<title>Test Article 2</title> +<link>https://example.com/article2</link> +<content><![CDATA[<p>This is content with <b>HTML</b> tags.</p>]]></content> +</item> +</channel> +</rss>` + + articles, err := ParseRSSFeed([]byte(rssXML), "https://example.com/feed") + if err != nil { + t.Fatalf("Failed to parse RSS feed: %v", err) + } + + if len(articles) != 2 { + t.Fatalf("Expected 2 articles, got %d", len(articles)) + } + + if articles[0].Title != "Test Article 1" { + t.Errorf("Expected title 'Test Article 1', got '%s'", articles[0].Title) + } + if articles[0].URL != "https://example.com/article1" { + t.Errorf("Expected URL 'https://example.com/article1', got '%s'", articles[0].URL) + } + if articles[0].Content != "This is a test article with some content." { + t.Errorf("Expected content 'This is a test article with some content.', got '%s'", articles[0].Content) + } + + if articles[1].Title != "Test Article 2" { + t.Errorf("Expected title 'Test Article 2', got '%s'", articles[1].Title) + } + if articles[1].Content != "This is content with HTML tags." { + t.Errorf("Expected 'This is content with HTML tags.', got '%s'", articles[1].Content) + } +} + +func TestCleanFeedContent(t *testing.T) { + longInput := strings.Repeat("test content ", 500) // 6000+ bytes + result := core.CleanFeedContent(longInput) + + if len(result) <= 5000 { + t.Errorf("Expected content to be truncated to >5000 chars, got %d", len(result)) + } + + if !strings.HasSuffix(result, "...") { + t.Errorf("Expected truncated content to end with '...', got '%s'", result[len(result)-3:]) + } +} |
