aboutsummaryrefslogtreecommitdiff
path: root/cmds/train_test.go
diff options
context:
space:
mode:
authorSam Scholten2025-12-15 19:34:17 +1000
committerSam Scholten2025-12-15 19:34:59 +1000
commit9f5978186ac3de07f4325975fecf4f538fe713b6 (patch)
tree41440b703054fe59eb561ba81d80fd60380c1f7a /cmds/train_test.go
downloadscholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.tar.gz
scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.zip
Init v0.1.0
Diffstat (limited to 'cmds/train_test.go')
-rw-r--r--cmds/train_test.go66
1 files changed, 66 insertions, 0 deletions
diff --git a/cmds/train_test.go b/cmds/train_test.go
new file mode 100644
index 0000000..8298494
--- /dev/null
+++ b/cmds/train_test.go
@@ -0,0 +1,66 @@
+package cmds
+
+import (
+ "scholscan/core"
+ "strings"
+ "testing"
+)
+
+// test RSS parsing
+func TestParseRSSFeed(t *testing.T) {
+ rssXML := `<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+<channel>
+<title>Test Feed</title>
+<item>
+<title>Test Article 1</title>
+<link>https://example.com/article1</link>
+<description>This is a test article with some content.</description>
+</item>
+<item>
+<title>Test Article 2</title>
+<link>https://example.com/article2</link>
+<content><![CDATA[<p>This is content with <b>HTML</b> tags.</p>]]></content>
+</item>
+</channel>
+</rss>`
+
+ articles, err := ParseRSSFeed([]byte(rssXML), "https://example.com/feed")
+ if err != nil {
+ t.Fatalf("Failed to parse RSS feed: %v", err)
+ }
+
+ if len(articles) != 2 {
+ t.Fatalf("Expected 2 articles, got %d", len(articles))
+ }
+
+ if articles[0].Title != "Test Article 1" {
+ t.Errorf("Expected title 'Test Article 1', got '%s'", articles[0].Title)
+ }
+ if articles[0].URL != "https://example.com/article1" {
+ t.Errorf("Expected URL 'https://example.com/article1', got '%s'", articles[0].URL)
+ }
+ if articles[0].Content != "This is a test article with some content." {
+ t.Errorf("Expected content 'This is a test article with some content.', got '%s'", articles[0].Content)
+ }
+
+ if articles[1].Title != "Test Article 2" {
+ t.Errorf("Expected title 'Test Article 2', got '%s'", articles[1].Title)
+ }
+ if articles[1].Content != "This is content with HTML tags." {
+ t.Errorf("Expected 'This is content with HTML tags.', got '%s'", articles[1].Content)
+ }
+}
+
+func TestCleanFeedContent(t *testing.T) {
+ longInput := strings.Repeat("test content ", 500) // 6000+ bytes
+ result := core.CleanFeedContent(longInput)
+
+ if len(result) <= 5000 {
+ t.Errorf("Expected content to be truncated to >5000 chars, got %d", len(result))
+ }
+
+ if !strings.HasSuffix(result, "...") {
+ t.Errorf("Expected truncated content to end with '...', got '%s'", result[len(result)-3:])
+ }
+}