From 9f5978186ac3de07f4325975fecf4f538fe713b6 Mon Sep 17 00:00:00 2001 From: Sam Scholten Date: Mon, 15 Dec 2025 19:34:17 +1000 Subject: Init v0.1.0 --- cmds/train_test.go | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 cmds/train_test.go (limited to 'cmds/train_test.go') diff --git a/cmds/train_test.go b/cmds/train_test.go new file mode 100644 index 0000000..8298494 --- /dev/null +++ b/cmds/train_test.go @@ -0,0 +1,66 @@ +package cmds + +import ( + "scholscan/core" + "strings" + "testing" +) + +// test RSS parsing +func TestParseRSSFeed(t *testing.T) { + rssXML := ` + + +Test Feed + +Test Article 1 +https://example.com/article1 +This is a test article with some content. + + +Test Article 2 +https://example.com/article2 +This is content with HTML tags.

]]>
+
+
+
` + + articles, err := ParseRSSFeed([]byte(rssXML), "https://example.com/feed") + if err != nil { + t.Fatalf("Failed to parse RSS feed: %v", err) + } + + if len(articles) != 2 { + t.Fatalf("Expected 2 articles, got %d", len(articles)) + } + + if articles[0].Title != "Test Article 1" { + t.Errorf("Expected title 'Test Article 1', got '%s'", articles[0].Title) + } + if articles[0].URL != "https://example.com/article1" { + t.Errorf("Expected URL 'https://example.com/article1', got '%s'", articles[0].URL) + } + if articles[0].Content != "This is a test article with some content." { + t.Errorf("Expected content 'This is a test article with some content.', got '%s'", articles[0].Content) + } + + if articles[1].Title != "Test Article 2" { + t.Errorf("Expected title 'Test Article 2', got '%s'", articles[1].Title) + } + if articles[1].Content != "This is content with HTML tags." { + t.Errorf("Expected 'This is content with HTML tags.', got '%s'", articles[1].Content) + } +} + +func TestCleanFeedContent(t *testing.T) { + longInput := strings.Repeat("test content ", 500) // 6000+ bytes + result := core.CleanFeedContent(longInput) + + if len(result) <= 5000 { + t.Errorf("Expected content to be truncated to >5000 chars, got %d", len(result)) + } + + if !strings.HasSuffix(result, "...") { + t.Errorf("Expected truncated content to end with '...', got '%s'", result[len(result)-3:]) + } +} -- cgit v1.2.3