Init v0.1.0

author: Sam Scholten 2025-12-15 19:34:17 +1000
committer: Sam Scholten 2025-12-15 19:34:59 +1000
commit: 9f5978186ac3de07f4325975fecf4f538fe713b6 (patch)
tree: 41440b703054fe59eb561ba81d80fd60380c1f7a /cmds/train_test.go
download: scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.tar.gz
scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.zip
1 files changed, 66 insertions, 0 deletions
diff --git a/cmds/train_test.go b/cmds/train_test.go
new file mode 100644
index 0000000..8298494
--- /dev/null
+++ b/cmds/train_test.go
@@ -0,0 +1,66 @@
+package cmds
+
+import (
+	"scholscan/core"
+	"strings"
+	"testing"
+)
+
+// test RSS parsing
+func TestParseRSSFeed(t *testing.T) {
+	rssXML := `<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0">
+<channel>
+<title>Test Feed</title>
+<item>
+<title>Test Article 1</title>
+<link>https://example.com/article1</link>
+<description>This is a test article with some content.</description>
+</item>
+<item>
+<title>Test Article 2</title>
+<link>https://example.com/article2</link>
+<content><![CDATA[<p>This is content with <b>HTML</b> tags.</p>]]></content>
+</item>
+</channel>
+</rss>`
+
+	articles, err := ParseRSSFeed([]byte(rssXML), "https://example.com/feed")
+	if err != nil {
+		t.Fatalf("Failed to parse RSS feed: %v", err)
+	}
+
+	if len(articles) != 2 {
+		t.Fatalf("Expected 2 articles, got %d", len(articles))
+	}
+
+	if articles[0].Title != "Test Article 1" {
+		t.Errorf("Expected title 'Test Article 1', got '%s'", articles[0].Title)
+	}
+	if articles[0].URL != "https://example.com/article1" {
+		t.Errorf("Expected URL 'https://example.com/article1', got '%s'", articles[0].URL)
+	}
+	if articles[0].Content != "This is a test article with some content." {
+		t.Errorf("Expected content 'This is a test article with some content.', got '%s'", articles[0].Content)
+	}
+
+	if articles[1].Title != "Test Article 2" {
+		t.Errorf("Expected title 'Test Article 2', got '%s'", articles[1].Title)
+	}
+	if articles[1].Content != "This is content with HTML tags." {
+		t.Errorf("Expected 'This is content with HTML tags.', got '%s'", articles[1].Content)
+	}
+}
+
+// TestCleanFeedContent checks that 6500 bytes of input come back longer than 5000 bytes and ending in "...".
+func TestCleanFeedContent(t *testing.T) {
+	longInput := strings.Repeat("test content ", 500) // 13 bytes * 500 = 6500 bytes
+	result := core.CleanFeedContent(longInput)
+	// Fatalf, not Errorf: the tail slice in the next check would panic on a result shorter than 3 bytes.
+	if len(result) <= 5000 {
+		t.Fatalf("Expected content to be truncated to >5000 chars, got %d", len(result))
+	}
+	if !strings.HasSuffix(result, "...") {
+		t.Errorf("Expected truncated content to end with '...', got '%s'", result[len(result)-3:])
+	}
+}
author	Sam Scholten	2025-12-15 19:34:17 +1000
committer	Sam Scholten	2025-12-15 19:34:59 +1000
commit	9f5978186ac3de07f4325975fecf4f538fe713b6 (patch)
tree	41440b703054fe59eb561ba81d80fd60380c1f7a /cmds/train_test.go
download	scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.tar.gz scholscan-9f5978186ac3de07f4325975fecf4f538fe713b6.zip