aboutsummaryrefslogtreecommitdiff
path: root/cmds/train_test.go
blob: 82984944ce1f96fb4b9da74c4cff6934da65efb7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
package cmds

import (
	"scholscan/core"
	"strings"
	"testing"
)

// TestParseRSSFeed runs ParseRSSFeed over a small in-memory RSS 2.0 document
// with two items and verifies the fields of each returned article. The first
// item carries plain text in <description>; the second carries CDATA-wrapped
// HTML in <content> and is expected to come back with the tags removed.
func TestParseRSSFeed(t *testing.T) {
	const feedURL = "https://example.com/feed"

	feedXML := `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<item>
<title>Test Article 1</title>
<link>https://example.com/article1</link>
<description>This is a test article with some content.</description>
</item>
<item>
<title>Test Article 2</title>
<link>https://example.com/article2</link>
<content><![CDATA[<p>This is content with <b>HTML</b> tags.</p>]]></content>
</item>
</channel>
</rss>`

	parsed, err := ParseRSSFeed([]byte(feedXML), feedURL)
	if err != nil {
		t.Fatalf("Failed to parse RSS feed: %v", err)
	}

	// Stop here on a count mismatch: the indexed checks below need both items.
	if count := len(parsed); count != 2 {
		t.Fatalf("Expected 2 articles, got %d", count)
	}

	// First item: title, link and plain-text description.
	if got := parsed[0].Title; got != "Test Article 1" {
		t.Errorf("Expected title 'Test Article 1', got '%s'", got)
	}
	if got := parsed[0].URL; got != "https://example.com/article1" {
		t.Errorf("Expected URL 'https://example.com/article1', got '%s'", got)
	}
	if got := parsed[0].Content; got != "This is a test article with some content." {
		t.Errorf("Expected content 'This is a test article with some content.', got '%s'", got)
	}

	// Second item: title and the HTML content reduced to plain text.
	if got := parsed[1].Title; got != "Test Article 2" {
		t.Errorf("Expected title 'Test Article 2', got '%s'", got)
	}
	if got := parsed[1].Content; got != "This is content with HTML tags." {
		t.Errorf("Expected 'This is content with HTML tags.', got '%s'", got)
	}
}

// TestCleanFeedContent feeds a 6500-byte string through core.CleanFeedContent
// and expects a result that is longer than 5000 bytes and ends in "...".
// NOTE(review): presumably the result is a 5000-byte cut plus the "..." marker;
// confirm against core.CleanFeedContent.
func TestCleanFeedContent(t *testing.T) {
	longInput := strings.Repeat("test content ", 500) // 13 bytes * 500 = 6500 bytes
	result := core.CleanFeedContent(longInput)

	if len(result) <= 5000 {
		t.Errorf("Expected content to be truncated to >5000 chars, got %d", len(result))
	}

	if !strings.HasSuffix(result, "...") {
		// Report at most the last three bytes. The slice start is clamped so a
		// result shorter than three bytes (e.g. empty) yields a normal test
		// failure instead of a slice-bounds panic.
		tail := result
		if len(tail) > 3 {
			tail = tail[len(tail)-3:]
		}
		t.Errorf("Expected truncated content to end with '...', got '%s'", tail)
	}
}