blob: ef4f86197131736eb03f59fffe17910cb375552c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
// Text processing for RSS feed content.
// Used for web UI previews and search indexing, not for ML scoring (which uses titles only).
package core
import (
"regexp"
"strings"
)
// CleanFeedContent prepares raw feed content for display and indexing.
//
// It removes HTML tags with StripHTMLTags, collapses whitespace with
// NormalizeSpace, and limits the remaining text to 5000 bytes. Longer text is
// cut and "..." is appended, so a truncated result can be up to 5003 bytes
// long. The cut never lands inside a multi-byte UTF-8 character: it backs up
// to the start of that character, so valid input yields valid output.
// An empty input returns "".
func CleanFeedContent(content string) string {
	if content == "" {
		return ""
	}

	content = StripHTMLTags(content)
	content = NormalizeSpace(content)

	const maxLength = 5000
	if len(content) <= maxLength {
		return content
	}

	// content[cut] is the first byte that gets dropped. If it is a UTF-8
	// continuation byte (bit pattern 10xxxxxx), cutting here would split a
	// character, so move the cut back to that character's first byte. A
	// character has at most 3 continuation bytes, which bounds the loop even
	// when the input is not valid UTF-8.
	cut := maxLength
	for cut > maxLength-3 && content[cut]&0xC0 == 0x80 {
		cut--
	}
	return content[:cut] + "..."
}
// htmlTagRe matches a "<", any run of characters other than ">", and the
// closing ">". It is compiled once at package initialization instead of on
// every StripHTMLTags call.
var htmlTagRe = regexp.MustCompile(`<[^>]*>`)

// StripHTMLTags returns content with every "<...>" span removed.
//
// Matching is purely textual: anything between a "<" and the next ">" is
// deleted, whether or not it is a real HTML tag, and a "<" with no later ">"
// is left in place. HTML entities such as "&amp;" are not decoded.
func StripHTMLTags(content string) string {
	return htmlTagRe.ReplaceAllString(content, "")
}
// NormalizeSpace returns s with leading and trailing whitespace removed and
// every internal run of whitespace replaced by a single space character.
// A string that is empty or contains only whitespace yields "".
func NormalizeSpace(s string) string {
	trimmed := strings.TrimSpace(s)
	words := strings.Fields(trimmed)
	return strings.Join(words, " ")
}
|