// Text processing for RSS feed content. // Used for web UI previews and search indexing - not ML (title-only scoring). package core import ( "regexp" "strings" ) // CleanFeedContent strips HTML, normalizes whitespace, truncates to 5KB func CleanFeedContent(content string) string { if content == "" { return "" } content = StripHTMLTags(content) content = NormalizeSpace(content) maxLength := 5000 if len(content) > maxLength { content = content[:maxLength] + "..." } return content } // StripHTMLTags removes HTML tags func StripHTMLTags(content string) string { re := regexp.MustCompile(`<[^>]*>`) return re.ReplaceAllString(content, "") } // NormalizeSpace collapses whitespace and trims func NormalizeSpace(s string) string { return strings.Join(strings.Fields(strings.TrimSpace(s)), " ") }