1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
// bibiman - a TUI for managing BibLaTeX databases
// Copyright (C) 2025 lukeflo
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
/////
use fancy_regex::Regex;
use unicodeit::replace as unicode_replace;
/// Sanitizing rules, expressed as `(pattern, replacement)` pairs.
///
/// The first element of each pair is a regex pattern, the second is the
/// replacement text that `sanitize` hands to `Regex::replace_all`. A `${1}` in
/// the replacement refers to the first capture group of the pattern.
///
/// Only macros that are not already covered by unicodeit should be processed in this way.
///
// Regex to capture content between brackets: `(\{(?:[^{}]++|(\1))*+\})`
// Alternative without capturing the outer brackets: `\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}`
//
// NOTE(review): `(\1)` is written as a backreference to group 1; presumably it
// is meant to cope with nested braces - confirm against inputs with nesting.
const SANITIZE_REGEX: &[(&str, &str)] = &[
// `\mkbibquote{...}`: keep the captured argument and wrap it in double quotes.
(
r"\\mkbibquote\{((?:(\{(?:[^}{]|(\1))*+\})|[^{])*?)\}",
"\"${1}\"",
),
// `\hyphen`: replace the macro with a plain `-`.
(r"\\hyphen", "-"),
];
/// Convenience wrapper around [`sanitize`] for a single string.
///
/// Turns one string containing LaTeX macros into a more readable one without
/// them. When many strings have to be sanitized, pass them to [`sanitize`] in
/// a single call instead, which is the preferred way for performance reasons.
pub fn sanitize_one(input_text: &str) -> String {
    // `sanitize` returns exactly one output per input, so a one-element input
    // always yields a first element and this `unwrap` cannot fail.
    let mut sanitized = sanitize(vec![input_text]).into_iter();
    sanitized.next().unwrap()
}
/// Sanitize multiple strings with LaTeX macros into more readable unicode equivalents.
///
/// Each input string is first run through every rule of `SANITIZE_REGEX`, in
/// table order, and the outcome is then passed through `unicodeit::replace`.
///
/// The returned vector always contains exactly one entry per input string, in
/// the same order as the input.
///
/// # Panics
///
/// Panics if a pattern in `SANITIZE_REGEX` is not a valid regex. The call to
/// `Regex::replace_all` may additionally panic on a runtime matching error;
/// see the `fancy_regex` documentation.
pub fn sanitize(input_text: Vec<&str>) -> Vec<String> {
    // The patterns are compile-time constants, so compile them only once per
    // process instead of on every call (which matters for `sanitize_one`).
    static RULES: std::sync::OnceLock<Vec<(Regex, &'static str)>> = std::sync::OnceLock::new();
    let rules = RULES.get_or_init(|| {
        SANITIZE_REGEX
            .iter()
            .map(|&(pattern, replacement)| {
                let re = Regex::new(pattern)
                    .expect("every SANITIZE_REGEX pattern must be a valid regex");
                (re, replacement)
            })
            .collect()
    });

    input_text
        .into_iter()
        .map(|raw| {
            let mut text = raw.to_string();
            for (re, replacement) in rules {
                // `replace_all` hands back a borrowed `Cow` when nothing was
                // replaced; only swap in a new string if one was built.
                let replaced = match re.replace_all(text.as_str(), *replacement) {
                    std::borrow::Cow::Owned(changed) => Some(changed),
                    std::borrow::Cow::Borrowed(_) => None,
                };
                if let Some(changed) = replaced {
                    text = changed;
                }
            }
            // Everything that is left is handled by unicodeit.
            unicode_replace(&text)
        })
        .collect()
}
|