diff options
| author | lukeflo | 2025-10-18 13:03:08 +0200 |
|---|---|---|
| committer | lukeflo | 2025-10-18 13:03:08 +0200 |
| commit | 231353de2ce5713a4848178f031101534b529442 (patch) | |
| tree | f8612664038a10f847c4c5b1ae9d8a2a2144cbf5 | |
| parent | 05dd5cbc2f5fd8252f9ed3b0e79cf4d785791f98 (diff) | |
| download | bibiman-231353de2ce5713a4848178f031101534b529442.tar.gz bibiman-231353de2ce5713a4848178f031101534b529442.zip | |
add some additional ignored chars and words, check deunicoded chars for ignoring
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | CITEKEYS.md | 4 |
| -rw-r--r-- | src/bibiman/citekeys.rs | 3 |
| -rw-r--r-- | src/bibiman/citekeys/citekey_utils.rs | 34 |
| -rw-r--r-- | src/config.rs | 134 |
| -rw-r--r-- | tests/biblatex-test.bib | 2 |
5 files changed, 107 insertions, 70 deletions
diff --git a/CITEKEYS.md b/CITEKEYS.md index 828e557..d9855b8 100644 --- a/CITEKEYS.md +++ b/CITEKEYS.md @@ -30,7 +30,9 @@ fields = [ "author;2;;-;_", "title;3;6;_;_", "year" ] case = "lowercase" ascii_only = true ignored_chars = [ - "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", "\"", + '?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%', + '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', '»', '«', '‘', '’', + '“', '”', ] ignored_words = [ "the", diff --git a/src/bibiman/citekeys.rs b/src/bibiman/citekeys.rs index fdeed14..a56969b 100644 --- a/src/bibiman/citekeys.rs +++ b/src/bibiman/citekeys.rs @@ -49,7 +49,8 @@ pub enum CitekeyCase { alias = "camelcase", alias = "camel_case", alias = "uppercamelcase", - alias = "upper_camel_case" + alias = "upper_camel_case", + alias = "pascalcase" )] Camel, } diff --git a/src/bibiman/citekeys/citekey_utils.rs b/src/bibiman/citekeys/citekey_utils.rs index 2de3600..3f12f03 100644 --- a/src/bibiman/citekeys/citekey_utils.rs +++ b/src/bibiman/citekeys/citekey_utils.rs @@ -175,24 +175,36 @@ pub(super) fn build_citekey( if counter == 0 && case == Some(&CitekeyCase::Camel) { c = c.to_ascii_uppercase() } - if let Some(len) = max_chars - && counter >= len - { - break 'char_loop; - } - // if a word slice contains a special char, skip it - if ignored_chars.contains(&c) { - continue 'char_loop; - } // if non-ascii chars should be mapped, check if needed and do it if let Some(chars) = deunicode::deunicode_char(c) && ascii_only { - word_slice.push_str(chars); - counter += chars.len(); + 'deunicode_char_loop: for ch in chars.chars() { + // if the deunicoded charset of the word slice's current + // char contains a special char, skip it + if ignored_chars.contains(&ch) { + continue 'deunicode_char_loop; + } + word_slice.push(ch); + counter += 1; + if let Some(len) = 
max_chars + && counter >= len + { + break 'char_loop; + } + } } else { + // if a word slice contains a special char, skip it + if ignored_chars.contains(&c) { + continue 'char_loop; + } word_slice.push(c); counter += 1; + if let Some(len) = max_chars + && counter >= len + { + break 'char_loop; + } } } // Don't count empty slices and don't add delimiter to those diff --git a/src/config.rs b/src/config.rs index 47e145c..e0a097c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -36,43 +36,49 @@ use crate::{ cliargs::CLIArgs, }; -pub const IGNORED_SPECIAL_CHARS: [char; 33] = [ +pub const IGNORED_SPECIAL_CHARS: [char; 39] = [ '?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%', - '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', + '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', '»', '«', '‘', '’', + '“', '”', ]; pub static IGNORED_WORDS: LazyLock<Vec<String>> = LazyLock::new(|| { vec![ - String::from("the"), - String::from("a"), - String::from("an"), - String::from("of"), - String::from("for"), - String::from("in"), - String::from("at"), - String::from("to"), - String::from("and"), - String::from("him"), - String::from("her"), - String::from("his"), - String::from("hers"), - String::from("der"), - String::from("die"), - String::from("das"), - String::from("ein"), - String::from("eine"), - String::from("eines"), - String::from("des"), - String::from("auf"), - String::from("und"), - String::from("für"), - String::from("vor"), - String::from("er"), - String::from("sie"), - String::from("es"), - String::from("ihm"), - String::from("ihr"), - String::from("ihnen"), + "the".into(), + "a".into(), + "an".into(), + "of".into(), + "for".into(), + "in".into(), + "at".into(), + "to".into(), + "and".into(), + "him".into(), + "her".into(), + "his".into(), + "he".into(), + "she".into(), + "it".into(), + "hers".into(), + "der".into(), + "die".into(), + "das".into(), + "ein".into(), + 
"eine".into(), + "eines".into(), + "des".into(), + "auf".into(), + "und".into(), + "für".into(), + "vor".into(), + "er".into(), + "sie".into(), + "es".into(), + "ihm".into(), + "ihr".into(), + "ihnen".into(), + "zum".into(), + "dem".into(), ] }); @@ -159,34 +165,50 @@ const DEFAULT_CONFIG: &str = r##" # ascii_only = true ## List of special chars that'll be ignored when building citekeys. -## A custom list will overwrite the default list +## A custom list will overwrite the default list. Thus, to add to the list, +## uncomment it and add the additional chars. # ignored_chars = [ -# "?", "!", "\\", "\'", ".", "-", "–", ":", ",", "[", "]", "(", ")", "{", "}", "§", "$", "%", "&", "/", "`", "´", "#", "+", "*", "=", "|", "<", ">", "^", "°", "_", """, -# ] +# '?', '!', '\\', '\'', '.', '-', '–', ':', ',', '[', ']', '(', ')', '{', '}', '§', '$', '%', '&', '/', '`', '´', '#', '+', '*', '=', '|', '<', '>', '^', '°', '_', '"', '»', '«', '‘', '’', '“', '”', ## List of words that'll be ignored when building citekeys. -## A custom list will overwrite the default list +## A custom list will overwrite the default list. Thus, to add to the list, +## uncomment it and add the additional words. 
# ignored_words = [ -# "the", -# "a", -# "an", -# "of", -# "for", -# "in", -# "at", -# "to", -# "and", -# "der", -# "die", -# "das", -# "ein", -# "eine", -# "eines", -# "des", -# "auf", -# "und", -# "für", -# "vor", +# "the" +# "a" +# "an" +# "of" +# "for" +# "in" +# "at" +# "to" +# "and" +# "him" +# "her" +# "his" +# "he" +# "she" +# "it" +# "hers" +# "der" +# "die" +# "das" +# "ein" +# "eine" +# "eines" +# "des" +# "auf" +# "und" +# "für" +# "vor" +# "er" +# "sie" +# "es" +# "ihm" +# "ihr" +# "ihnen" +# "zum" +# "dem" # ] "##; diff --git a/tests/biblatex-test.bib b/tests/biblatex-test.bib index 2149e7c..941a24d 100644 --- a/tests/biblatex-test.bib +++ b/tests/biblatex-test.bib @@ -40,7 +40,7 @@ } @article{angenendt, - title = {In Honore Salvatoris~-- Vom Sinn und Unsinn der Patrozinienkunde}, + title = {“In Honore Salvatoris”~-- Vom Sinn und Unsinn der Patrozinienkunde}, shorttitle = {In Honore Salvatoris}, author = {Angenendt, Arnold}, volume = {97}, |
