aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTrim Bresilla2024-12-04 13:01:10 +0100
committerlukeflo2024-12-23 21:03:19 +0100
commitbe72bac936515007b4fd9a30632f62ee19839bf8 (patch)
treef6233f5d0dc3626c5555b174dc708fd44f8380c8
parentbcde4e631f28d4610738df65353cf9b319d4b98a (diff)
downloadbibiman-be72bac936515007b4fd9a30632f62ee19839bf8.tar.gz
bibiman-be72bac936515007b4fd9a30632f62ee19839bf8.zip
feat: better handle arXiv's "citation key" imported from DOI
- Add a new dependency `rand` version `0.8` to the project
- Modify the `format_bibtex_entry` function for better readability and citation key cleaning
- Introduce logic to clean the citation key by retaining only alphanumeric characters and underscores
- Limit the citation key to the last 14 characters if it exceeds that length
- Add a new `file` field to the BibTeX entry using the provided file path
-rw-r--r--Cargo.lock67
-rw-r--r--Cargo.toml1
-rw-r--r--src/bibiman.rs45
3 files changed, 106 insertions, 7 deletions
diff --git a/Cargo.lock b/Cargo.lock
index f00bd10..8b9dbb7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -105,6 +105,7 @@ dependencies = [
"itertools",
"lexopt",
"nucleo-matcher",
+ "rand",
"ratatui",
"signal-hook",
"tokio",
@@ -160,6 +161,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d"
[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
name = "byteorder-lite"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1497,6 +1504,15 @@ dependencies = [
]
[[package]]
+name = "ppv-lite86"
+version = "0.2.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
name = "proc-macro2"
version = "1.0.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1524,6 +1540,36 @@ dependencies = [
]
[[package]]
+name = "rand"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
+dependencies = [
+ "libc",
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
+dependencies = [
+ "getrandom",
+]
+
+[[package]]
name = "ratatui"
version = "0.29.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2773,6 +2819,27 @@ dependencies = [
]
[[package]]
+name = "zerocopy"
+version = "0.7.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
+dependencies = [
+ "byteorder",
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.7.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
name = "zerofrom"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index ffd4cca..9d6dba3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -25,6 +25,7 @@ doi2bib = "0.2.0"
# doi = "0.3.0"
editor-command = "0.1.1"
futures = "0.3.30"
+rand = "0.8"
itertools = "0.13.0"
lexopt = "0.3.0"
nucleo-matcher = "0.3.1"
diff --git a/src/bibiman.rs b/src/bibiman.rs
index 232fd9e..10dab1e 100644
--- a/src/bibiman.rs
+++ b/src/bibiman.rs
@@ -441,24 +441,26 @@ impl Bibiman {
/// Formats a raw BibTeX entry string for better readability.
pub fn format_bibtex_entry(entry: &str, file_path: &str) -> String {
let mut formatted = String::new();
+
// Find the position of the first '{'
if let Some(start_brace_pos) = entry.find('{') {
- // Copy the preamble (e.g., '@article{')
+ // Extract the preamble (e.g., '@article{')
let preamble = &entry[..start_brace_pos + 1];
let preamble = preamble.trim_start();
formatted.push_str(preamble);
formatted.push('\n'); // Add newline
- // Now get the content inside the braces
+
+ // Get the content inside the braces
let rest = &entry[start_brace_pos + 1..];
- // Remove the last '}' at the end
+ // Remove the last '}' at the end, if present
let rest = rest.trim_end();
let rest = if rest.ends_with('}') {
&rest[..rest.len() - 1]
} else {
rest
};
- // Now we need to split the rest by commas, but commas can be inside braces or quotes
- // We'll parse the fields properly
+
+ // Parse the fields, considering braces and quotes
let mut fields = Vec::new();
let mut current_field = String::new();
let mut brace_level = 0;
@@ -492,11 +494,41 @@ impl Bibiman {
fields.push(current_field.trim().to_string());
}
+ // **Conditionally Clean the Citation Key**
+ if let Some(citation_key) = fields.get_mut(0) {
+ // Check if the citation key contains any non-alphanumerical characters except underscores
+ let needs_cleaning = citation_key
+ .chars()
+ .any(|c| !c.is_alphanumeric() && c != '_');
+ if needs_cleaning {
+ // Retain only alphanumerical characters and underscores
+ let cleaned_key: String = citation_key
+ .chars()
+ .filter(|c| c.is_alphanumeric() || *c == '_')
+ .collect();
+ // If the cleaned key is longer than 14 characters, retain only the last 14
+ let limited_key = if cleaned_key.len() > 14 {
+ cleaned_key
+ .chars()
+ .rev()
+ .take(14)
+ .collect::<String>()
+ .chars()
+ .rev()
+ .collect()
+ } else {
+ cleaned_key
+ };
+ // Replace the original citation key with the cleaned and possibly limited key
+ *citation_key = limited_key;
+ }
+ }
+
// Add the new 'file' field
let file_field = format!("file = {{{}}}", file_path);
fields.push(file_field);
- // Now reconstruct the entry with proper indentation
+ // Reconstruct the entry with proper indentation
for (i, field) in fields.iter().enumerate() {
formatted.push_str(" ");
formatted.push_str(field);
@@ -513,7 +545,6 @@ impl Bibiman {
entry.to_string()
}
}
-
// Search entry list
pub fn search_entries(&mut self) {
// Use snapshot of entry list saved when starting the search