From 80a51848951eac3d9053846a3446616b9147d9dc Mon Sep 17 00:00:00 2001 From: lukeflo Date: Tue, 1 Oct 2024 14:41:55 +0200 Subject: keyword search implemented --- src/backend/bib.rs | 61 ++++++++++++++--- src/backend/cliargs.rs | 2 +- src/backend/search.rs | 173 ++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 188 insertions(+), 48 deletions(-) (limited to 'src/backend') diff --git a/src/backend/bib.rs b/src/backend/bib.rs index cbaa00e..c897099 100644 --- a/src/backend/bib.rs +++ b/src/backend/bib.rs @@ -29,7 +29,7 @@ pub struct BibiMain { pub bibfilestring: String, // content of bibfile as string pub bibliography: Bibliography, // parsed bibliography pub citekeys: Vec, // list of all citekeys - // pub bibentries: BibiDataSets, + pub keyword_list: Vec, // list of all available keywords } impl BibiMain { @@ -39,11 +39,13 @@ impl BibiMain { let bibfilestring = fs::read_to_string(&bibfile).unwrap(); let bibliography = biblatex::Bibliography::parse(&bibfilestring).unwrap(); let citekeys = Self::get_citekeys(&bibliography); + let keyword_list = Self::collect_tag_list(&citekeys, &bibliography); Self { bibfile, bibfilestring, bibliography, citekeys, + keyword_list, } } @@ -56,6 +58,40 @@ impl BibiMain { citekeys.sort_by_key(|name| name.to_lowercase()); citekeys } + + // collect all keywords present in the bibliography + // sort them and remove duplicates + // this list is for fast filtering entries by topics/keyowrds + pub fn collect_tag_list(citekeys: &Vec, biblio: &Bibliography) -> Vec { + // Initialize vector collecting all keywords + let mut keyword_list = vec![]; + + // Loop over entries and collect all keywords + for i in citekeys { + if biblio.get(&i).unwrap().keywords().is_ok() { + let items = biblio + .get(&i) + .unwrap() + .keywords() + .unwrap() + .format_verbatim(); + // Split keyword string into slices, trim leading and trailing + // whitespaces, remove empty slices, and collect them + let mut key_vec: Vec = items + .split(',') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + // Append keywords to vector + keyword_list.append(&mut key_vec); + } + } + + // Sort the vector and remove duplicates + keyword_list.sort_by(|a, b| a.to_lowercase().cmp(&b.to_lowercase())); + keyword_list.dedup(); + keyword_list + } } #[derive(Debug)] @@ -91,7 +127,7 @@ pub struct BibiEntry { pub title: String, pub year: String, pub pubtype: String, - // pub keywords: Vec, + pub keywords: String, pub citekey: String, } @@ -102,6 +138,7 @@ impl BibiEntry { Self::get_title(&citekey, &biblio), Self::get_year(&citekey, &biblio), Self::get_pubtype(&citekey, &biblio), + Self::get_keywords(&citekey, &biblio), citekey.to_string(), ] } @@ -181,12 +218,20 @@ impl BibiEntry { } pub fn get_keywords(citekey: &str, biblio: &Bibliography) -> String { - let keywords = biblio - .get(&citekey) - .unwrap() - .keywords() - .unwrap() - .format_verbatim(); + let keywords = { + if biblio.get(&citekey).unwrap().keywords().is_ok() { + let keywords = biblio + .get(&citekey) + .unwrap() + .keywords() + .unwrap() + .format_verbatim(); + keywords + } else { + let keywords = String::from(""); + keywords + } + }; keywords } diff --git a/src/backend/cliargs.rs b/src/backend/cliargs.rs index 32972e0..516de66 100644 --- a/src/backend/cliargs.rs +++ b/src/backend/cliargs.rs @@ -16,7 +16,7 @@ ///// use core::panic; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use sarge::prelude::*; diff --git a/src/backend/search.rs b/src/backend/search.rs index c3b5816..d09dd04 100644 --- a/src/backend/search.rs +++ b/src/backend/search.rs @@ -4,50 +4,145 @@ use nucleo_matcher::{ }; use std::collections::HashMap; -// Stringify inner Vec by joining/concat -fn convert_to_string(inner_vec: &Vec) -> String { - inner_vec.join(" ") +#[derive(Debug)] +pub struct BibiSearch { + pub search_string: String, // Search string show in footer, used for search + pub inner_search: bool, // True, if we trigger a search for already filtered list + pub filtered_entry_list: Vec>, // Temporary holds filtered entry list to refilter it } -// Return a filtered entry list -pub fn search_entry_list(search_pattern: &str, orig_list: Vec>) -> Vec> { - // Create a hashmap to connect stingified entry with entry vec - let mut entry_string_hm: HashMap> = HashMap::new(); +impl Default for BibiSearch { + fn default() -> Self { + Self { + search_string: String::new(), + inner_search: false, + filtered_entry_list: Vec::new(), + } + } +} + +impl BibiSearch { + // Stringify inner Vec by joining/concat + fn convert_to_string(inner_vec: &Vec) -> String { + inner_vec.join(" ") + } + + // Return a filtered entry list + pub fn search_entry_list( + search_pattern: &str, + orig_list: Vec>, + ) -> Vec> { + // Create a hashmap to connect stingified entry with entry vec + let mut entry_string_hm: HashMap> = HashMap::new(); + + // Convert all entries to string and insert them into the hashmap + // next to the original inner Vec of the entry list + for entry in orig_list { + entry_string_hm.insert(Self::convert_to_string(&entry), entry); + } - // Convert all entries to string and insert them into the hashmap - // next to the original inner Vec of the entry list - for entry in orig_list { - entry_string_hm.insert(convert_to_string(&entry), entry); + // Set up matcher (TODO: One time needed only, move to higher level) + let mut matcher = Matcher::new(Config::DEFAULT); + + // Filter the stringified entries and collect them into a vec + let filtered_matches: Vec = { + let matches = + Pattern::parse(search_pattern, CaseMatching::Ignore, Normalization::Smart) + .match_list(entry_string_hm.keys(), &mut matcher); + matches.into_iter().map(|f| f.0.to_string()).collect() + }; + + // Create filtered entry list and push the inner entry vec's to it + // Use the filtered stringified hm-key as index + let mut filtered_list: Vec> = Vec::new(); + for m in filtered_matches { + filtered_list.push(entry_string_hm[&m].to_owned()); + } + filtered_list } - // Set up matcher (TODO: One time needed only, move to higher level) - let mut matcher = Matcher::new(Config::DEFAULT); - - // Filter the stringified entries and collect them into a vec - let filtered_matches: Vec = { - let matches = Pattern::parse(search_pattern, CaseMatching::Ignore, Normalization::Smart) - .match_list(entry_string_hm.keys(), &mut matcher); - matches.into_iter().map(|f| f.0.to_string()).collect() - }; - - // Create filtered entry list and push the inner entry vec's to it - // Use the filtered stringified hm-key as index - let mut filtered_list: Vec> = Vec::new(); - for m in filtered_matches { - filtered_list.push(entry_string_hm[&m].to_owned()); + pub fn search_tag_list(search_pattern: &str, orig_list: Vec) -> Vec { + // Set up matcher (TODO: One time needed only) + let mut matcher = Matcher::new(Config::DEFAULT); + + // Filter the list items by search pattern + let filtered_matches: Vec = { + let matches = + Pattern::parse(search_pattern, CaseMatching::Ignore, Normalization::Smart) + .match_list(orig_list, &mut matcher); + matches.into_iter().map(|f| f.0.to_string()).collect() + }; + filtered_matches } - filtered_list -} -pub fn search_tag_list(search_pattern: &str, orig_list: Vec) -> Vec { - // Set up matcher (TODO: One time needed only) - let mut matcher = Matcher::new(Config::DEFAULT); - - // Filter the list items by search pattern - let filtered_matches: Vec = { - let matches = Pattern::parse(search_pattern, CaseMatching::Ignore, Normalization::Smart) - .match_list(orig_list, &mut matcher); - matches.into_iter().map(|f| f.0.to_string()).collect() - }; - filtered_matches + pub fn filter_entries_by_tag(keyword: &str, orig_list: &Vec>) -> Vec> { + let mut filtered_list: Vec> = Vec::new(); + + for e in orig_list { + if e[4].contains(keyword) { + filtered_list.push(e.to_owned()); + } + } + + filtered_list + } } +// // Stringify inner Vec by joining/concat +// fn convert_to_string(inner_vec: &Vec) -> String { +// inner_vec.join(" ") +// } + +// // Return a filtered entry list +// pub fn search_entry_list(search_pattern: &str, orig_list: Vec>) -> Vec> { +// // Create a hashmap to connect stingified entry with entry vec +// let mut entry_string_hm: HashMap> = HashMap::new(); + +// // Convert all entries to string and insert them into the hashmap +// // next to the original inner Vec of the entry list +// for entry in orig_list { +// entry_string_hm.insert(convert_to_string(&entry), entry); +// } + +// // Set up matcher (TODO: One time needed only, move to higher level) +// let mut matcher = Matcher::new(Config::DEFAULT); + +// // Filter the stringified entries and collect them into a vec +// let filtered_matches: Vec = { +// let matches = Pattern::parse(search_pattern, CaseMatching::Ignore, Normalization::Smart) +// .match_list(entry_string_hm.keys(), &mut matcher); +// matches.into_iter().map(|f| f.0.to_string()).collect() +// }; + +// // Create filtered entry list and push the inner entry vec's to it +// // Use the filtered stringified hm-key as index +// let mut filtered_list: Vec> = Vec::new(); +// for m in filtered_matches { +// filtered_list.push(entry_string_hm[&m].to_owned()); +// } +// filtered_list +// } + +// pub fn search_tag_list(search_pattern: &str, orig_list: Vec) -> Vec { +// // Set up matcher (TODO: One time needed only) +// let mut matcher = Matcher::new(Config::DEFAULT); + +// // Filter the list items by search pattern +// let filtered_matches: Vec = { +// let matches = Pattern::parse(search_pattern, CaseMatching::Ignore, Normalization::Smart) +// .match_list(orig_list, &mut matcher); +// matches.into_iter().map(|f| f.0.to_string()).collect() +// }; +// filtered_matches +// } + +// pub fn filter_entries_by_tag(keyword: &str, orig_list: &Vec>) -> Vec> { +// let mut filtered_list: Vec> = Vec::new(); + +// for e in orig_list { +// if e[4].contains(keyword) { +// filtered_list.push(e.to_owned()); +// } +// } + +// filtered_list +// } -- cgit v1.2.3