biblion/tools/
format.rs

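//! Item formatting utilities.
//!
//! Converts internal Zotero types into human-readable text summaries
//! that Claude can parse and present to users; [`format_item_summary`]
//! mirrors the Python `format_item_summary()` function. Also provides
//! shared helpers for reducing note HTML to plain text and for pulling
//! a year out of a free-form date string.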
6
7use crate::db::zotero::{Creator, ZoteroItem};
8
9/// Format a ZoteroItem as a concise text summary.
10///
11/// Output format matches the Python MCP server for consistency:
12/// ```text
13/// **citekey**
14///   Title of the Paper
15///   Author1, A.; Author2, B.
16///   (2024)
17///   [journalArticle]
18///   DOI: 10.1234/example
19/// ```
20pub fn format_item_summary(item: &ZoteroItem, citekey: Option<&str>) -> String {
21    let mut parts = Vec::new();
22
23    // Header: citekey or item key
24    let header = citekey.unwrap_or(&item.item_key);
25    parts.push(format!("**{header}**"));
26
27    // Title
28    parts.push(format!("  {}", item.title));
29
30    // Creators
31    if !item.creators.is_empty() {
32        let authors = format_creators(&item.creators);
33        parts.push(format!("  {authors}"));
34    }
35
36    // Date
37    if let Some(date) = &item.date {
38        parts.push(format!("  ({date})"));
39    }
40
41    // Type
42    parts.push(format!("  [{}]", item.item_type));
43
44    // DOI
45    if let Some(doi) = &item.doi {
46        parts.push(format!("  DOI: {doi}"));
47    }
48
49    parts.join("\n")
50}
51
52/// Format creators as "LastName, F. I.; LastName2, F."
53pub fn format_creators(creators: &[Creator]) -> String {
54    creators
55        .iter()
56        .map(|c| {
57            match &c.first_name {
58                Some(first) if !first.is_empty() => {
59                    // Abbreviate: "Richard A." → "R. A."
60                    let initials: String = first
61                        .split_whitespace()
62                        .map(|w| format!("{}.", w.chars().next().unwrap_or(' ')))
63                        .collect::<Vec<_>>()
64                        .join(" ");
65                    format!("{}, {initials}", c.last_name)
66                }
67                _ => c.last_name.clone(),
68            }
69        })
70        .collect::<Vec<_>>()
71        .join("; ")
72}
73
/// Convert an HTML fragment to plain text.
///
/// Tags are removed. `<br>` and the closing tags of block-level elements
/// (`</p>`, `</div>`, `</h1>`…, `</li>`) become a single newline; a newline
/// is never emitted at the very start or directly after another newline.
/// The character references `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`,
/// `&#39;` and `&nbsp;` (as a plain space) are decoded in the same pass, so
/// `&amp;lt;` yields the text `&lt;` rather than `<`. Any other `&` is kept
/// as-is. Leading and trailing whitespace is trimmed from the result.
///
/// A `<` only opens a tag when it is directly followed by an ASCII letter,
/// `/`, `!` or `?` and a `>` occurs later in the input. Every other `<` is
/// kept as literal text, so content such as `a < b` or a trailing `<` is not
/// swallowed.
pub fn html_to_text(html: &str) -> String {
    // Character references decoded outside of tags.
    const ENTITIES: [(&str, char); 7] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&gt;", '>'),
        ("&quot;", '"'),
        ("&apos;", '\''),
        ("&#39;", '\''),
        ("&nbsp;", ' '),
    ];
    // Lowercased tag prefixes that terminate a line of text.
    const LINE_BREAKERS: [&str; 5] = ["br", "/p", "/div", "/h", "/li"];

    let mut result = String::with_capacity(html.len());
    let mut rest = html;
    // Once one search for '>' has failed, no later '<' can be closed either;
    // remembering that keeps malformed input from rescanning the tail.
    let mut closer_ahead = true;

    while let Some(pos) = rest.find(|c: char| c == '<' || c == '&') {
        result.push_str(&rest[..pos]);
        let tail = &rest[pos..];

        if tail.starts_with('&') {
            match ENTITIES.iter().find(|&&(name, _)| tail.starts_with(name)) {
                Some(&(name, decoded)) => {
                    result.push(decoded);
                    rest = &tail[name.len()..];
                }
                None => {
                    // Not a reference we know: keep the ampersand verbatim.
                    result.push('&');
                    rest = &tail[1..];
                }
            }
            continue;
        }

        // `tail` starts with '<' (one byte), so slicing at 1 is always valid.
        let opens_tag = tail[1..]
            .chars()
            .next()
            .map_or(false, |c| c.is_ascii_alphabetic() || matches!(c, '/' | '!' | '?'));
        let close = if opens_tag && closer_ahead {
            let found = tail.find('>');
            closer_ahead = found.is_some();
            found
        } else {
            None
        };

        match close {
            Some(end) => {
                // A '<' inside the span restarts the tag, so only the text
                // after the last '<' names the tag being closed.
                let inner = &tail[1..end];
                let tag = inner.rsplit('<').next().unwrap_or(inner).to_lowercase();
                if LINE_BREAKERS.iter().any(|&prefix| tag.starts_with(prefix))
                    && !result.is_empty()
                    && !result.ends_with('\n')
                {
                    result.push('\n');
                }
                rest = &tail[end + 1..];
            }
            None => {
                // Not a tag: keep the '<' and carry on right after it.
                result.push('<');
                rest = &tail[1..];
            }
        }
    }

    result.push_str(rest);
    result.trim().to_string()
}
113
/// Extract a 4-digit year from various date formats.
///
/// The input is cut into runs of consecutive ASCII digits; the first run
/// that is exactly four digits long and lies in `1800..=2100` is returned
/// as an owned string. Longer or shorter digit runs (for example the
/// undelimited `"20240115"`) are ignored, and `None` is returned when no
/// run qualifies.
///
/// Handles: "2024", "2024-01-15", "2024-00-00 2024", "January 2024", etc.
/// Shared by bibtex.rs and bibliography.rs.
pub fn extract_year(date: &str) -> Option<String> {
    date.split(|c: char| !c.is_ascii_digit())
        .filter(|run| run.len() == 4)
        .find(|run| matches!(run.parse::<u32>(), Ok(year) if (1800..=2100).contains(&year)))
        .map(String::from)
}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an `author` creator in the first position (`order: 0`).
    fn author(first_name: Option<&'static str>, last_name: &'static str) -> Creator {
        Creator {
            creator_type: "author".into(),
            first_name: first_name.map(Into::into),
            last_name: last_name.into(),
            order: 0,
        }
    }

    /// Build an item that has a key, type, and title but no optional data.
    fn bare_item(item_type: &'static str, title: &'static str) -> ZoteroItem {
        ZoteroItem {
            item_id: 1,
            item_key: "ABC12345".into(),
            item_type: item_type.into(),
            title: title.into(),
            date: None,
            doi: None,
            url: None,
            abstract_note: None,
            creators: vec![],
            tags: vec![],
            date_added: "2024-01-01".into(),
            date_modified: "2024-01-01".into(),
        }
    }

    #[test]
    fn format_item_with_citekey() {
        let item = ZoteroItem {
            date: Some("1978".into()),
            doi: Some("10.1109/C-M.1978.218136".into()),
            creators: vec![
                author(Some("Richard"), "DeMillo"),
                Creator {
                    order: 1,
                    ..author(Some("Richard"), "Lipton")
                },
            ],
            tags: vec!["mutation-testing".into()],
            date_modified: "2024-06-15".into(),
            ..bare_item("journalArticle", "Hints on Test Data Selection")
        };

        let summary = format_item_summary(&item, Some("demilloHintsTestData1978"));

        // Every line of the summary must be present.
        for fragment in [
            "**demilloHintsTestData1978**",
            "Hints on Test Data Selection",
            "DeMillo, R.; Lipton, R.",
            "(1978)",
            "[journalArticle]",
            "DOI: 10.1109/C-M.1978.218136",
        ] {
            assert!(
                summary.contains(fragment),
                "missing {fragment:?} in:\n{summary}"
            );
        }
    }

    #[test]
    fn format_item_without_citekey_uses_item_key() {
        let item = bare_item("book", "A Book");
        let summary = format_item_summary(&item, None);
        assert!(summary.contains("**ABC12345**"));
    }

    #[test]
    fn format_creators_with_initials() {
        let creators = vec![
            author(Some("John"), "Doe"),
            Creator {
                order: 1,
                ..author(Some("Jane"), "Smith")
            },
        ];
        assert_eq!(format_creators(&creators), "Doe, J.; Smith, J.");
    }

    #[test]
    fn format_creators_multi_word_first_name() {
        let creators = vec![author(Some("Richard A."), "DeMillo")];
        assert_eq!(format_creators(&creators), "DeMillo, R. A.");
    }

    #[test]
    fn format_creators_no_first_name() {
        let creators = vec![author(None, "Organization")];
        assert_eq!(format_creators(&creators), "Organization");
    }

    #[test]
    fn html_to_text_strips_tags() {
        let text = html_to_text("<p>Hello <b>world</b></p>");
        assert_eq!(text, "Hello world");
    }

    #[test]
    fn html_to_text_preserves_plain_text() {
        let text = html_to_text("No HTML here");
        assert_eq!(text, "No HTML here");
    }

    #[test]
    fn html_to_text_inserts_newlines_between_paragraphs() {
        let text = html_to_text("<p>First paragraph</p><p>Second paragraph</p>");
        assert!(
            text.contains("First paragraph\nSecond paragraph"),
            "Got: {text}"
        );
    }

    #[test]
    fn html_to_text_handles_br_tags() {
        // Both the plain and the self-closing spelling break the line.
        for html in ["Line one<br>Line two", "Line one<br/>Line two"] {
            assert_eq!(html_to_text(html), "Line one\nLine two");
        }
    }

    #[test]
    fn html_to_text_handles_divs() {
        let text = html_to_text("<div>Block one</div><div>Block two</div>");
        assert!(text.contains("Block one\nBlock two"), "Got: {text}");
    }
}