thedes_tui_core/
grapheme.rs

1use std::{collections::HashMap, fmt, str, sync::Arc};
2
3use thiserror::Error;
4use unicode_segmentation::{Graphemes, UnicodeSegmentation};
5
/// Owned text of one grapheme, as stored by the registry tables.
///
/// A boxed `str` carries no spare capacity, unlike `String`.
type Grapheme = Box<str>;
7
8#[derive(Debug, Error)]
9#[error("Input {input} is not a grapheme")]
10pub struct NotGrapheme {
11    pub input: String,
12}
13
14#[derive(Debug, Error)]
15#[error("Grapheme id {id} is unknwon")]
16pub struct UnknownId {
17    pub id: Id,
18}
19
/// Compact, copyable identifier of a grapheme.
///
/// The numeric value is split into two disjoint ranges:
///
/// * up to and including `char::MAX`: the grapheme is a single `char` and
///   the value is that `char`'s scalar value (see `From<char>`);
/// * above `char::MAX`: the value is an index shifted by `char::MAX + 1`
///   (see `Id::from_index`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Id {
    bits: u64,
}

impl Id {
    /// Builds the identifier that stands for the given storage `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` does not fit in a `u64`, or if shifting it past the
    /// `char` range overflows.
    fn from_index(index: usize) -> Self {
        // First value that can never collide with a `char` scalar value.
        let first_index_bits = u64::from(char::MAX) + 1;
        let shifted = match u64::try_from(index) {
            Ok(raw) => raw.checked_add(first_index_bits),
            Err(_) => None,
        };
        Self { bits: shifted.expect("index could not be so large") }
    }
}

impl From<char> for Id {
    /// Encodes a single-`char` grapheme directly as its scalar value.
    fn from(value: char) -> Self {
        let bits = u64::from(value);
        Self { bits }
    }
}

impl fmt::Display for Id {
    /// Writes the raw numeric value of the identifier in decimal.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{}", self.bits))
    }
}

impl PartialEq<char> for Id {
    /// An id equals a `char` when it is exactly the id that `char` encodes
    /// to.
    fn eq(&self, other: &char) -> bool {
        self.bits == u64::from(*other)
    }
}

impl PartialEq<Id> for char {
    /// Mirror of `Id == char`, so the comparison works in either order.
    fn eq(&self, other: &Id) -> bool {
        other.bits == u64::from(*self)
    }
}
59
60#[derive(Debug)]
61struct RegistryInner {
62    index_to_string: Vec<Grapheme>,
63    string_to_id: HashMap<Grapheme, Id>,
64}
65
66impl RegistryInner {
67    pub fn new() -> Self {
68        Self { index_to_string: Vec::new(), string_to_id: HashMap::new() }
69    }
70
71    pub fn index_to_string(&self, index: usize) -> Option<&str> {
72        self.index_to_string.get(index).map(AsRef::as_ref)
73    }
74
75    pub fn get_or_register(&mut self, grapheme: &str) -> Id {
76        match self.string_to_id.get(grapheme) {
77            Some(id) => *id,
78            None => self.register(grapheme),
79        }
80    }
81
82    pub fn register(&mut self, grapheme: &str) -> Id {
83        let index = self.index_to_string.len();
84        let id = Id::from_index(index);
85        self.index_to_string.push(grapheme.into());
86        self.string_to_id.insert(grapheme.into(), id);
87        id
88    }
89}
90
91#[derive(Debug, Clone)]
92pub struct Registry {
93    inner: Arc<std::sync::Mutex<RegistryInner>>,
94}
95
96impl Registry {
97    pub fn new() -> Self {
98        Self { inner: Arc::new(std::sync::Mutex::new(RegistryInner::new())) }
99    }
100
101    pub fn get_or_register_many<'r, 'g>(
102        &'r self,
103        graphemes: &'g str,
104    ) -> GetOrRegisterMany<'r, 'g> {
105        GetOrRegisterMany {
106            registry: self,
107            graphemes: graphemes.graphemes(true),
108        }
109    }
110
111    pub fn get_or_register(&self, grapheme: &str) -> Result<Id, NotGrapheme> {
112        let mut iter = grapheme.graphemes(true);
113        if iter.next().is_none() {
114            Err(NotGrapheme { input: grapheme.into() })?;
115        }
116        if iter.next().is_some() {
117            Err(NotGrapheme { input: grapheme.into() })?;
118        }
119        Ok(self.get_or_register_unchecked(grapheme))
120    }
121
122    pub fn lookup<F, T>(&self, id: Id, scope: F) -> T
123    where
124        F: FnOnce(Result<GraphemeChars<'_>, UnknownId>) -> T,
125    {
126        let max_char = u64::from(char::MAX);
127        if let Some(bits) = id.bits.checked_sub(max_char + 1) {
128            let index = usize::try_from(bits)
129                .expect("id bits should have been constructed from index");
130            let inner = self.inner.lock().expect("poisoned lock");
131            let result = inner
132                .index_to_string(index)
133                .map(GraphemeChars::multiple)
134                .ok_or(UnknownId { id });
135            scope(result)
136        } else {
137            let code = u32::try_from(id.bits)
138                .ok()
139                .and_then(char::from_u32)
140                .expect("already checked for char range");
141            let result = Ok(GraphemeChars::single(code));
142            scope(result)
143        }
144    }
145
146    fn get_or_register_unchecked(&self, grapheme: &str) -> Id {
147        let mut chars = grapheme.chars();
148        if let Some(ch) = chars.next() {
149            if chars.next().is_none() {
150                return Id::from(ch);
151            }
152        }
153
154        let mut inner = self.inner.lock().expect("poisoned lock");
155        inner.get_or_register(grapheme)
156    }
157}
158
159#[derive(Debug)]
160pub struct GetOrRegisterMany<'r, 'g> {
161    registry: &'r Registry,
162    graphemes: Graphemes<'g>,
163}
164
165impl<'r, 'g> Iterator for GetOrRegisterMany<'r, 'g> {
166    type Item = Id;
167
168    fn next(&mut self) -> Option<Self::Item> {
169        let grapheme = self.graphemes.next()?;
170        Some(self.registry.get_or_register_unchecked(grapheme))
171    }
172}
173
/// Iterator over the `char`s that make up one grapheme.
#[derive(Debug, Clone)]
pub struct GraphemeChars<'r> {
    inner: GraphemeCharsInner<'r>,
}

impl<'r> GraphemeChars<'r> {
    /// Iterator over a grapheme consisting of the lone `char` `ch`.
    fn single(ch: char) -> Self {
        let inner = GraphemeCharsInner::Single(Some(ch));
        Self { inner }
    }

    /// Iterator over every `char` of the borrowed grapheme text `content`.
    fn multiple(content: &'r str) -> Self {
        let inner = GraphemeCharsInner::Multiple(content.chars());
        Self { inner }
    }
}

impl Iterator for GraphemeChars<'_> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        match self.inner {
            // `take` leaves `None` behind, so the char is yielded only once.
            GraphemeCharsInner::Single(ref mut pending) => pending.take(),
            GraphemeCharsInner::Multiple(ref mut rest) => rest.next(),
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        match &self.inner {
            GraphemeCharsInner::Single(pending) => {
                // Exactly one item while the char is pending, zero after.
                let remaining = usize::from(pending.is_some());
                (remaining, Some(remaining))
            }
            GraphemeCharsInner::Multiple(rest) => rest.size_hint(),
        }
    }
}

/// Backing state of [`GraphemeChars`].
#[derive(Debug, Clone)]
enum GraphemeCharsInner<'r> {
    /// A single `char`; `None` once it has been yielded.
    Single(Option<char>),
    /// The remaining `char`s of a borrowed grapheme string.
    Multiple(str::Chars<'r>),
}
213
#[cfg(test)]
mod test {
    use super::{Id, Registry};

    /// Resolves `id` through `registry` and gathers the yielded `char`s into
    /// a `String`, panicking if the id is unknown.
    fn text_of(registry: &Registry, id: Id) -> String {
        registry.lookup(id, |result| result.unwrap().collect())
    }

    /// Registers `grapheme` in a fresh registry and checks that two
    /// consecutive lookups of its id both give the original text back.
    fn assert_round_trip(grapheme: &str) {
        let registry = Registry::new();
        let id = registry.get_or_register(grapheme).unwrap();
        for _ in 0..2 {
            assert_eq!(grapheme, text_of(&registry, id));
        }
    }

    #[test]
    fn id_from_char_is_char_ascii() {
        assert_eq!('a' as u64, Id::from('a').bits);
    }

    #[test]
    fn id_from_char_is_char_unicode() {
        assert_eq!('á' as u64, Id::from('á').bits);
    }

    #[test]
    fn register_single_char_grapheme_ascii() {
        assert_round_trip("a");
    }

    #[test]
    fn register_single_char_grapheme_unicode() {
        assert_round_trip("á");
    }

    #[test]
    fn register_single_grapheme_cluster() {
        assert_round_trip("b̥");
    }

    #[test]
    fn register_many() {
        let registry = Registry::new();
        let expected = ["a", "b̥", "á"];
        let ids: Vec<Id> = registry.get_or_register_many("ab̥á").collect();

        // First pass: every id resolves to its cluster, in input order.
        let actual: Vec<String> =
            ids.iter().map(|&id| text_of(&registry, id)).collect();
        assert_eq!(&expected[..], &actual[..]);

        // Second pass: resolve the same ids again, one at a time.
        for (&id, &text) in ids.iter().zip(expected.iter()) {
            assert_eq!(text, text_of(&registry, id));
        }
    }
}