zng_ext_font/
hyphenation.rs

1use std::path::PathBuf;
2
3use crate::task::parking_lot::Mutex;
4use hyphenation::{Hyphenator as _, Load as _};
5use zng_app_context::app_local;
6use zng_ext_l10n::Lang;
7
8app_local! {
9    static HYPHENATION_SV: Hyphenation = Hyphenation {
10        #[cfg(feature = "hyphenation_embed_all")]
11        source: Mutex::new(Some(Box::new(HyphenationDataEmbedded))),
12        #[cfg(not(feature = "hyphenation_embed_all"))]
13        source: Mutex::new(None),
14
15        dictionaries: vec![],
16    };
17}
18
19struct Hyphenation {
20    source: Mutex<Option<Box<dyn HyphenationDataSource>>>,
21    dictionaries: Vec<hyphenation::Standard>,
22}
23
/// Hyphenation service.
///
/// A language is only supported when its dictionary data is available. If the feature `"hyphenation_embed_all"`
/// is enabled the dictionaries for all supported languages are embedded, otherwise the dictionaries must be
/// provided by a [`HyphenationDataSource`].
///
/// The [`HyphenationDataDir`] source loads external files, see the [hyphenation](https://github.com/tapeinosyne/hyphenation)
/// crate for more details about the data files.
pub struct HYPHENATION;
32impl HYPHENATION {
33    /// Set the hyphenation dictionaries source and clear cache.
34    ///
35    /// Note that this applies immediately and does not notify, it should only be called once during app init.
36    pub fn init_data_source(&self, source: impl HyphenationDataSource) {
37        let mut h = HYPHENATION_SV.write();
38        *h.source.get_mut() = Some(Box::new(source));
39        h.dictionaries.clear();
40    }
41
42    /// Try to hyphenate the `word` using the `lang` dictionary and rules.
43    ///
44    /// Returns a vector of indexes that allow a line break.
45    pub fn hyphenate(&self, lang: &Lang, word: &str) -> Vec<usize> {
46        self.hyphenate_opt(lang, word).unwrap_or_default()
47    }
48
49    /// Try to hyphenate the `word` using the `lang` dictionary and rules.
50    ///
51    /// Returns a vector of indexes that allow a line break. Returns `None` if the `lang` is not supported or the
52    /// `word` contains non-word characters.
53    pub fn hyphenate_opt(&self, lang: &Lang, word: &str) -> Option<Vec<usize>> {
54        let lang = self.lang_to_hyphenation_language(lang)?;
55        self.hyphenate_opt_language(word, lang)
56    }
57
58    /// Get the best `hyphenation::Language` for the `lang`.
59    pub fn lang_to_hyphenation_language(&self, lang: &Lang) -> Option<hyphenation::Language> {
60        for (l, r) in &*util::LANG_TO_LANGUAGE_MAP.read() {
61            if lang.matches(l, false, true) {
62                return Some(*r);
63            }
64        }
65
66        None
67    }
68
69    /// Hyphenate with language already resolved.
70    pub fn hyphenate_opt_language(&self, word: &str, lang: hyphenation::Language) -> Option<Vec<usize>> {
71        if !util::WORD_REGEX.read().is_match(word) {
72            return None;
73        }
74
75        {
76            let h = HYPHENATION_SV.read();
77
78            for d in &h.dictionaries {
79                if d.language() == lang {
80                    return Some(d.hyphenate(word).breaks);
81                }
82            }
83        }
84
85        let mut h = HYPHENATION_SV.write();
86
87        if h.source.get_mut().is_none() {
88            return None;
89        }
90
91        for d in &h.dictionaries {
92            if d.language() == lang {
93                return Some(d.hyphenate(word).breaks);
94            }
95        }
96
97        if let Some(source) = h.source.get_mut() {
98            let d = source.load(lang)?;
99            let r = Some(d.hyphenate(word).breaks);
100            h.dictionaries.push(d);
101
102            return r;
103        }
104
105        None
106    }
107}
108
109/// Represents a hyphenation dictionary source.
110///
111/// The data source must be registered in [`HYPHENATION.init_data_source`].
112///
113/// [`HYPHENATION.init_data_source`]: HYPHENATION::init_data_source
114pub trait HyphenationDataSource: Send + 'static {
115    /// Load the dictionary for the `lang`.
116    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard>;
117}
118
/// A hyphenation data source that looks for dictionary files in a directory.
///
/// The file name of each dictionary is produced from a pattern that must contain the replacement `{lang}`,
/// it is replaced by the display print of the language. For example the file `dir/en-us.bincode` is matched
/// by the pattern `"{lang}.bincode"`.
///
/// See the [hyphenation](https://github.com/tapeinosyne/hyphenation) crate docs for more details about the data files.
pub struct HyphenationDataDir {
    /// Directory that contains the dictionary files.
    dir: PathBuf,
    /// File name pattern, every `{lang}` is replaced by the language display print.
    name_pattern: &'static str,
}
129impl HyphenationDataDir {
130    /// New from `dir` and file name pattern.
131    pub fn new(dir: PathBuf, name_pattern: &'static str) -> Self {
132        HyphenationDataDir { dir, name_pattern }
133    }
134}
135impl HyphenationDataSource for HyphenationDataDir {
136    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
137        let name = self.name_pattern.replace("{lang}", lang.to_string().as_str());
138        let file = self.dir.join(name);
139        if file.exists() {
140            match hyphenation::Standard::from_path(lang, file) {
141                Ok(d) => Some(d),
142                Err(e) => {
143                    tracing::error!("error loading hyphenation dictionary, {e}");
144                    None
145                }
146            }
147        } else {
148            None
149        }
150    }
151}
152
/// A hyphenation data source backed by the dictionaries embedded in the binary.
///
/// This source is only available, and is the default source, when compiled with the
/// feature `"hyphenation_embed_all"`.
#[cfg(feature = "hyphenation_embed_all")]
pub struct HyphenationDataEmbedded;
158
#[cfg(feature = "hyphenation_embed_all")]
impl HyphenationDataSource for HyphenationDataEmbedded {
    /// Load the embedded dictionary of `lang`, logs an error and returns `None` if it cannot be loaded.
    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
        hyphenation::Standard::from_embedded(lang)
            .map_err(|e| tracing::error!("error loading hyphenation dictionary, {e}"))
            .ok()
    }
}
171
172mod util {
173    use super::*;
174    use hyphenation::Language::*;
175    use regex::Regex;
176    use zng_ext_l10n::{Lang, lang};
177
178    app_local! {
179        pub static LANG_TO_LANGUAGE_MAP: Vec<(Lang, hyphenation::Language)> = vec![
180            (lang!("af"), Afrikaans),
181            (lang!("sq"), Albanian),
182            (lang!("hy"), Armenian),
183            (lang!("as"), Assamese),
184            (lang!("eu"), Basque),
185            (lang!("be"), Belarusian),
186            (lang!("bn"), Bengali),
187            (lang!("bg"), Bulgarian),
188            (lang!("ca"), Catalan),
189            (lang!("zh-latn-pinyin"), Chinese),
190            (lang!("cop"), Coptic),
191            (lang!("hr"), Croatian),
192            (lang!("cs"), Czech),
193            (lang!("da"), Danish),
194            (lang!("nl"), Dutch),
195            (lang!("en-gb"), EnglishGB),
196            (lang!("en-us"), EnglishUS),
197            (lang!("eo"), Esperanto),
198            (lang!("et"), Estonian),
199            (lang!("mul-ethi"), Ethiopic),
200            (lang!("fi"), Finnish),
201            // (lang!("fi-x-school"), FinnishScholastic),
202            (lang!("fr"), French),
203            (lang!("fur"), Friulan),
204            (lang!("gl"), Galician),
205            (lang!("ka"), Georgian),
206            (lang!("de-1901"), German1901),
207            (lang!("de-1996"), German1996),
208            (lang!("de-ch-1901"), GermanSwiss),
209            (lang!("grc"), GreekAncient),
210            (lang!("el-monoton"), GreekMono),
211            (lang!("el-polyton"), GreekPoly),
212            (lang!("gu"), Gujarati),
213            (lang!("hi"), Hindi),
214            (lang!("hu"), Hungarian),
215            (lang!("is"), Icelandic),
216            (lang!("id"), Indonesian),
217            (lang!("ia"), Interlingua),
218            (lang!("ga"), Irish),
219            (lang!("it"), Italian),
220            (lang!("kn"), Kannada),
221            (lang!("kmr"), Kurmanji),
222            (lang!("la"), Latin),
223            // (lang!("la-x-classic"), LatinClassic),
224            // (lang!("la-x-liturgic"), LatinLiturgical),
225            (lang!("lv"), Latvian),
226            (lang!("lt"), Lithuanian),
227            (lang!("mk"), Macedonian),
228            (lang!("ml"), Malayalam),
229            (lang!("mr"), Marathi),
230            (lang!("mn-cyrl"), Mongolian),
231            (lang!("nb"), NorwegianBokmal),
232            (lang!("nn"), NorwegianNynorsk),
233            (lang!("oc"), Occitan),
234            (lang!("or"), Oriya),
235            (lang!("pi"), Pali),
236            (lang!("pa"), Panjabi),
237            (lang!("pms"), Piedmontese),
238            (lang!("pl"), Polish),
239            (lang!("pt"), Portuguese),
240            (lang!("ro"), Romanian),
241            (lang!("rm"), Romansh),
242            (lang!("ru"), Russian),
243            (lang!("sa"), Sanskrit),
244            (lang!("sr-cyrl"), SerbianCyrillic),
245            (lang!("sh-cyrl"), SerbocroatianCyrillic),
246            (lang!("sh-latn"), SerbocroatianLatin),
247            (lang!("cu"), SlavonicChurch),
248            (lang!("sk"), Slovak),
249            (lang!("sl"), Slovenian),
250            (lang!("es"), Spanish),
251            (lang!("sv"), Swedish),
252            (lang!("ta"), Tamil),
253            (lang!("te"), Telugu),
254            (lang!("th"), Thai),
255            (lang!("tr"), Turkish),
256            (lang!("tk"), Turkmen),
257            (lang!("uk"), Ukrainian),
258            (lang!("hsb"), Uppersorbian),
259            (lang!("cy"), Welsh),
260        ];
261
262        pub static WORD_REGEX: Regex = Regex::new(r"^\w+$").unwrap();
263    }
264}