Skip to main content

zng_ext_font/
hyphenation.rs

1use std::path::PathBuf;
2
3use crate::task::parking_lot::Mutex;
4use hyphenation::{Hyphenator as _, Load as _};
5use zng_app_context::app_local;
6use zng_ext_l10n::Lang;
7
8app_local! {
9    static HYPHENATION_SV: Hyphenation = Hyphenation {
10        #[cfg(feature = "hyphenation_embed_all")]
11        source: Mutex::new(Some(Box::new(HyphenationDataEmbedded))),
12        #[cfg(not(feature = "hyphenation_embed_all"))]
13        source: Mutex::new(None),
14
15        dictionaries: vec![],
16    };
17}
18
19struct Hyphenation {
20    source: Mutex<Option<Box<dyn HyphenationDataSource>>>,
21    dictionaries: Vec<hyphenation::Standard>,
22}
23
24/// Hyphenation service.
25///
26/// Note that dictionary data is required to support a language, if the feature `"hyphenation_embed_all"` is enabled
27/// dictionaries for all supported languages is embedded, otherwise dictionaries must be loaded using a [`HyphenationDataSource`].
28///
29/// You can use the [`HyphenationDataDir`] to use external files, see the [hyphenation](https://github.com/tapeinosyne/hyphenation)
30/// for more details about the data files.
31pub struct HYPHENATION;
32impl HYPHENATION {
33    /// Set the hyphenation dictionaries source and clear cache.
34    ///
35    /// Note that this applies immediately and does not notify, it should only be called once during app init.
36    pub fn init_data_source(&self, source: impl HyphenationDataSource) {
37        let mut h = HYPHENATION_SV.write();
38        *h.source.get_mut() = Some(Box::new(source));
39        h.dictionaries.clear();
40    }
41
42    /// Try to hyphenate the `word` using the `lang` dictionary and rules.
43    ///
44    /// Returns a vector of indexes that allow a line break.
45    pub fn hyphenate(&self, lang: &Lang, word: &str) -> Vec<usize> {
46        self.hyphenate_opt(lang, word).unwrap_or_default()
47    }
48
49    /// Try to hyphenate the `word` using the `lang` dictionary and rules.
50    ///
51    /// Returns a vector of indexes that allow a line break. Returns `None` if the `lang` is not supported or the
52    /// `word` contains non-word characters.
53    pub fn hyphenate_opt(&self, lang: &Lang, word: &str) -> Option<Vec<usize>> {
54        let lang = self.lang_to_hyphenation_language(lang)?;
55        self.hyphenate_opt_language(word, lang)
56    }
57
58    /// Get the best `hyphenation::Language` for the `lang`.
59    pub fn lang_to_hyphenation_language(&self, lang: &Lang) -> Option<hyphenation::Language> {
60        for (l, r) in &*util::LANG_TO_LANGUAGE_MAP.read() {
61            if lang.matches(l, false, true) {
62                return Some(*r);
63            }
64        }
65
66        None
67    }
68
69    /// Hyphenate with language already resolved.
70    pub fn hyphenate_opt_language(&self, word: &str, lang: hyphenation::Language) -> Option<Vec<usize>> {
71        fn is_word(word: &str) -> bool {
72            // r"^\w+$"
73            !word.is_empty() && word.chars().all(|c| c.is_alphanumeric() || c == '_')
74        }
75        if !is_word(word) {
76            return None;
77        }
78
79        {
80            let h = HYPHENATION_SV.read();
81
82            for d in &h.dictionaries {
83                if d.language() == lang {
84                    return Some(d.hyphenate(word).breaks);
85                }
86            }
87        }
88
89        let mut h = HYPHENATION_SV.write();
90
91        if h.source.get_mut().is_none() {
92            return None;
93        }
94
95        for d in &h.dictionaries {
96            if d.language() == lang {
97                return Some(d.hyphenate(word).breaks);
98            }
99        }
100
101        if let Some(source) = h.source.get_mut() {
102            let d = source.load(lang)?;
103            let r = Some(d.hyphenate(word).breaks);
104            h.dictionaries.push(d);
105
106            return r;
107        }
108
109        None
110    }
111}
112
113/// Represents a hyphenation dictionary source.
114///
115/// The data source must be registered in [`HYPHENATION.init_data_source`].
116///
117/// [`HYPHENATION.init_data_source`]: HYPHENATION::init_data_source
118pub trait HyphenationDataSource: Send + 'static {
119    /// Load the dictionary for the `lang`.
120    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard>;
121}
122
/// Hyphenation data source that searches for dictionary files in a directory.
///
/// The file names must follow a pattern that includes the language display print, the pattern must be defined
/// with a replacement `{lang}`. For example, the file `dir/en-us.bin` is matched by `"{lang}.bin"`.
///
/// See the [hyphenation](https://github.com/tapeinosyne/hyphenation) crate docs for more details about the data files.
pub struct HyphenationDataDir {
    /// Directory that contains the dictionary files.
    dir: PathBuf,
    /// File name pattern, every `{lang}` is replaced by the language display print.
    name_pattern: &'static str,
}
impl HyphenationDataDir {
    /// New from the `dir` to search and the file `name_pattern`.
    ///
    /// No file is accessed here, the directory is only read when a dictionary is requested.
    pub fn new(dir: PathBuf, name_pattern: &'static str) -> Self {
        Self { dir, name_pattern }
    }
}
139impl HyphenationDataSource for HyphenationDataDir {
140    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
141        let name = self.name_pattern.replace("{lang}", lang.to_string().as_str());
142        let file = self.dir.join(name);
143        if file.exists() {
144            match hyphenation::Standard::from_path(lang, file) {
145                Ok(d) => Some(d),
146                Err(e) => {
147                    tracing::error!("error loading hyphenation dictionary, {e}");
148                    None
149                }
150            }
151        } else {
152            None
153        }
154    }
155}
156
/// Hyphenation data source that loads the dictionaries embedded in the binary.
///
/// This is the default data source when compiled with the feature `"hyphenation_embed_all"`.
#[cfg(feature = "hyphenation_embed_all")]
pub struct HyphenationDataEmbedded;

#[cfg(feature = "hyphenation_embed_all")]
impl HyphenationDataSource for HyphenationDataEmbedded {
    /// Loads the embedded dictionary for the `lang`, logs an error and returns `None` if it cannot be loaded.
    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
        let loaded = hyphenation::Standard::from_embedded(lang);
        if let Err(e) = &loaded {
            tracing::error!("error loading hyphenation dictionary, {e}");
        }
        loaded.ok()
    }
}
175
// Lookup table used by `HYPHENATION.lang_to_hyphenation_language`.
mod util {
    use super::*;
    use hyphenation::Language::*;
    use zng_ext_l10n::{Lang, lang};

    app_local! {
        // Pairs of language tag and the `hyphenation::Language` dictionary selected by it.
        //
        // The table is searched from the first entry and the search stops at the first tag
        // that matches the requested language, so the order of the entries is significant.
        //
        // NOTE(review): the commented out entries (FinnishScholastic, LatinClassic, LatinLiturgical)
        // are disabled, the reason is not visible in this file, confirm before enabling.
        pub static LANG_TO_LANGUAGE_MAP: Vec<(Lang, hyphenation::Language)> = vec![
            (lang!("af"), Afrikaans),
            (lang!("sq"), Albanian),
            (lang!("hy"), Armenian),
            (lang!("as"), Assamese),
            (lang!("eu"), Basque),
            (lang!("be"), Belarusian),
            (lang!("bn"), Bengali),
            (lang!("bg"), Bulgarian),
            (lang!("ca"), Catalan),
            (lang!("zh-latn-pinyin"), Chinese),
            (lang!("cop"), Coptic),
            (lang!("hr"), Croatian),
            (lang!("cs"), Czech),
            (lang!("da"), Danish),
            (lang!("nl"), Dutch),
            (lang!("en-gb"), EnglishGB),
            (lang!("en-us"), EnglishUS),
            (lang!("eo"), Esperanto),
            (lang!("et"), Estonian),
            (lang!("mul-ethi"), Ethiopic),
            (lang!("fi"), Finnish),
            // (lang!("fi-x-school"), FinnishScholastic),
            (lang!("fr"), French),
            (lang!("fur"), Friulan),
            (lang!("gl"), Galician),
            (lang!("ka"), Georgian),
            (lang!("de-1901"), German1901),
            (lang!("de-1996"), German1996),
            (lang!("de-ch-1901"), GermanSwiss),
            (lang!("grc"), GreekAncient),
            (lang!("el-monoton"), GreekMono),
            (lang!("el-polyton"), GreekPoly),
            (lang!("gu"), Gujarati),
            (lang!("hi"), Hindi),
            (lang!("hu"), Hungarian),
            (lang!("is"), Icelandic),
            (lang!("id"), Indonesian),
            (lang!("ia"), Interlingua),
            (lang!("ga"), Irish),
            (lang!("it"), Italian),
            (lang!("kn"), Kannada),
            (lang!("kmr"), Kurmanji),
            (lang!("la"), Latin),
            // (lang!("la-x-classic"), LatinClassic),
            // (lang!("la-x-liturgic"), LatinLiturgical),
            (lang!("lv"), Latvian),
            (lang!("lt"), Lithuanian),
            (lang!("mk"), Macedonian),
            (lang!("ml"), Malayalam),
            (lang!("mr"), Marathi),
            (lang!("mn-cyrl"), Mongolian),
            (lang!("nb"), NorwegianBokmal),
            (lang!("nn"), NorwegianNynorsk),
            (lang!("oc"), Occitan),
            (lang!("or"), Oriya),
            (lang!("pi"), Pali),
            (lang!("pa"), Panjabi),
            (lang!("pms"), Piedmontese),
            (lang!("pl"), Polish),
            (lang!("pt"), Portuguese),
            (lang!("ro"), Romanian),
            (lang!("rm"), Romansh),
            (lang!("ru"), Russian),
            (lang!("sa"), Sanskrit),
            (lang!("sr-cyrl"), SerbianCyrillic),
            (lang!("sh-cyrl"), SerbocroatianCyrillic),
            (lang!("sh-latn"), SerbocroatianLatin),
            (lang!("cu"), SlavonicChurch),
            (lang!("sk"), Slovak),
            (lang!("sl"), Slovenian),
            (lang!("es"), Spanish),
            (lang!("sv"), Swedish),
            (lang!("ta"), Tamil),
            (lang!("te"), Telugu),
            (lang!("th"), Thai),
            (lang!("tr"), Turkish),
            (lang!("tk"), Turkmen),
            (lang!("uk"), Ukrainian),
            (lang!("hsb"), Uppersorbian),
            (lang!("cy"), Welsh),
        ];
    }
}
265}