1use std::path::PathBuf;
2
3use crate::task::parking_lot::Mutex;
4use hyphenation::{Hyphenator as _, Load as _};
5use zng_app_context::app_local;
6use zng_ext_l10n::Lang;
7
// App-local state backing the `HYPHENATION` service.
//
// The initial data source depends on the `hyphenation_embed_all` feature: when it is
// enabled the state starts with `HyphenationDataEmbedded`, otherwise it starts with no
// source at all. No dictionary is loaded up front; `dictionaries` starts empty.
app_local! {
    static HYPHENATION_SV: Hyphenation = Hyphenation {
        #[cfg(feature = "hyphenation_embed_all")]
        source: Mutex::new(Some(Box::new(HyphenationDataEmbedded))),
        #[cfg(not(feature = "hyphenation_embed_all"))]
        source: Mutex::new(None),

        dictionaries: vec![],
    };
}
18
/// Internal state of the hyphenation service.
struct Hyphenation {
    /// Source asked to load a dictionary when none of the loaded ones matches the
    /// requested language. `None` means no source is configured.
    ///
    // NOTE(review): presumably wrapped in a `Mutex` so this struct is `Sync` even though
    // `HyphenationDataSource` only requires `Send` — confirm. All visible accesses go
    // through `get_mut`, never `lock`.
    source: Mutex<Option<Box<dyn HyphenationDataSource>>>,
    /// Dictionaries loaded so far; searched linearly by language.
    dictionaries: Vec<hyphenation::Standard>,
}
23
/// Hyphenation service.
///
/// Unit type whose methods operate on the app-local hyphenation state: they set the
/// dictionary data source and compute hyphenation breaks for words.
pub struct HYPHENATION;
32impl HYPHENATION {
33 pub fn init_data_source(&self, source: impl HyphenationDataSource) {
37 let mut h = HYPHENATION_SV.write();
38 *h.source.get_mut() = Some(Box::new(source));
39 h.dictionaries.clear();
40 }
41
42 pub fn hyphenate(&self, lang: &Lang, word: &str) -> Vec<usize> {
46 self.hyphenate_opt(lang, word).unwrap_or_default()
47 }
48
49 pub fn hyphenate_opt(&self, lang: &Lang, word: &str) -> Option<Vec<usize>> {
54 let lang = self.lang_to_hyphenation_language(lang)?;
55 self.hyphenate_opt_language(word, lang)
56 }
57
58 pub fn lang_to_hyphenation_language(&self, lang: &Lang) -> Option<hyphenation::Language> {
60 for (l, r) in &*util::LANG_TO_LANGUAGE_MAP.read() {
61 if lang.matches(l, false, true) {
62 return Some(*r);
63 }
64 }
65
66 None
67 }
68
69 pub fn hyphenate_opt_language(&self, word: &str, lang: hyphenation::Language) -> Option<Vec<usize>> {
71 fn is_word(word: &str) -> bool {
72 !word.is_empty() && word.chars().all(|c| c.is_alphanumeric() || c == '_')
74 }
75 if !is_word(word) {
76 return None;
77 }
78
79 {
80 let h = HYPHENATION_SV.read();
81
82 for d in &h.dictionaries {
83 if d.language() == lang {
84 return Some(d.hyphenate(word).breaks);
85 }
86 }
87 }
88
89 let mut h = HYPHENATION_SV.write();
90
91 if h.source.get_mut().is_none() {
92 return None;
93 }
94
95 for d in &h.dictionaries {
96 if d.language() == lang {
97 return Some(d.hyphenate(word).breaks);
98 }
99 }
100
101 if let Some(source) = h.source.get_mut() {
102 let d = source.load(lang)?;
103 let r = Some(d.hyphenate(word).breaks);
104 h.dictionaries.push(d);
105
106 return r;
107 }
108
109 None
110 }
111}
112
/// Represents a source of hyphenation dictionaries.
///
/// Implementers must be `Send + 'static` because the source is boxed and stored in the
/// hyphenation service state.
pub trait HyphenationDataSource: Send + 'static {
    /// Loads the dictionary for `lang`.
    ///
    /// Returns `None` if the source cannot provide a dictionary for the language.
    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard>;
}
122
/// Hyphenation data source that loads dictionary files from a directory.
pub struct HyphenationDataDir {
    /// Directory that contains the dictionary files.
    dir: PathBuf,
    /// File name pattern; every `{lang}` in it is replaced by the `Display` text of the
    /// requested `hyphenation::Language` to form the file name joined to `dir`.
    name_pattern: &'static str,
}
133impl HyphenationDataDir {
134 pub fn new(dir: PathBuf, name_pattern: &'static str) -> Self {
136 HyphenationDataDir { dir, name_pattern }
137 }
138}
139impl HyphenationDataSource for HyphenationDataDir {
140 fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
141 let name = self.name_pattern.replace("{lang}", lang.to_string().as_str());
142 let file = self.dir.join(name);
143 if file.exists() {
144 match hyphenation::Standard::from_path(lang, file) {
145 Ok(d) => Some(d),
146 Err(e) => {
147 tracing::error!("error loading hyphenation dictionary, {e}");
148 None
149 }
150 }
151 } else {
152 None
153 }
154 }
155}
156
/// Hyphenation data source that loads dictionaries embedded in the binary.
///
/// Only available with the `hyphenation_embed_all` feature.
#[cfg(feature = "hyphenation_embed_all")]
pub struct HyphenationDataEmbedded;
162
#[cfg(feature = "hyphenation_embed_all")]
impl HyphenationDataSource for HyphenationDataEmbedded {
    /// Loads the embedded dictionary for `lang`.
    ///
    /// If loading fails, the error is logged and `None` is returned.
    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
        let loaded = hyphenation::Standard::from_embedded(lang);
        if let Err(e) = &loaded {
            tracing::error!("error loading hyphenation dictionary, {e}");
        }
        loaded.ok()
    }
}
175
/// Internal lookup data for the hyphenation service.
mod util {
    use super::*;
    use hyphenation::Language::*;
    use zng_ext_l10n::{Lang, lang};

    // Table that associates a `Lang` pattern with the `hyphenation::Language` dictionary
    // used for it.
    //
    // `HYPHENATION::lang_to_hyphenation_language` scans this table from the top and returns
    // the first entry the requested language matches, so the order of entries is significant
    // whenever more than one pattern can match the same language.
    app_local! {
        pub static LANG_TO_LANGUAGE_MAP: Vec<(Lang, hyphenation::Language)> = vec![
            (lang!("af"), Afrikaans),
            (lang!("sq"), Albanian),
            (lang!("hy"), Armenian),
            (lang!("as"), Assamese),
            (lang!("eu"), Basque),
            (lang!("be"), Belarusian),
            (lang!("bn"), Bengali),
            (lang!("bg"), Bulgarian),
            (lang!("ca"), Catalan),
            (lang!("zh-latn-pinyin"), Chinese),
            (lang!("cop"), Coptic),
            (lang!("hr"), Croatian),
            (lang!("cs"), Czech),
            (lang!("da"), Danish),
            (lang!("nl"), Dutch),
            (lang!("en-gb"), EnglishGB),
            (lang!("en-us"), EnglishUS),
            (lang!("eo"), Esperanto),
            (lang!("et"), Estonian),
            (lang!("mul-ethi"), Ethiopic),
            (lang!("fi"), Finnish),
            (lang!("fr"), French),
            (lang!("fur"), Friulan),
            (lang!("gl"), Galician),
            (lang!("ka"), Georgian),
            (lang!("de-1901"), German1901),
            (lang!("de-1996"), German1996),
            (lang!("de-ch-1901"), GermanSwiss),
            (lang!("grc"), GreekAncient),
            (lang!("el-monoton"), GreekMono),
            (lang!("el-polyton"), GreekPoly),
            (lang!("gu"), Gujarati),
            (lang!("hi"), Hindi),
            (lang!("hu"), Hungarian),
            (lang!("is"), Icelandic),
            (lang!("id"), Indonesian),
            (lang!("ia"), Interlingua),
            (lang!("ga"), Irish),
            (lang!("it"), Italian),
            (lang!("kn"), Kannada),
            (lang!("kmr"), Kurmanji),
            (lang!("la"), Latin),
            (lang!("lv"), Latvian),
            (lang!("lt"), Lithuanian),
            (lang!("mk"), Macedonian),
            (lang!("ml"), Malayalam),
            (lang!("mr"), Marathi),
            (lang!("mn-cyrl"), Mongolian),
            (lang!("nb"), NorwegianBokmal),
            (lang!("nn"), NorwegianNynorsk),
            (lang!("oc"), Occitan),
            (lang!("or"), Oriya),
            (lang!("pi"), Pali),
            (lang!("pa"), Panjabi),
            (lang!("pms"), Piedmontese),
            (lang!("pl"), Polish),
            (lang!("pt"), Portuguese),
            (lang!("ro"), Romanian),
            (lang!("rm"), Romansh),
            (lang!("ru"), Russian),
            (lang!("sa"), Sanskrit),
            (lang!("sr-cyrl"), SerbianCyrillic),
            (lang!("sh-cyrl"), SerbocroatianCyrillic),
            (lang!("sh-latn"), SerbocroatianLatin),
            (lang!("cu"), SlavonicChurch),
            (lang!("sk"), Slovak),
            (lang!("sl"), Slovenian),
            (lang!("es"), Spanish),
            (lang!("sv"), Swedish),
            (lang!("ta"), Tamil),
            (lang!("te"), Telugu),
            (lang!("th"), Thai),
            (lang!("tr"), Turkish),
            (lang!("tk"), Turkmen),
            (lang!("uk"), Ukrainian),
            (lang!("hsb"), Uppersorbian),
            (lang!("cy"), Welsh),
        ];
    }
}