1use std::path::PathBuf;
2
3use crate::task::parking_lot::Mutex;
4use hyphenation::{Hyphenator as _, Load as _};
5use zng_app_context::app_local;
6use zng_ext_l10n::Lang;
7
8app_local! {
9 static HYPHENATION_SV: Hyphenation = Hyphenation {
10 #[cfg(feature = "hyphenation_embed_all")]
11 source: Mutex::new(Some(Box::new(HyphenationDataEmbedded))),
12 #[cfg(not(feature = "hyphenation_embed_all"))]
13 source: Mutex::new(None),
14
15 dictionaries: vec![],
16 };
17}
18
/// State of the hyphenation service, stored in `HYPHENATION_SV`.
struct Hyphenation {
    /// Source used to load dictionaries on demand, `None` when no source is set.
    ///
    /// NOTE(review): the visible code only accesses this through `get_mut`; the `Mutex`
    /// presumably exists so the state is `Sync` while sources are only required to be
    /// `Send` - confirm.
    source: Mutex<Option<Box<dyn HyphenationDataSource>>>,
    /// Dictionaries already loaded from `source`, searched by language before loading again.
    dictionaries: Vec<hyphenation::Standard>,
}
23
24pub struct HYPHENATION;
32impl HYPHENATION {
33 pub fn init_data_source(&self, source: impl HyphenationDataSource) {
37 let mut h = HYPHENATION_SV.write();
38 *h.source.get_mut() = Some(Box::new(source));
39 h.dictionaries.clear();
40 }
41
42 pub fn hyphenate(&self, lang: &Lang, word: &str) -> Vec<usize> {
46 self.hyphenate_opt(lang, word).unwrap_or_default()
47 }
48
49 pub fn hyphenate_opt(&self, lang: &Lang, word: &str) -> Option<Vec<usize>> {
54 let lang = self.lang_to_hyphenation_language(lang)?;
55 self.hyphenate_opt_language(word, lang)
56 }
57
58 pub fn lang_to_hyphenation_language(&self, lang: &Lang) -> Option<hyphenation::Language> {
60 for (l, r) in &*util::LANG_TO_LANGUAGE_MAP.read() {
61 if lang.matches(l, false, true) {
62 return Some(*r);
63 }
64 }
65
66 None
67 }
68
69 pub fn hyphenate_opt_language(&self, word: &str, lang: hyphenation::Language) -> Option<Vec<usize>> {
71 if !util::WORD_REGEX.read().is_match(word) {
72 return None;
73 }
74
75 {
76 let h = HYPHENATION_SV.read();
77
78 for d in &h.dictionaries {
79 if d.language() == lang {
80 return Some(d.hyphenate(word).breaks);
81 }
82 }
83 }
84
85 let mut h = HYPHENATION_SV.write();
86
87 if h.source.get_mut().is_none() {
88 return None;
89 }
90
91 for d in &h.dictionaries {
92 if d.language() == lang {
93 return Some(d.hyphenate(word).breaks);
94 }
95 }
96
97 if let Some(source) = h.source.get_mut() {
98 let d = source.load(lang)?;
99 let r = Some(d.hyphenate(word).breaks);
100 h.dictionaries.push(d);
101
102 return r;
103 }
104
105 None
106 }
107}
108
/// Represents a source of hyphenation dictionaries.
///
/// The `HYPHENATION` service keeps every dictionary returned by [`load`] and only calls
/// it for languages that are not loaded yet. A `None` result is not remembered, so
/// [`load`] can be called again for the same language.
///
/// [`load`]: HyphenationDataSource::load
pub trait HyphenationDataSource: Send + 'static {
    /// Loads the dictionary for `lang`.
    ///
    /// Returns `None` if the dictionary is not available or failed to load.
    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard>;
}
118
/// Hyphenation data source that loads dictionary files from a directory.
///
/// The file for a language is `dir` joined with `name_pattern`, after every `{lang}`
/// in the pattern is replaced with the `to_string()` text of the language.
pub struct HyphenationDataDir {
    dir: PathBuf,
    name_pattern: &'static str,
}
impl HyphenationDataDir {
    /// New data source that looks for files named after `name_pattern` inside `dir`.
    pub fn new(dir: PathBuf, name_pattern: &'static str) -> Self {
        Self { dir, name_pattern }
    }
}
135impl HyphenationDataSource for HyphenationDataDir {
136 fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
137 let name = self.name_pattern.replace("{lang}", lang.to_string().as_str());
138 let file = self.dir.join(name);
139 if file.exists() {
140 match hyphenation::Standard::from_path(lang, file) {
141 Ok(d) => Some(d),
142 Err(e) => {
143 tracing::error!("error loading hyphenation dictionary, {e}");
144 None
145 }
146 }
147 } else {
148 None
149 }
150 }
151}
152
/// Hyphenation data source that loads the dictionaries embedded in the `hyphenation` crate.
///
/// Only available with the `"hyphenation_embed_all"` feature.
#[cfg(feature = "hyphenation_embed_all")]
pub struct HyphenationDataEmbedded;

#[cfg(feature = "hyphenation_embed_all")]
impl HyphenationDataSource for HyphenationDataEmbedded {
    /// Loads the embedded dictionary for `lang`, logs an error and returns `None` on failure.
    fn load(&mut self, lang: hyphenation::Language) -> Option<hyphenation::Standard> {
        let loaded = hyphenation::Standard::from_embedded(lang);
        if let Err(e) = &loaded {
            tracing::error!("error loading hyphenation dictionary, {e}");
        }
        loaded.ok()
    }
}
171
// Lookup data used by the `HYPHENATION` service.
mod util {
    use super::*;
    use hyphenation::Language::*;
    use regex::Regex;
    use zng_ext_l10n::{Lang, lang};

    app_local! {
        // Maps language identifiers to the `hyphenation` crate languages.
        //
        // `HYPHENATION.lang_to_hyphenation_language` scans this table from the start
        // and returns the language of the first entry matched, so the order of the
        // entries is significant.
        pub static LANG_TO_LANGUAGE_MAP: Vec<(Lang, hyphenation::Language)> = vec![
            (lang!("af"), Afrikaans),
            (lang!("sq"), Albanian),
            (lang!("hy"), Armenian),
            (lang!("as"), Assamese),
            (lang!("eu"), Basque),
            (lang!("be"), Belarusian),
            (lang!("bn"), Bengali),
            (lang!("bg"), Bulgarian),
            (lang!("ca"), Catalan),
            (lang!("zh-latn-pinyin"), Chinese),
            (lang!("cop"), Coptic),
            (lang!("hr"), Croatian),
            (lang!("cs"), Czech),
            (lang!("da"), Danish),
            (lang!("nl"), Dutch),
            (lang!("en-gb"), EnglishGB),
            (lang!("en-us"), EnglishUS),
            (lang!("eo"), Esperanto),
            (lang!("et"), Estonian),
            (lang!("mul-ethi"), Ethiopic),
            (lang!("fi"), Finnish),
            (lang!("fr"), French),
            (lang!("fur"), Friulan),
            (lang!("gl"), Galician),
            (lang!("ka"), Georgian),
            (lang!("de-1901"), German1901),
            (lang!("de-1996"), German1996),
            (lang!("de-ch-1901"), GermanSwiss),
            (lang!("grc"), GreekAncient),
            (lang!("el-monoton"), GreekMono),
            (lang!("el-polyton"), GreekPoly),
            (lang!("gu"), Gujarati),
            (lang!("hi"), Hindi),
            (lang!("hu"), Hungarian),
            (lang!("is"), Icelandic),
            (lang!("id"), Indonesian),
            (lang!("ia"), Interlingua),
            (lang!("ga"), Irish),
            (lang!("it"), Italian),
            (lang!("kn"), Kannada),
            (lang!("kmr"), Kurmanji),
            (lang!("la"), Latin),
            (lang!("lv"), Latvian),
            (lang!("lt"), Lithuanian),
            (lang!("mk"), Macedonian),
            (lang!("ml"), Malayalam),
            (lang!("mr"), Marathi),
            (lang!("mn-cyrl"), Mongolian),
            (lang!("nb"), NorwegianBokmal),
            (lang!("nn"), NorwegianNynorsk),
            (lang!("oc"), Occitan),
            (lang!("or"), Oriya),
            (lang!("pi"), Pali),
            (lang!("pa"), Panjabi),
            (lang!("pms"), Piedmontese),
            (lang!("pl"), Polish),
            (lang!("pt"), Portuguese),
            (lang!("ro"), Romanian),
            (lang!("rm"), Romansh),
            (lang!("ru"), Russian),
            (lang!("sa"), Sanskrit),
            (lang!("sr-cyrl"), SerbianCyrillic),
            (lang!("sh-cyrl"), SerbocroatianCyrillic),
            (lang!("sh-latn"), SerbocroatianLatin),
            (lang!("cu"), SlavonicChurch),
            (lang!("sk"), Slovak),
            (lang!("sl"), Slovenian),
            (lang!("es"), Spanish),
            (lang!("sv"), Swedish),
            (lang!("ta"), Tamil),
            (lang!("te"), Telugu),
            (lang!("th"), Thai),
            (lang!("tr"), Turkish),
            (lang!("tk"), Turkmen),
            (lang!("uk"), Ukrainian),
            (lang!("hsb"), Uppersorbian),
            (lang!("cy"), Welsh),
        ];

        // Matches text made of one or more word characters (`\w`) and nothing else.
        //
        // `HYPHENATION.hyphenate_opt_language` returns `None` for any input that
        // does not match.
        pub static WORD_REGEX: Regex = Regex::new(r"^\w+$").unwrap();
    }
}