Skip to main content

cargo_zng/l10n/
translate.rs

1use std::{
2    fs,
3    io::{self, BufRead, Read, Write},
4    path::{Path, PathBuf},
5    time::Duration,
6};
7
8use leaky_bucket_lite::sync_threadsafe::LeakyBucket;
9use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
10use sha2::Digest;
11use unic_langid::LanguageIdentifier;
12
13use crate::util;
14
15pub fn translate(dir: &str, from: &str, to: &str, replace: bool, check: bool, verbose: bool) {
16    let dir_path = Path::new(dir);
17    if !dir_path.exists() {
18        fatal!("dir `{dir}` does not exist")
19    }
20
21    let from = if from.is_empty() {
22        let name = dir_path.file_name().unwrap().to_string_lossy();
23        let name = &*name;
24        if name == "template" {
25            if verbose {
26                println!("from language set to `en`, default language for `template` folder");
27            }
28            LanguageIdentifier::from_bytes(b"en").unwrap()
29        } else {
30            let name = name.strip_suffix("-machine").unwrap_or(name);
31            match LanguageIdentifier::from_bytes(name.as_bytes()) {
32                Ok(l) => {
33                    if verbose {
34                        println!("from language language set to `{l}`, derived from `{name}` folder");
35                    }
36                    l
37                }
38                Err(e) => fatal!("cannot define from language, `{name}` is not a language, {e}\nset --translate-from to resolve"),
39            }
40        }
41    } else {
42        match LanguageIdentifier::from_bytes(from.as_bytes()) {
43            Ok(l) => l,
44            Err(e) => fatal!("cannot translate from `{from}`, {e}"),
45        }
46    };
47    let to = {
48        let mut t = vec![];
49        for l_str in to.split(',') {
50            let l_str = l_str.trim();
51            if l_str.is_empty() {
52                continue;
53            }
54            let l_str = l_str.strip_suffix("-machine").unwrap_or(l_str);
55            match LanguageIdentifier::from_bytes(l_str.as_bytes()) {
56                Ok(l) => t.push(l),
57                Err(e) => fatal!("cannot translate to `{l_str}`, {e}"),
58            }
59        }
60        t
61    };
62    if to.is_empty() {
63        fatal!("--translate-to requires at least one language")
64    }
65
66    let pattern = dir_path.join("**/*.ftl");
67    let mut files = vec![];
68    for entry in glob::glob(&pattern.display().to_string()).unwrap_or_else(|e| fatal!("cannot read `{dir}`, {e}")) {
69        let entry = entry.unwrap_or_else(|e| fatal!("cannot read `{dir}` entry, {e}"));
70        let relative_entry = entry.strip_prefix(dir_path).unwrap();
71        let file = match std::fs::read_to_string(&entry) {
72            Ok(f) => f,
73            Err(e) => fatal!("cannot read `{}`, {}", entry.display(), e),
74        };
75        let mut hasher = sha2::Sha256::new();
76        hasher.update(file.as_bytes());
77        let hash = format!("{:x}", base16ct::HexDisplay(&hasher.finalize()));
78        files.push((relative_entry.to_owned(), file, hash));
79    }
80    let files = files;
81
82    if verbose {
83        println!("collected {} source files", files.len());
84    }
85
86    let dir_path = dunce::canonicalize(dir_path).unwrap();
87    let dir_parent = dir_path.parent().unwrap();
88
89    let translator = Translator::default();
90    if verbose {
91        println!("using `{}`, RPM: {}", translator.path.display(), translator.rpm);
92    }
93
94    to.into_par_iter().for_each(|to| {
95        if to == from {
96            return;
97        }
98        println!("translating to {to}-machine");
99
100        let non_machine_dir = dir_parent.join(to.to_string());
101        if non_machine_dir.exists() {
102            return;
103        }
104
105        let dir_to = dir_parent.join(format!("{to}-machine"));
106
107        files.par_iter().for_each(|(relative_path, file, hash)| {
108            let file_to = dir_to.join(relative_path);
109            if verbose {
110                println!("  {}", file_to.display());
111            }
112
113            let _ = util::check_or_create_dir_all(check, file_to.parent().unwrap());
114
115            const HEADER_PREFIX: &str = "### Machine translated by `cargo zng l10n`, ";
116
117            let mut stale = false;
118            match fs::File::open(&file_to) {
119                Ok(f) => {
120                    if !replace {
121                        let mut f = io::BufReader::new(f);
122                        let mut header = String::new();
123                        let _ = f.read_line(&mut header);
124                        if let Some(h) = header.strip_prefix(HEADER_PREFIX) {
125                            if hash == h.trim_end() {
126                                // translation is up-to-date
127                                return;
128                            }
129                            stale = true;
130                        } else {
131                            // translation not generated by us, assume is up-to-date
132                            return;
133                        }
134                    }
135                }
136                Err(_) => {
137                    if check {
138                        fatal!("expected `{}`", file_to.display());
139                    }
140                }
141            }
142
143            if !check && (replace || stale || !file_to.exists()) {
144                let r = translator.translate(&from, &to, file, verbose);
145                let write = || -> io::Result<()> {
146                    let mut f = fs::File::create(&file_to)?;
147                    f.write_all(HEADER_PREFIX.as_bytes())?;
148                    f.write_all(hash.as_bytes())?;
149                    f.write_all("\n\n".as_bytes())?;
150                    f.write_all(r.as_bytes())?;
151                    Ok(())
152                };
153                if let Err(e) = write() {
154                    fatal!("cannot write `{}`, {}", file_to.display(), e);
155                }
156            }
157        });
158    });
159}
160
161struct Translator {
162    path: PathBuf,
163    rpm: u64,
164    limiter: LeakyBucket,
165}
166impl Default for Translator {
167    fn default() -> Self {
168        let install_dir = std::env::current_exe().unwrap();
169        let install_dir = install_dir.parent().unwrap();
170
171        // find translator
172        let mut t = if let Ok(translator) = std::env::var("ZNG_L10N_TRANSLATOR") {
173            let p = if translator.contains('/') || translator.contains('\\') {
174                PathBuf::from(&translator)
175            } else {
176                install_dir.join(format!("zng-l10n-translator-{translator}{}", std::env::consts::EXE_SUFFIX))
177            };
178            if !p.exists() {
179                fatal!("cannot find translator `{translator}`");
180            }
181            Translator {
182                path: p,
183                rpm: 0,
184                limiter: LeakyBucket::builder().build(),
185            }
186        } else {
187            let translators_pattern = install_dir.join("zng-l10n-translator-*").display().to_string();
188            let mut options = vec![];
189            for opt in glob::glob(&translators_pattern).unwrap() {
190                options.push(opt.unwrap());
191            }
192            if options.is_empty() {
193                fatal!("no translator installed\n   install a zng-l10n-translator-* crate\nor set ZNG_L10N_TRANSLATOR to a path")
194            } else if options.len() > 1 {
195                let mut names: Vec<_> = options
196                    .iter()
197                    .map(|p| {
198                        p.file_name()
199                            .unwrap()
200                            .to_string_lossy()
201                            .trim_start_matches("zng-l10n-translator-")
202                            .trim_end_matches(std::env::consts::EXE_SUFFIX)
203                            .to_owned()
204                    })
205                    .collect();
206                names.sort();
207                let names = names.join(", ");
208                fatal!("multiple translators installed\n    set ZNG_L10N_TRANSLATOR to one of: {names}");
209            }
210            Translator {
211                path: options.remove(0),
212                rpm: 0,
213                limiter: LeakyBucket::builder().build(),
214            }
215        };
216
217        // request limits
218        t.read_limits();
219
220        t
221    }
222}
223impl Translator {
224    fn read_limits(&mut self) {
225        #[derive(serde::Deserialize)]
226        struct Limits {
227            #[serde(rename = "requests-per-minute")]
228            rpm: u64,
229        }
230
231        let output = std::process::Command::new(&self.path).arg("--limits").output().unwrap();
232        if output.status.success() {
233            match serde_json::from_slice::<Limits>(&output.stdout) {
234                Ok(l) => {
235                    self.rpm = l.rpm;
236                    let t: u32 = self.rpm.try_into().unwrap();
237                    self.limiter = LeakyBucket::builder()
238                        .max(t)
239                        .tokens(t)
240                        .refill_amount(t)
241                        .refill_interval(Duration::from_mins(1))
242                        .build();
243                }
244                Err(e) => {
245                    let limits = String::from_utf8_lossy(&output.stdout);
246                    fatal!("invalid response to --limits, {e}\n{limits}")
247                }
248            }
249        } else {
250            let error = String::from_utf8_lossy(&output.stderr);
251            fatal!(
252                "{error}\n{} exited with code {}",
253                self.path.file_name().unwrap().to_string_lossy(),
254                output.status.code().unwrap_or(0)
255            );
256        }
257    }
258
259    fn translate(&self, from: &LanguageIdentifier, to: &LanguageIdentifier, file: &str, _verbose: bool) -> String {
260        self.limiter.acquire_one();
261
262        let mut cmd = std::process::Command::new(&self.path)
263            .arg("--from-lang")
264            .arg(from.to_string())
265            .arg("--to-lang")
266            .arg(to.to_string())
267            .stdin(std::process::Stdio::piped())
268            .stdout(std::process::Stdio::piped())
269            .stderr(std::process::Stdio::piped())
270            .spawn()
271            .unwrap();
272
273        let mut stdin = cmd.stdin.take().unwrap();
274        stdin.write_all(file.as_bytes()).unwrap();
275        stdin.flush().unwrap();
276        drop(stdin);
277
278        let s = cmd.wait().unwrap();
279        if !s.success() {
280            let mut error = String::new();
281            cmd.stderr.unwrap().read_to_string(&mut error).unwrap();
282            fatal!(
283                "{error}\n{} exited with code {}",
284                self.path.file_name().unwrap().to_string_lossy(),
285                s.code().unwrap_or(0)
286            );
287        }
288        let mut out = String::new();
289        cmd.stdout.unwrap().read_to_string(&mut out).unwrap();
290
291        if let Err(e) = fluent_syntax::parser::parse(out.as_str()) {
292            fatal!("translator output is not valid Fluent, {}", &e.1[0]);
293        }
294
295        out
296    }
297}