cargo_zng/l10n/
translate.rs1use std::{
2 fs,
3 io::{self, BufRead, Read, Write},
4 path::{Path, PathBuf},
5 time::Duration,
6};
7
8use leaky_bucket_lite::sync_threadsafe::LeakyBucket;
9use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
10use sha2::Digest;
11use unic_langid::LanguageIdentifier;
12
13use crate::util;
14
15pub fn translate(dir: &str, from: &str, to: &str, replace: bool, check: bool, verbose: bool) {
16 let dir_path = Path::new(dir);
17 if !dir_path.exists() {
18 fatal!("dir `{dir}` does not exist")
19 }
20
21 let from = if from.is_empty() {
22 let name = dir_path.file_name().unwrap().to_string_lossy();
23 let name = &*name;
24 if name == "template" {
25 if verbose {
26 println!("from language set to `en`, default language for `template` folder");
27 }
28 LanguageIdentifier::from_bytes(b"en").unwrap()
29 } else {
30 let name = name.strip_suffix("-machine").unwrap_or(name);
31 match LanguageIdentifier::from_bytes(name.as_bytes()) {
32 Ok(l) => {
33 if verbose {
34 println!("from language language set to `{l}`, derived from `{name}` folder");
35 }
36 l
37 }
38 Err(e) => fatal!("cannot define from language, `{name}` is not a language, {e}\nset --translate-from to resolve"),
39 }
40 }
41 } else {
42 match LanguageIdentifier::from_bytes(from.as_bytes()) {
43 Ok(l) => l,
44 Err(e) => fatal!("cannot translate from `{from}`, {e}"),
45 }
46 };
47 let to = {
48 let mut t = vec![];
49 for l_str in to.split(',') {
50 let l_str = l_str.trim();
51 if l_str.is_empty() {
52 continue;
53 }
54 let l_str = l_str.strip_suffix("-machine").unwrap_or(l_str);
55 match LanguageIdentifier::from_bytes(l_str.as_bytes()) {
56 Ok(l) => t.push(l),
57 Err(e) => fatal!("cannot translate to `{l_str}`, {e}"),
58 }
59 }
60 t
61 };
62 if to.is_empty() {
63 fatal!("--translate-to requires at least one language")
64 }
65
66 let pattern = dir_path.join("**/*.ftl");
67 let mut files = vec![];
68 for entry in glob::glob(&pattern.display().to_string()).unwrap_or_else(|e| fatal!("cannot read `{dir}`, {e}")) {
69 let entry = entry.unwrap_or_else(|e| fatal!("cannot read `{dir}` entry, {e}"));
70 let relative_entry = entry.strip_prefix(dir_path).unwrap();
71 let file = match std::fs::read_to_string(&entry) {
72 Ok(f) => f,
73 Err(e) => fatal!("cannot read `{}`, {}", entry.display(), e),
74 };
75 let mut hasher = sha2::Sha256::new();
76 hasher.update(file.as_bytes());
77 let hash = format!("{:x}", base16ct::HexDisplay(&hasher.finalize()));
78 files.push((relative_entry.to_owned(), file, hash));
79 }
80 let files = files;
81
82 if verbose {
83 println!("collected {} source files", files.len());
84 }
85
86 let dir_path = dunce::canonicalize(dir_path).unwrap();
87 let dir_parent = dir_path.parent().unwrap();
88
89 let translator = Translator::default();
90 if verbose {
91 println!("using `{}`, RPM: {}", translator.path.display(), translator.rpm);
92 }
93
94 to.into_par_iter().for_each(|to| {
95 if to == from {
96 return;
97 }
98 println!("translating to {to}-machine");
99
100 let non_machine_dir = dir_parent.join(to.to_string());
101 if non_machine_dir.exists() {
102 return;
103 }
104
105 let dir_to = dir_parent.join(format!("{to}-machine"));
106
107 files.par_iter().for_each(|(relative_path, file, hash)| {
108 let file_to = dir_to.join(relative_path);
109 if verbose {
110 println!(" {}", file_to.display());
111 }
112
113 let _ = util::check_or_create_dir_all(check, file_to.parent().unwrap());
114
115 const HEADER_PREFIX: &str = "### Machine translated by `cargo zng l10n`, ";
116
117 let mut stale = false;
118 match fs::File::open(&file_to) {
119 Ok(f) => {
120 if !replace {
121 let mut f = io::BufReader::new(f);
122 let mut header = String::new();
123 let _ = f.read_line(&mut header);
124 if let Some(h) = header.strip_prefix(HEADER_PREFIX) {
125 if hash == h.trim_end() {
126 return;
128 }
129 stale = true;
130 } else {
131 return;
133 }
134 }
135 }
136 Err(_) => {
137 if check {
138 fatal!("expected `{}`", file_to.display());
139 }
140 }
141 }
142
143 if !check && (replace || stale || !file_to.exists()) {
144 let r = translator.translate(&from, &to, file, verbose);
145 let write = || -> io::Result<()> {
146 let mut f = fs::File::create(&file_to)?;
147 f.write_all(HEADER_PREFIX.as_bytes())?;
148 f.write_all(hash.as_bytes())?;
149 f.write_all("\n\n".as_bytes())?;
150 f.write_all(r.as_bytes())?;
151 Ok(())
152 };
153 if let Err(e) = write() {
154 fatal!("cannot write `{}`, {}", file_to.display(), e);
155 }
156 }
157 });
158 });
159}
160
161struct Translator {
162 path: PathBuf,
163 rpm: u64,
164 limiter: LeakyBucket,
165}
166impl Default for Translator {
167 fn default() -> Self {
168 let install_dir = std::env::current_exe().unwrap();
169 let install_dir = install_dir.parent().unwrap();
170
171 let mut t = if let Ok(translator) = std::env::var("ZNG_L10N_TRANSLATOR") {
173 let p = if translator.contains('/') || translator.contains('\\') {
174 PathBuf::from(&translator)
175 } else {
176 install_dir.join(format!("zng-l10n-translator-{translator}{}", std::env::consts::EXE_SUFFIX))
177 };
178 if !p.exists() {
179 fatal!("cannot find translator `{translator}`");
180 }
181 Translator {
182 path: p,
183 rpm: 0,
184 limiter: LeakyBucket::builder().build(),
185 }
186 } else {
187 let translators_pattern = install_dir.join("zng-l10n-translator-*").display().to_string();
188 let mut options = vec![];
189 for opt in glob::glob(&translators_pattern).unwrap() {
190 options.push(opt.unwrap());
191 }
192 if options.is_empty() {
193 fatal!("no translator installed\n install a zng-l10n-translator-* crate\nor set ZNG_L10N_TRANSLATOR to a path")
194 } else if options.len() > 1 {
195 let mut names: Vec<_> = options
196 .iter()
197 .map(|p| {
198 p.file_name()
199 .unwrap()
200 .to_string_lossy()
201 .trim_start_matches("zng-l10n-translator-")
202 .trim_end_matches(std::env::consts::EXE_SUFFIX)
203 .to_owned()
204 })
205 .collect();
206 names.sort();
207 let names = names.join(", ");
208 fatal!("multiple translators installed\n set ZNG_L10N_TRANSLATOR to one of: {names}");
209 }
210 Translator {
211 path: options.remove(0),
212 rpm: 0,
213 limiter: LeakyBucket::builder().build(),
214 }
215 };
216
217 t.read_limits();
219
220 t
221 }
222}
223impl Translator {
224 fn read_limits(&mut self) {
225 #[derive(serde::Deserialize)]
226 struct Limits {
227 #[serde(rename = "requests-per-minute")]
228 rpm: u64,
229 }
230
231 let output = std::process::Command::new(&self.path).arg("--limits").output().unwrap();
232 if output.status.success() {
233 match serde_json::from_slice::<Limits>(&output.stdout) {
234 Ok(l) => {
235 self.rpm = l.rpm;
236 let t: u32 = self.rpm.try_into().unwrap();
237 self.limiter = LeakyBucket::builder()
238 .max(t)
239 .tokens(t)
240 .refill_amount(t)
241 .refill_interval(Duration::from_mins(1))
242 .build();
243 }
244 Err(e) => {
245 let limits = String::from_utf8_lossy(&output.stdout);
246 fatal!("invalid response to --limits, {e}\n{limits}")
247 }
248 }
249 } else {
250 let error = String::from_utf8_lossy(&output.stderr);
251 fatal!(
252 "{error}\n{} exited with code {}",
253 self.path.file_name().unwrap().to_string_lossy(),
254 output.status.code().unwrap_or(0)
255 );
256 }
257 }
258
259 fn translate(&self, from: &LanguageIdentifier, to: &LanguageIdentifier, file: &str, _verbose: bool) -> String {
260 self.limiter.acquire_one();
261
262 let mut cmd = std::process::Command::new(&self.path)
263 .arg("--from-lang")
264 .arg(from.to_string())
265 .arg("--to-lang")
266 .arg(to.to_string())
267 .stdin(std::process::Stdio::piped())
268 .stdout(std::process::Stdio::piped())
269 .stderr(std::process::Stdio::piped())
270 .spawn()
271 .unwrap();
272
273 let mut stdin = cmd.stdin.take().unwrap();
274 stdin.write_all(file.as_bytes()).unwrap();
275 stdin.flush().unwrap();
276 drop(stdin);
277
278 let s = cmd.wait().unwrap();
279 if !s.success() {
280 let mut error = String::new();
281 cmd.stderr.unwrap().read_to_string(&mut error).unwrap();
282 fatal!(
283 "{error}\n{} exited with code {}",
284 self.path.file_name().unwrap().to_string_lossy(),
285 s.code().unwrap_or(0)
286 );
287 }
288 let mut out = String::new();
289 cmd.stdout.unwrap().read_to_string(&mut out).unwrap();
290
291 if let Err(e) = fluent_syntax::parser::parse(out.as_str()) {
292 fatal!("translator output is not valid Fluent, {}", &e.1[0]);
293 }
294
295 out
296 }
297}