Skip to main content

cargo_zng/
l10n.rs

//! Localization text scraper.
2//!
3//! See the [`l10n!`] documentation for more details.
4//!
5//! [`l10n!`]: https://zng-ui.github.io/doc/zng/l10n/macro.l10n.html#scrap-template
6
7use std::{
8    cmp::Ordering,
9    collections::{HashMap, HashSet},
10    fmt::{self, Write as _},
11    fs,
12    io::{self, BufRead},
13    path::{Path, PathBuf},
14};
15
16use clap::*;
17
18use crate::{l10n::scraper::FluentTemplate, util};
19
20mod scraper;
21
22pub(crate) mod generate_util;
23mod pseudo;
24mod translate;
25
// Command line arguments of the localization tool, parsed with the `clap` derive.
//
// NOTE(review): the `///` comments on the fields are presumably what `clap` shows as the
// `--help` text of each option, so they are user-facing; confirm before rewording them.
//
// All string options default to an empty string, `run` and `run_others` treat an empty
// string as "not set".
#[derive(Args, Debug)]
pub struct L10nArgs {
    /// Rust files glob or directory
    #[arg(short, long, default_value = "", value_name = "PATH", hide_default_value = true)]
    input: String,

    /// L10n resources dir
    #[arg(short, long, default_value = "", value_name = "DIR", hide_default_value = true)]
    output: String,

    /// Package to scrap and copy dependencies
    ///
    /// If set the --input and --output default is src/**.rs and l10n/
    // mutually exclusive with `manifest_path`, `run` resolves it to a manifest path
    #[arg(short, long, default_value = "", hide_default_value = true)]
    package: String,

    /// Path to Cargo.toml of crate to scrap and copy dependencies
    ///
    /// If set the --input and --output default to src/**.rs and l10n/
    #[arg(long, default_value = "", hide_default_value = true)]
    manifest_path: String,

    /// Don't copy dependencies localization
    ///
    /// Use with --package or --manifest-path to not copy {dep-pkg}/l10n/*.ftl files
    #[arg(long, action)]
    no_deps: bool,

    /// Don't scrap `#.#.#-local` dependencies
    ///
    /// Use with --package or --manifest-path to not scrap local dependencies.
    #[arg(long, action)]
    no_local: bool,

    /// Don't scrap the target package.
    ///
    /// Use with --package or --manifest-path to only scrap dependencies.
    #[arg(long, action)]
    no_pkg: bool,

    /// Remove all previously copied dependency localization files.
    #[arg(long, action)]
    clean_deps: bool,

    /// Remove all previously scraped resources before scraping.
    #[arg(long, action)]
    clean_template: bool,

    /// Same as --clean-deps --clean-template
    // expanded by `run`; all three clean flags are reset when `check` is set
    #[arg(long, action)]
    clean: bool,

    /// Custom l10n macro names, comma separated
    #[arg(short, long, default_value = "", hide_default_value = true)]
    macros: String,

    /// Generate pseudo locale from dir/lang
    ///
    /// EXAMPLE
    ///
    /// "l10n/en" generates pseudo from "l10n/en/**/*.ftl" to "l10n/pseudo"
    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
    pseudo: String,
    /// Generate pseudo mirrored locale
    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
    pseudo_m: String,
    /// Generate pseudo wide locale
    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
    pseudo_w: String,

    /// Output comma separated list of langs that would be included by .zr-l10n
    /// sourcing localization from the given PATH
    ///
    /// See cargo zng res --tool l10n for details
    // when set `run_others` returns right after listing, the pseudo and translate tools do not run
    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
    release_langs: String,

    /// Machine translate locale from dir/lang
    ///
    /// EXAMPLE
    ///
    /// "l10n/template" translates from "l10n/template/**/*.ftl" to a folder for each --translate-to language
    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
    translate: String,

    /// Explicit source language for --translate
    ///
    /// By default is the source folder name, or English for `template`
    #[arg(long, default_value = "", value_name = "LANG", hide_default_value = true)]
    translate_from: String,

    /// Target languages for --translate
    #[arg(
        long,
        default_value = "ar,bg,ca,cs,da,de,el,en,es-419,es-ES,et,eu,fi,fr-CA,fr-FR,gl,he,hi,hr,hu,id,it,ja,ko,lt,lv,nb,nl,pl,pt-BR,pt-PT,ro,ru,sk,sl,sr-Latn,sv,th,tr,uk,vi,zh-Hans,zh-Hant",
        value_name = "LANGS"
    )]
    translate_to: String,

    /// Replace all existing machine translations with --translate
    ///
    /// By default only replaces stale translations
    #[arg(long, action)]
    translate_replace: bool,

    /// Verify that packages are scrapped and validate Fluent files
    #[arg(long, action)]
    check: bool,

    /// Require that all template keys be present in all localized files
    // implies `check`, `run` sets `check` when this flag is set
    #[arg(long, action)]
    check_strict: bool,

    /// Use verbose output.
    #[arg(short, long, action)]
    verbose: bool,
}
143
144pub fn run(mut args: L10nArgs) {
145    if !args.package.is_empty() && !args.manifest_path.is_empty() {
146        fatal!("only one of --package --manifest-path must be set")
147    }
148
149    if args.check_strict {
150        args.check = true;
151    }
152
153    let mut input = String::new();
154    let mut output = args.output.replace('\\', "/");
155
156    if !args.input.is_empty() {
157        input = args.input.replace('\\', "/");
158
159        if !input.contains('*') && PathBuf::from(&input).is_dir() {
160            input = format!("{}/**/*.rs", input.trim_end_matches('/'));
161        }
162    }
163    if !args.package.is_empty() {
164        if let Some(m) = crate::util::manifest_path_from_package(&args.package) {
165            args.manifest_path = m;
166        } else {
167            fatal!("package `{}` not found in workspace", args.package);
168        }
169    }
170
171    if !args.manifest_path.is_empty() {
172        if !Path::new(&args.manifest_path).exists() {
173            fatal!("`{}` does not exist", args.manifest_path)
174        }
175
176        if let Some(path) = args.manifest_path.replace('\\', "/").strip_suffix("/Cargo.toml") {
177            if output.is_empty() {
178                output = format!("{path}/l10n");
179            }
180            if input.is_empty() {
181                input = format!("{path}/src/**/*.rs");
182            }
183        } else {
184            fatal!("expected path to Cargo.toml manifest file");
185        }
186    }
187
188    if args.check {
189        args.clean = false;
190        args.clean_deps = false;
191        args.clean_template = false;
192    } else if args.clean {
193        args.clean_deps = true;
194        args.clean_template = true;
195    }
196
197    if args.verbose {
198        println!(
199            "input: `{input}`\noutput: `{output}`\nclean_deps: {}\nclean_template: {}",
200            args.clean_deps, args.clean_template
201        );
202    }
203
204    if input.is_empty() {
205        return run_others(&args);
206    }
207
208    if output.is_empty() {
209        fatal!("--output is required for --input")
210    }
211
212    let input = input;
213    let output = Path::new(&output);
214
215    let mut template = FluentTemplate::default();
216
217    check_scrap_package(&args, &input, output, &mut template);
218
219    if !template.entries.is_empty() || !template.notes.is_empty() {
220        if let Err(e) = util::check_or_create_dir_all(args.check, output) {
221            fatal!("cannot create dir `{}`, {e}", output.display());
222        }
223
224        let output = output.join("template");
225
226        if let Err(e) = util::check_or_create_dir_all(args.check, &output) {
227            fatal!("cannot create dir `{}`, {e}", output.display());
228        }
229
230        template.sort();
231
232        let mut clean_files = HashSet::new();
233
234        let r = template.write(|file, contents| {
235            let file = format!("{}.ftl", if file.is_empty() { "_" } else { file });
236            let output = output.join(&file);
237            clean_files.insert(file);
238            util::check_or_write(args.check, output, contents, args.verbose)
239        });
240        if let Err(e) = r {
241            fatal!("error writing template files, {e}");
242        }
243
244        if args.clean_template {
245            debug_assert!(!args.check);
246
247            let cleanup = || -> std::io::Result<()> {
248                for entry in std::fs::read_dir(&output)? {
249                    let entry = entry?.path();
250                    if entry.is_file() {
251                        let name = entry.file_prefix().unwrap().to_string_lossy();
252                        if name.ends_with(".ftl") && !clean_files.contains(&*name) {
253                            let mut entry_file = std::fs::File::open(&entry)?;
254                            if let Some(first_line) = std::io::BufReader::new(&mut entry_file).lines().next()
255                                && first_line?.starts_with(FluentTemplate::AUTO_GENERATED_HEADER)
256                            {
257                                drop(entry_file);
258                                std::fs::remove_file(entry)?;
259                            }
260                        }
261                    }
262                }
263                Ok(())
264            };
265            if let Err(e) = cleanup() {
266                error!("failed template cleanup, {e}");
267            }
268        }
269    }
270
271    if args.check {
272        check_fluent_output(&args, output);
273    }
274
275    run_others(&args);
276}
277
/// Scrapes one package and collects the localization files of its dependencies.
///
/// * `args` - Tool options. The `no_pkg`, `no_deps`, `no_local`, `clean_deps`, `check`, `verbose`,
///   `macros`, `package` and `manifest_path` values are read here.
/// * `input` - Glob of the Rust source files to scrap.
/// * `output` - L10n resources dir that receives the `{lang}/deps` dirs.
/// * `template` - Accumulator extended with the entries scraped from `input` and from the local
///   dependencies.
///
/// Unless disabled by the options this function:
///
/// 1. Scrapes `input` into `template`.
/// 2. Removes all `{output}/*/deps` dirs (`clean_deps`).
/// 3. Copies `{dep}/l10n/{lang}/*.ftl` of each dependency to
///    `{output}/{lang}/deps/{dep.name}/{dep.version}/`, and the files the dependency itself
///    copied from its own dependencies.
/// 4. Calls itself once for each `local` dependency to also scrap their sources into `template`.
///
/// NOTE(review): local dependencies are only collected inside the `!args.no_deps` branch, so
/// `--no-deps` also disables the local dependencies scrap; confirm this is intended.
fn check_scrap_package(args: &L10nArgs, input: &str, output: &Path, template: &mut FluentTemplate) {
    // scrap the target package
    if !args.no_pkg {
        if args.check {
            println!(r#"checking "{input}".."#);
        } else {
            println!(r#"scraping "{input}".."#);
        }

        let custom_macro_names: Vec<&str> = args.macros.split(',').map(|n| n.trim()).collect();
        let t = scraper::scrape_fluent_text(input, &custom_macro_names);
        if !args.check {
            match t.entries.len() {
                0 => println!("  did not find any entry"),
                1 => println!("  found 1 entry"),
                n => println!("  found {n} entries"),
            }
        }
        template.extend(t);
    }

    // cleanup dependencies, removes the `deps` dir of every lang dir in `output`
    if args.clean_deps {
        for entry in glob::glob(&format!("{}/*/deps", output.display()))
            .unwrap_or_else(|e| fatal!("cannot cleanup deps in `{}`, {e}", output.display()))
        {
            let dir = entry.unwrap_or_else(|e| fatal!("cannot cleanup deps, {e}"));
            if args.verbose {
                println!("removing `{}` to clean dependencies", dir.display());
            }
            // a dir that is already gone is not an error
            if let Err(e) = std::fs::remove_dir_all(&dir)
                && !matches!(e.kind(), io::ErrorKind::NotFound)
            {
                error!("cannot remove `{}`, {e}", dir.display());
            }
        }
    }

    // collect dependencies
    let mut local = vec![];
    if !args.no_deps {
        // number of dependencies that have at least one lang dir
        let mut count = 0;
        let (workspace_root, deps) = util::dependencies(&args.manifest_path);
        for dep in deps {
            // dependencies with a `-local` pre-release version inside the workspace are not
            // copied, their sources are scraped at the end of this function
            if dep.version.pre.as_str() == "local" && dep.manifest_path.starts_with(&workspace_root) {
                local.push(dep);
                continue;
            }

            let dep_l10n = dep.manifest_path.with_file_name("l10n");
            let dep_l10n_reader = match fs::read_dir(&dep_l10n) {
                Ok(d) => d,
                Err(e) => {
                    // most dependencies don't have a l10n dir, only other errors are reported
                    if !matches!(e.kind(), io::ErrorKind::NotFound) {
                        error!("cannot read `{}`, {e}", dep_l10n.display());
                    }
                    continue;
                }
            };

            // set by `l10n_dir`, flags that this dependency has localization
            let mut any = false;

            // get l10n_dir/{lang}/deps/dep.name/dep.version/
            let mut l10n_dir = |lang: Option<&std::ffi::OsStr>| {
                any = true;
                let dir = output.join(lang.unwrap()).join("deps");

                let ignore_file = dir.join(".gitignore");

                if !ignore_file.exists() {
                    // create dir and .gitignore file
                    (|| -> io::Result<()> {
                        util::check_or_create_dir_all(args.check, &dir)?;

                        let mut ignore = "# Dependency localization files\n".to_owned();

                        // the update command in the file only needs --output if the output dir
                        // is not the default `l10n` dir beside the manifest
                        let output = Path::new(&output);
                        let custom_output = if output != Path::new(&args.manifest_path).with_file_name("l10n") {
                            format!(
                                " --output \"{}\"",
                                output.strip_prefix(std::env::current_dir().unwrap()).unwrap_or(output).display()
                            )
                            .replace('\\', "/")
                        } else {
                            String::new()
                        };
                        if !args.package.is_empty() {
                            writeln!(
                                &mut ignore,
                                "# Call `cargo zng l10n --package {}{custom_output} --no-pkg --no-local --clean-deps` to update",
                                args.package
                            )
                            .unwrap();
                        } else {
                            let path = Path::new(&args.manifest_path);
                            let path = path.strip_prefix(std::env::current_dir().unwrap()).unwrap_or(path);
                            writeln!(
                                &mut ignore,
                                "# Call `cargo zng l10n --manifest-path \"{}\" --no-pkg --no-local --clean-deps` to update",
                                path.display()
                            )
                            .unwrap();
                        }
                        // ignore everything in the deps dir, except the .gitignore file itself
                        writeln!(&mut ignore).unwrap();
                        writeln!(&mut ignore, "*").unwrap();
                        writeln!(&mut ignore, "!.gitignore").unwrap();

                        if let Err(e) = fs::write(&ignore_file, ignore.as_bytes()) {
                            fatal!("cannot write `{}`, {e}", ignore_file.display())
                        }

                        Ok(())
                    })()
                    .unwrap_or_else(|e| fatal!("cannot create `{}`, {e}", output.display()));
                }

                let dir = dir.join(&dep.name).join(dep.version.to_string());
                // error ignored here, a missing dir surfaces when a file is copied into it
                let _ = util::check_or_create_dir_all(args.check, &dir);

                dir
            };

            // [(exporter_dep, ".../{lang}?/deps")]
            let mut reexport_deps = vec![];

            for dep_l10n_entry in dep_l10n_reader {
                let dep_l10n_entry = match dep_l10n_entry {
                    Ok(e) => e.path(),
                    Err(e) => {
                        error!("cannot read `{}` entry, {e}", dep_l10n.display());
                        continue;
                    }
                };
                // each dir inside the dependency l10n dir is a lang
                if dep_l10n_entry.is_dir() {
                    // l10n/{lang}/deps/{dep.name}/{dep.version}
                    let output_dir = l10n_dir(dep_l10n_entry.file_name());
                    let _ = util::check_or_create_dir_all(args.check, &output_dir);

                    let lang_dir_reader = match fs::read_dir(&dep_l10n_entry) {
                        Ok(d) => d,
                        Err(e) => {
                            error!("cannot read `{}`, {e}", dep_l10n_entry.display());
                            continue;
                        }
                    };

                    for lang_entry in lang_dir_reader {
                        let lang_entry = match lang_entry {
                            Ok(e) => e.path(),
                            Err(e) => {
                                error!("cannot read `{}` entry, {e}", dep_l10n_entry.display());
                                continue;
                            }
                        };

                        if lang_entry.is_dir() {
                            // the dependency's own `deps` dir is copied after the direct files
                            if lang_entry.file_name().map(|n| n == "deps").unwrap_or(false) {
                                reexport_deps.push((&dep, lang_entry));
                            }
                        } else if lang_entry.is_file() && lang_entry.extension().map(|e| e == "ftl").unwrap_or(false) {
                            let _ = util::check_or_create_dir_all(args.check, &output_dir);
                            let to = output_dir.join(lang_entry.file_name().unwrap());
                            if let Err(e) = util::check_or_copy(args.check, &lang_entry, &to, args.verbose) {
                                error!("cannot copy `{}` to `{}`, {e}", lang_entry.display(), to.display());
                                continue;
                            }
                        }
                    }
                }
            }

            // by name, then by version from the highest to the lowest
            reexport_deps.sort_by(|a, b| match a.0.name.cmp(&b.0.name) {
                Ordering::Equal => b.0.version.cmp(&a.0.version),
                o => o,
            });

            for (_, deps) in reexport_deps {
                // dep/l10n/lang/deps/
                let target = l10n_dir(deps.parent().and_then(|p| p.file_name()));

                // deps/pkg-name/pkg-version/*.ftl
                for entry in glob::glob(&deps.join("*/*/*.ftl").display().to_string()).unwrap() {
                    let entry = entry.unwrap_or_else(|e| fatal!("cannot read `{}` entry, {e}", deps.display()));
                    let target = target.join(entry.strip_prefix(&deps).unwrap());
                    // files that already exist in the target are not replaced
                    if !target.exists()
                        && entry.is_file()
                        && let Err(e) = util::check_or_copy(args.check, &entry, &target, args.verbose)
                    {
                        error!("cannot copy `{}` to `{}`, {e}", entry.display(), target.display());
                    }
                }
            }

            count += any as u32;
        }
        println!("found {count} dependencies with localization");
    }

    // scrap local dependencies
    if !args.no_local {
        for dep in local {
            let manifest_path = dep.manifest_path.display().to_string();
            let input = manifest_path.replace('\\', "/");
            let input = input.strip_suffix("/Cargo.toml").unwrap();
            let input = format!("{input}/src/**/*.rs");
            // only scraps the sources of the local dependency into the same `template`,
            // `no_deps` and `no_local` are set so the recursion stops at this level
            check_scrap_package(
                &L10nArgs {
                    input: String::new(),
                    output: String::new(),
                    package: String::new(),
                    manifest_path,
                    no_deps: true,
                    no_local: true,
                    no_pkg: false,
                    clean_deps: false,
                    clean_template: false,
                    clean: false,
                    macros: args.macros.clone(),
                    pseudo: String::new(),
                    pseudo_m: String::new(),
                    pseudo_w: String::new(),
                    release_langs: String::new(),
                    translate: String::new(),
                    translate_from: String::new(),
                    translate_to: String::new(),
                    translate_replace: false,
                    check: args.check,
                    check_strict: args.check_strict,
                    verbose: args.verbose,
                },
                &input,
                output,
                template,
            )
        }
    }
}
515
516fn run_others(args: &L10nArgs) {
517    if !args.release_langs.is_empty() {
518        crate::res::built_in::release_langs(Path::new(&args.release_langs));
519        return;
520    }
521    if !args.pseudo.is_empty() {
522        pseudo::pseudo(&args.pseudo, args.check, args.verbose);
523    }
524    if !args.pseudo_m.is_empty() {
525        pseudo::pseudo_mirr(&args.pseudo_m, args.check, args.verbose);
526    }
527    if !args.pseudo_w.is_empty() {
528        pseudo::pseudo_wide(&args.pseudo_w, args.check, args.verbose);
529    }
530    if !args.translate.is_empty() {
531        translate::translate(
532            &args.translate,
533            &args.translate_from,
534            &args.translate_to,
535            args.translate_replace,
536            args.check,
537            args.verbose,
538        );
539    }
540}
541
542fn check_fluent_output(args: &L10nArgs, output: &Path) {
543    let read_dir = match fs::read_dir(output) {
544        Ok(d) => d,
545        Err(e) if matches!(e.kind(), io::ErrorKind::NotFound) => {
546            if args.verbose {
547                eprintln!("no fluent files to check, `{}` not found", output.display());
548            }
549            return;
550        }
551        Err(e) => fatal!("cannot read `{}`, {e}", output.display()),
552    };
553
554    // validate syntax of */*.ftl and collect entry keys
555    let mut template = None;
556    let mut langs = vec![];
557    for lang_dir in read_dir {
558        let lang_dir = lang_dir
559            .unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", output.display()))
560            .path();
561        if lang_dir.is_dir() {
562            let mut files = vec![];
563
564            for file in fs::read_dir(&lang_dir).unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", lang_dir.display())) {
565                let file = file.unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", lang_dir.display())).path();
566                if file.is_file() {
567                    let content = fs::read_to_string(&file).unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", file.display()));
568                    let content = match fluent_syntax::parser::parse(content.as_str()) {
569                        Ok(r) => r,
570                        Err((_, errors)) => {
571                            let e = FluentParserErrors(errors);
572                            error!("cannot parse `{}`\n{e}", file.display());
573                            continue;
574                        }
575                    };
576
577                    let mut keys = vec![];
578                    for entry in content.body {
579                        if let fluent_syntax::ast::Entry::Message(m) = entry {
580                            let key = m.id.name.to_owned();
581                            keys.push((key, m.value.is_some()));
582                            for attr in m.attributes {
583                                keys.push((format!("{}.{}", m.id.name, attr.id.name), true));
584                            }
585                        }
586                    }
587
588                    files.push((file.file_name().unwrap().to_owned(), keys));
589                }
590            }
591
592            if lang_dir.file_name().unwrap() == "template" {
593                assert!(template.is_none());
594                template = Some(files);
595            } else {
596                langs.push((lang_dir, files));
597            }
598        }
599    }
600    if util::is_failed_run() {
601        return;
602    }
603
604    // check
605    if let Some(template) = template {
606        if langs.is_empty() {
607            if args.verbose {
608                eprintln!("no fluent files to compare with template");
609            }
610        } else {
611            // faster template lookup
612            let template = template
613                .into_iter()
614                .map(|(k, v)| (k, v.into_iter().collect::<HashMap<_, _>>()))
615                .collect::<HashMap<_, _>>();
616
617            for (lang, files) in langs {
618                // match localized against template
619                for (file, messages) in &files {
620                    let mut errors = vec![];
621                    if let Some(template_msgs) = template.get(file) {
622                        for (id, has_value) in messages {
623                            if let Some(template_has_value) = template_msgs.get(id) {
624                                if has_value != template_has_value {
625                                    if *has_value {
626                                        errors.push(format!("unexpected value, `{id}` has no value in template"));
627                                    } else if args.check_strict {
628                                        errors.push(format!("missing value, `{id}` has value in template"));
629                                    }
630                                }
631                            } else {
632                                errors.push(format!("unknown id, `{id}` not found in template file"));
633                            }
634                        }
635                        if args.check_strict {
636                            for template_id in template_msgs.keys() {
637                                if !messages.iter().any(|(i, _)| i == template_id) {
638                                    errors.push(format!("missing id, `{template_id}` not found in localized file"));
639                                }
640                            }
641                        }
642                    } else {
643                        errors.push("template file not found".to_owned());
644                    }
645                    if !errors.is_empty() {
646                        let lang_path = Path::new(lang.file_name().unwrap()).join(file);
647                        let template_path = Path::new("template").join(file);
648                        let mut msg = format!("`{}` does not match `{}`\n", lang_path.display(), template_path.display());
649                        for error in errors {
650                            msg.push_str("  ");
651                            msg.push_str(&error);
652                            msg.push('\n');
653                        }
654                        error!("{msg}");
655                    }
656                }
657                if args.check_strict {
658                    for template_file in template.keys() {
659                        if !files.iter().any(|(f, _)| f == template_file) {
660                            let lang_path = Path::new(lang.file_name().unwrap()).join(template_file);
661                            let template_path = Path::new("template").join(template_file);
662                            error!(
663                                "`{}` does not match `{}`\n   localized file not found",
664                                lang_path.display(),
665                                template_path.display()
666                            );
667                        }
668                    }
669                }
670            }
671        }
672    } else if args.verbose {
673        eprintln!("no template to compare, `{}` not found", output.join("template").display());
674    }
675}
676struct FluentParserErrors(Vec<fluent_syntax::parser::ParserError>);
677impl fmt::Display for FluentParserErrors {
678    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
679        let mut sep = "";
680        for e in &self.0 {
681            write!(f, "  {sep}{e}")?;
682            sep = "\n";
683        }
684        Ok(())
685    }
686}