Skip to main content

cargo_zng/
l10n.rs

1//! Localization text scraper.
2//!
3//! See the [`l10n!`] documentation for more details.
4//!
5//! [`l10n!`]: https://zng-ui.github.io/doc/zng/l10n/macro.l10n.html#scrap-template
6
7use std::{
8    cmp::Ordering,
9    collections::{HashMap, HashSet},
10    fmt::{self, Write as _},
11    fs,
12    io::{self, BufRead},
13    path::{Path, PathBuf},
14};
15
16use clap::*;
17
18use crate::{l10n::scraper::FluentTemplate, util};
19
20mod scraper;
21
22mod generate_util;
23mod pseudo;
24mod translate;
25
26#[derive(Args, Debug)]
27pub struct L10nArgs {
28    /// Rust files glob or directory
29    #[arg(short, long, default_value = "", value_name = "PATH", hide_default_value = true)]
30    input: String,
31
32    /// L10n resources dir
33    #[arg(short, long, default_value = "", value_name = "DIR", hide_default_value = true)]
34    output: String,
35
36    /// Package to scrap and copy dependencies
37    ///
38    /// If set the --input and --output default is src/**.rs and l10n/
39    #[arg(short, long, default_value = "", hide_default_value = true)]
40    package: String,
41
42    /// Path to Cargo.toml of crate to scrap and copy dependencies
43    ///
44    /// If set the --input and --output default to src/**.rs and l10n/
45    #[arg(long, default_value = "", hide_default_value = true)]
46    manifest_path: String,
47
48    /// Don't copy dependencies localization
49    ///
50    /// Use with --package or --manifest-path to not copy {dep-pkg}/l10n/*.ftl files
51    #[arg(long, action)]
52    no_deps: bool,
53
54    /// Don't scrap `#.#.#-local` dependencies
55    ///
56    /// Use with --package or --manifest-path to not scrap local dependencies.
57    #[arg(long, action)]
58    no_local: bool,
59
60    /// Don't scrap the target package.
61    ///
62    /// Use with --package or --manifest-path to only scrap dependencies.
63    #[arg(long, action)]
64    no_pkg: bool,
65
66    /// Remove all previously copied dependency localization files.
67    #[arg(long, action)]
68    clean_deps: bool,
69
70    /// Remove all previously scraped resources before scraping.
71    #[arg(long, action)]
72    clean_template: bool,
73
74    /// Same as --clean-deps --clean-template
75    #[arg(long, action)]
76    clean: bool,
77
78    /// Custom l10n macro names, comma separated
79    #[arg(short, long, default_value = "", hide_default_value = true)]
80    macros: String,
81
82    /// Generate pseudo locale from dir/lang
83    ///
84    /// EXAMPLE
85    ///
86    /// "l10n/en" generates pseudo from "l10n/en/**/*.ftl" to "l10n/pseudo"
87    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
88    pseudo: String,
89    /// Generate pseudo mirrored locale
90    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
91    pseudo_m: String,
92    /// Generate pseudo wide locale
93    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
94    pseudo_w: String,
95
96    /// Machine translate locale from dir/lang
97    ///
98    /// EXAMPLE
99    ///
100    /// "l10n/template" translates from "l10n/template/**/*.ftl" to a folder for each --translate-to language
101    #[arg(long, default_value = "", value_name = "PATH", hide_default_value = true)]
102    translate: String,
103
104    /// Explicit source language for --translate
105    ///
106    /// By default is the source folder name, or English for `template`
107    #[arg(long, default_value = "", value_name = "LANG", hide_default_value = true)]
108    translate_from: String,
109
110    /// Target languages for --translate
111    #[arg(
112        long,
113        default_value = "ar,bg,ca,cs,da,de,el,en,es-419,es-ES,et,eu,fi,fr-CA,fr-FR,gl,he,hi,hr,hu,id,it,ja,ko,lt,lv,nb,nl,pl,pt-BR,pt-PT,ro,ru,sk,sl,sr-Latn,sv,th,tr,uk,vi,zh-Hans,zh-Hant",
114        value_name = "LANGS"
115    )]
116    translate_to: String,
117
118    /// Replace all existing machine translations with --translate
119    ///
120    /// By default only replaces stale translations
121    #[arg(long, action)]
122    translate_replace: bool,
123
124    /// Verify that packages are scrapped and validate Fluent files
125    #[arg(long, action)]
126    check: bool,
127
128    /// Require that all template keys be present in all localized files
129    #[arg(long, action)]
130    check_strict: bool,
131
132    /// Use verbose output.
133    #[arg(short, long, action)]
134    verbose: bool,
135}
136
137pub fn run(mut args: L10nArgs) {
138    if !args.package.is_empty() && !args.manifest_path.is_empty() {
139        fatal!("only one of --package --manifest-path must be set")
140    }
141
142    if args.check_strict {
143        args.check = true;
144    }
145
146    let mut input = String::new();
147    let mut output = args.output.replace('\\', "/");
148
149    if !args.input.is_empty() {
150        input = args.input.replace('\\', "/");
151
152        if !input.contains('*') && PathBuf::from(&input).is_dir() {
153            input = format!("{}/**/*.rs", input.trim_end_matches('/'));
154        }
155    }
156    if !args.package.is_empty() {
157        if let Some(m) = crate::util::manifest_path_from_package(&args.package) {
158            args.manifest_path = m;
159        } else {
160            fatal!("package `{}` not found in workspace", args.package);
161        }
162    }
163
164    if !args.manifest_path.is_empty() {
165        if !Path::new(&args.manifest_path).exists() {
166            fatal!("`{}` does not exist", args.manifest_path)
167        }
168
169        if let Some(path) = args.manifest_path.replace('\\', "/").strip_suffix("/Cargo.toml") {
170            if output.is_empty() {
171                output = format!("{path}/l10n");
172            }
173            if input.is_empty() {
174                input = format!("{path}/src/**/*.rs");
175            }
176        } else {
177            fatal!("expected path to Cargo.toml manifest file");
178        }
179    }
180
181    if args.check {
182        args.clean = false;
183        args.clean_deps = false;
184        args.clean_template = false;
185    } else if args.clean {
186        args.clean_deps = true;
187        args.clean_template = true;
188    }
189
190    if args.verbose {
191        println!(
192            "input: `{input}`\noutput: `{output}`\nclean_deps: {}\nclean_template: {}",
193            args.clean_deps, args.clean_template
194        );
195    }
196
197    if input.is_empty() {
198        return run_generators(&args);
199    }
200
201    if output.is_empty() {
202        fatal!("--output is required for --input")
203    }
204
205    let input = input;
206    let output = Path::new(&output);
207
208    let mut template = FluentTemplate::default();
209
210    check_scrap_package(&args, &input, output, &mut template);
211
212    if !template.entries.is_empty() || !template.notes.is_empty() {
213        if let Err(e) = util::check_or_create_dir_all(args.check, output) {
214            fatal!("cannot create dir `{}`, {e}", output.display());
215        }
216
217        let output = output.join("template");
218
219        if let Err(e) = util::check_or_create_dir_all(args.check, &output) {
220            fatal!("cannot create dir `{}`, {e}", output.display());
221        }
222
223        template.sort();
224
225        let mut clean_files = HashSet::new();
226
227        let r = template.write(|file, contents| {
228            let file = format!("{}.ftl", if file.is_empty() { "_" } else { file });
229            let output = output.join(&file);
230            clean_files.insert(file);
231            util::check_or_write(args.check, output, contents, args.verbose)
232        });
233        if let Err(e) = r {
234            fatal!("error writing template files, {e}");
235        }
236
237        if args.clean_template {
238            debug_assert!(!args.check);
239
240            let cleanup = || -> std::io::Result<()> {
241                for entry in std::fs::read_dir(&output)? {
242                    let entry = entry?.path();
243                    if entry.is_file() {
244                        let name = entry.file_prefix().unwrap().to_string_lossy();
245                        if name.ends_with(".ftl") && !clean_files.contains(&*name) {
246                            let mut entry_file = std::fs::File::open(&entry)?;
247                            if let Some(first_line) = std::io::BufReader::new(&mut entry_file).lines().next()
248                                && first_line?.starts_with(FluentTemplate::AUTO_GENERATED_HEADER)
249                            {
250                                drop(entry_file);
251                                std::fs::remove_file(entry)?;
252                            }
253                        }
254                    }
255                }
256                Ok(())
257            };
258            if let Err(e) = cleanup() {
259                error!("failed template cleanup, {e}");
260            }
261        }
262    }
263
264    if args.check {
265        check_fluent_output(&args, output);
266    }
267
268    run_generators(&args);
269}
270
271fn check_scrap_package(args: &L10nArgs, input: &str, output: &Path, template: &mut FluentTemplate) {
272    // scrap the target package
273    if !args.no_pkg {
274        if args.check {
275            println!(r#"checking "{input}".."#);
276        } else {
277            println!(r#"scraping "{input}".."#);
278        }
279
280        let custom_macro_names: Vec<&str> = args.macros.split(',').map(|n| n.trim()).collect();
281        let t = scraper::scrape_fluent_text(input, &custom_macro_names);
282        if !args.check {
283            match t.entries.len() {
284                0 => println!("  did not find any entry"),
285                1 => println!("  found 1 entry"),
286                n => println!("  found {n} entries"),
287            }
288        }
289        template.extend(t);
290    }
291
292    // cleanup dependencies
293    if args.clean_deps {
294        for entry in glob::glob(&format!("{}/*/deps", output.display()))
295            .unwrap_or_else(|e| fatal!("cannot cleanup deps in `{}`, {e}", output.display()))
296        {
297            let dir = entry.unwrap_or_else(|e| fatal!("cannot cleanup deps, {e}"));
298            if args.verbose {
299                println!("removing `{}` to clean dependencies", dir.display());
300            }
301            if let Err(e) = std::fs::remove_dir_all(&dir)
302                && !matches!(e.kind(), io::ErrorKind::NotFound)
303            {
304                error!("cannot remove `{}`, {e}", dir.display());
305            }
306        }
307    }
308
309    // collect dependencies
310    let mut local = vec![];
311    if !args.no_deps {
312        let mut count = 0;
313        let (workspace_root, deps) = util::dependencies(&args.manifest_path);
314        for dep in deps {
315            if dep.version.pre.as_str() == "local" && dep.manifest_path.starts_with(&workspace_root) {
316                local.push(dep);
317                continue;
318            }
319
320            let dep_l10n = dep.manifest_path.with_file_name("l10n");
321            let dep_l10n_reader = match fs::read_dir(&dep_l10n) {
322                Ok(d) => d,
323                Err(e) => {
324                    if !matches!(e.kind(), io::ErrorKind::NotFound) {
325                        error!("cannot read `{}`, {e}", dep_l10n.display());
326                    }
327                    continue;
328                }
329            };
330
331            let mut any = false;
332
333            // get l10n_dir/{lang}/deps/dep.name/dep.version/
334            let mut l10n_dir = |lang: Option<&std::ffi::OsStr>| {
335                any = true;
336                let dir = output.join(lang.unwrap()).join("deps");
337
338                let ignore_file = dir.join(".gitignore");
339
340                if !ignore_file.exists() {
341                    // create dir and .gitignore file
342                    (|| -> io::Result<()> {
343                        util::check_or_create_dir_all(args.check, &dir)?;
344
345                        let mut ignore = "# Dependency localization files\n".to_owned();
346
347                        let output = Path::new(&output);
348                        let custom_output = if output != Path::new(&args.manifest_path).with_file_name("l10n") {
349                            format!(
350                                " --output \"{}\"",
351                                output.strip_prefix(std::env::current_dir().unwrap()).unwrap_or(output).display()
352                            )
353                            .replace('\\', "/")
354                        } else {
355                            String::new()
356                        };
357                        if !args.package.is_empty() {
358                            writeln!(
359                                &mut ignore,
360                                "# Call `cargo zng l10n --package {}{custom_output} --no-pkg --no-local --clean-deps` to update",
361                                args.package
362                            )
363                            .unwrap();
364                        } else {
365                            let path = Path::new(&args.manifest_path);
366                            let path = path.strip_prefix(std::env::current_dir().unwrap()).unwrap_or(path);
367                            writeln!(
368                                &mut ignore,
369                                "# Call `cargo zng l10n --manifest-path \"{}\" --no-pkg --no-local --clean-deps` to update",
370                                path.display()
371                            )
372                            .unwrap();
373                        }
374                        writeln!(&mut ignore).unwrap();
375                        writeln!(&mut ignore, "*").unwrap();
376                        writeln!(&mut ignore, "!.gitignore").unwrap();
377
378                        if let Err(e) = fs::write(&ignore_file, ignore.as_bytes()) {
379                            fatal!("cannot write `{}`, {e}", ignore_file.display())
380                        }
381
382                        Ok(())
383                    })()
384                    .unwrap_or_else(|e| fatal!("cannot create `{}`, {e}", output.display()));
385                }
386
387                let dir = dir.join(&dep.name).join(dep.version.to_string());
388                let _ = util::check_or_create_dir_all(args.check, &dir);
389
390                dir
391            };
392
393            // [(exporter_dep, ".../{lang}?/deps")]
394            let mut reexport_deps = vec![];
395
396            for dep_l10n_entry in dep_l10n_reader {
397                let dep_l10n_entry = match dep_l10n_entry {
398                    Ok(e) => e.path(),
399                    Err(e) => {
400                        error!("cannot read `{}` entry, {e}", dep_l10n.display());
401                        continue;
402                    }
403                };
404                if dep_l10n_entry.is_dir() {
405                    // l10n/{lang}/deps/{dep.name}/{dep.version}
406                    let output_dir = l10n_dir(dep_l10n_entry.file_name());
407                    let _ = util::check_or_create_dir_all(args.check, &output_dir);
408
409                    let lang_dir_reader = match fs::read_dir(&dep_l10n_entry) {
410                        Ok(d) => d,
411                        Err(e) => {
412                            error!("cannot read `{}`, {e}", dep_l10n_entry.display());
413                            continue;
414                        }
415                    };
416
417                    for lang_entry in lang_dir_reader {
418                        let lang_entry = match lang_entry {
419                            Ok(e) => e.path(),
420                            Err(e) => {
421                                error!("cannot read `{}` entry, {e}", dep_l10n_entry.display());
422                                continue;
423                            }
424                        };
425
426                        if lang_entry.is_dir() {
427                            if lang_entry.file_name().map(|n| n == "deps").unwrap_or(false) {
428                                reexport_deps.push((&dep, lang_entry));
429                            }
430                        } else if lang_entry.is_file() && lang_entry.extension().map(|e| e == "ftl").unwrap_or(false) {
431                            let _ = util::check_or_create_dir_all(args.check, &output_dir);
432                            let to = output_dir.join(lang_entry.file_name().unwrap());
433                            if let Err(e) = util::check_or_copy(args.check, &lang_entry, &to, args.verbose) {
434                                error!("cannot copy `{}` to `{}`, {e}", lang_entry.display(), to.display());
435                                continue;
436                            }
437                        }
438                    }
439                }
440            }
441
442            reexport_deps.sort_by(|a, b| match a.0.name.cmp(&b.0.name) {
443                Ordering::Equal => b.0.version.cmp(&a.0.version),
444                o => o,
445            });
446
447            for (_, deps) in reexport_deps {
448                // dep/l10n/lang/deps/
449                let target = l10n_dir(deps.parent().and_then(|p| p.file_name()));
450
451                // deps/pkg-name/pkg-version/*.ftl
452                for entry in glob::glob(&deps.join("*/*/*.ftl").display().to_string()).unwrap() {
453                    let entry = entry.unwrap_or_else(|e| fatal!("cannot read `{}` entry, {e}", deps.display()));
454                    let target = target.join(entry.strip_prefix(&deps).unwrap());
455                    if !target.exists()
456                        && entry.is_file()
457                        && let Err(e) = util::check_or_copy(args.check, &entry, &target, args.verbose)
458                    {
459                        error!("cannot copy `{}` to `{}`, {e}", entry.display(), target.display());
460                    }
461                }
462            }
463
464            count += any as u32;
465        }
466        println!("found {count} dependencies with localization");
467    }
468
469    // scrap local dependencies
470    if !args.no_local {
471        for dep in local {
472            let manifest_path = dep.manifest_path.display().to_string();
473            let input = manifest_path.replace('\\', "/");
474            let input = input.strip_suffix("/Cargo.toml").unwrap();
475            let input = format!("{input}/src/**/*.rs");
476            check_scrap_package(
477                &L10nArgs {
478                    input: String::new(),
479                    output: String::new(),
480                    package: String::new(),
481                    manifest_path,
482                    no_deps: true,
483                    no_local: true,
484                    no_pkg: false,
485                    clean_deps: false,
486                    clean_template: false,
487                    clean: false,
488                    macros: args.macros.clone(),
489                    pseudo: String::new(),
490                    pseudo_m: String::new(),
491                    pseudo_w: String::new(),
492                    translate: String::new(),
493                    translate_from: String::new(),
494                    translate_to: String::new(),
495                    translate_replace: false,
496                    check: args.check,
497                    check_strict: args.check_strict,
498                    verbose: args.verbose,
499                },
500                &input,
501                output,
502                template,
503            )
504        }
505    }
506}
507
508fn run_generators(args: &L10nArgs) {
509    if !args.pseudo.is_empty() {
510        pseudo::pseudo(&args.pseudo, args.check, args.verbose);
511    }
512    if !args.pseudo_m.is_empty() {
513        pseudo::pseudo_mirr(&args.pseudo_m, args.check, args.verbose);
514    }
515    if !args.pseudo_w.is_empty() {
516        pseudo::pseudo_wide(&args.pseudo_w, args.check, args.verbose);
517    }
518    if !args.translate.is_empty() {
519        translate::translate(
520            &args.translate,
521            &args.translate_from,
522            &args.translate_to,
523            args.translate_replace,
524            args.check,
525            args.verbose,
526        );
527    }
528}
529
530fn check_fluent_output(args: &L10nArgs, output: &Path) {
531    let read_dir = match fs::read_dir(output) {
532        Ok(d) => d,
533        Err(e) if matches!(e.kind(), io::ErrorKind::NotFound) => {
534            if args.verbose {
535                eprintln!("no fluent files to check, `{}` not found", output.display());
536            }
537            return;
538        }
539        Err(e) => fatal!("cannot read `{}`, {e}", output.display()),
540    };
541
542    // validate syntax of */*.ftl and collect entry keys
543    let mut template = None;
544    let mut langs = vec![];
545    for lang_dir in read_dir {
546        let lang_dir = lang_dir
547            .unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", output.display()))
548            .path();
549        if lang_dir.is_dir() {
550            let mut files = vec![];
551
552            for file in fs::read_dir(&lang_dir).unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", lang_dir.display())) {
553                let file = file.unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", lang_dir.display())).path();
554                if file.is_file() {
555                    let content = fs::read_to_string(&file).unwrap_or_else(|e| fatal!("cannot read `{}`, {e}", file.display()));
556                    let content = match fluent_syntax::parser::parse(content.as_str()) {
557                        Ok(r) => r,
558                        Err((_, errors)) => {
559                            let e = FluentParserErrors(errors);
560                            error!("cannot parse `{}`\n{e}", file.display());
561                            continue;
562                        }
563                    };
564
565                    let mut keys = vec![];
566                    for entry in content.body {
567                        if let fluent_syntax::ast::Entry::Message(m) = entry {
568                            let key = m.id.name.to_owned();
569                            keys.push((key, m.value.is_some()));
570                            for attr in m.attributes {
571                                keys.push((format!("{}.{}", m.id.name, attr.id.name), true));
572                            }
573                        }
574                    }
575
576                    files.push((file.file_name().unwrap().to_owned(), keys));
577                }
578            }
579
580            if lang_dir.file_name().unwrap() == "template" {
581                assert!(template.is_none());
582                template = Some(files);
583            } else {
584                langs.push((lang_dir, files));
585            }
586        }
587    }
588    if util::is_failed_run() {
589        return;
590    }
591
592    // check
593    if let Some(template) = template {
594        if langs.is_empty() {
595            if args.verbose {
596                eprintln!("no fluent files to compare with template");
597            }
598        } else {
599            // faster template lookup
600            let template = template
601                .into_iter()
602                .map(|(k, v)| (k, v.into_iter().collect::<HashMap<_, _>>()))
603                .collect::<HashMap<_, _>>();
604
605            for (lang, files) in langs {
606                // match localized against template
607                for (file, messages) in &files {
608                    let mut errors = vec![];
609                    if let Some(template_msgs) = template.get(file) {
610                        for (id, has_value) in messages {
611                            if let Some(template_has_value) = template_msgs.get(id) {
612                                if has_value != template_has_value {
613                                    if *has_value {
614                                        errors.push(format!("unexpected value, `{id}` has no value in template"));
615                                    } else if args.check_strict {
616                                        errors.push(format!("missing value, `{id}` has value in template"));
617                                    }
618                                }
619                            } else {
620                                errors.push(format!("unknown id, `{id}` not found in template file"));
621                            }
622                        }
623                        if args.check_strict {
624                            for template_id in template_msgs.keys() {
625                                if !messages.iter().any(|(i, _)| i == template_id) {
626                                    errors.push(format!("missing id, `{template_id}` not found in localized file"));
627                                }
628                            }
629                        }
630                    } else {
631                        errors.push("template file not found".to_owned());
632                    }
633                    if !errors.is_empty() {
634                        let lang_path = Path::new(lang.file_name().unwrap()).join(file);
635                        let template_path = Path::new("template").join(file);
636                        let mut msg = format!("`{}` does not match `{}`\n", lang_path.display(), template_path.display());
637                        for error in errors {
638                            msg.push_str("  ");
639                            msg.push_str(&error);
640                            msg.push('\n');
641                        }
642                        error!("{msg}");
643                    }
644                }
645                if args.check_strict {
646                    for template_file in template.keys() {
647                        if !files.iter().any(|(f, _)| f == template_file) {
648                            let lang_path = Path::new(lang.file_name().unwrap()).join(template_file);
649                            let template_path = Path::new("template").join(template_file);
650                            error!(
651                                "`{}` does not match `{}`\n   localized file not found",
652                                lang_path.display(),
653                                template_path.display()
654                            );
655                        }
656                    }
657                }
658            }
659        }
660    } else if args.verbose {
661        eprintln!("no template to compare, `{}` not found", output.join("template").display());
662    }
663}
664struct FluentParserErrors(Vec<fluent_syntax::parser::ParserError>);
665impl fmt::Display for FluentParserErrors {
666    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
667        let mut sep = "";
668        for e in &self.0 {
669            write!(f, "  {sep}{e}")?;
670            sep = "\n";
671        }
672        Ok(())
673    }
674}