Skip to main content

zng_ext_l10n/sources/
tar.rs

1use std::{borrow::Cow, collections::HashMap, fmt, io::Read as _, path::PathBuf, str::FromStr as _, sync::Arc};
2
3use semver::Version;
4use zng_clone_move::clmv;
5use zng_txt::Txt;
6use zng_var::{ArcEq, Var, WeakVar, const_var, var, weak_var};
7
8use crate::{FluentParserErrors, L10nSource, Lang, LangFilePath, LangMap, LangResourceStatus};
9
10/// Represents localization resources loaded from a `.tar` or `.tar.gz` container.
11///
12/// The expected container layout is `root_dir/{lang}/{file}.ftl` app files and `root_dir/{lang}/deps/{pkg-name}/{pkg-version}/{file}.ftl`
13/// for dependencies, same as [`L10nDir`], `root_dir` can have any name.
14///
15/// [`L10nDir`]: crate::L10nDir
16pub struct L10nTar {
17    data: L10nTarData,
18    available_langs: Var<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>>,
19    available_langs_status: Var<LangResourceStatus>,
20    res: HashMap<(Lang, LangFilePath), L10nEntry>,
21}
22impl L10nTar {
23    /// Load from TAR data.
24    pub fn load(data: impl Into<L10nTarData>) -> Self {
25        Self::load_impl(data.into())
26    }
27    fn load_impl(data: L10nTarData) -> Self {
28        let r = Self {
29            data,
30            available_langs: var(Arc::new(LangMap::new())),
31            available_langs_status: var(LangResourceStatus::Loading),
32            res: HashMap::default(),
33        };
34        r.load_available_langs();
35        r
36    }
37    fn load_available_langs(&self) {
38        let status = self.available_langs_status.clone();
39        let map = self.available_langs.clone();
40        let data = self.data.clone();
41        zng_task::spawn_wait(move || {
42            let r = (|| -> std::io::Result<_> {
43                let mut set: LangMap<HashMap<LangFilePath, PathBuf>> = LangMap::new();
44                let mut errors: Vec<Arc<dyn std::error::Error + Send + Sync>> = vec![];
45                // resource_var expects the "fatal" errors here to not insert in map
46                let data = data.decode_bytes()?;
47                let data: &[u8] = &data;
48                let mut archive = tar::Archive::new(std::io::Cursor::new(data));
49                let entries = archive.entries_with_seek()?;
50                for entry in entries {
51                    let entry = entry?;
52                    let ty = entry.header().entry_type();
53                    let entry = entry.path()?;
54
55                    const EXT: unicase::Ascii<&'static str> = unicase::Ascii::new("ftl");
56
57                    let is_ftl = ty.is_file()
58                        && entry
59                            .file_name()
60                            .and_then(|s| s.to_str())
61                            .and_then(|n| n.rsplit_once('.'))
62                            .map(|(_, ext)| ext.is_ascii() && unicase::Ascii::new(ext) == EXT)
63                            .unwrap_or(false);
64
65                    if !is_ftl {
66                        continue;
67                    }
68
69                    let utf8_path: Vec<_> = entry.iter().take(6).map(|s| s.to_str().unwrap_or("")).collect();
70                    let utf8_path = &utf8_path[1..];
71
72                    let (lang, mut file) = match utf8_path.len() {
73                        // lang/file.ftl
74                        2 => {
75                            let lang = utf8_path[0];
76                            let file_str = utf8_path[1].rsplit_once('.').unwrap().0;
77                            let file = Txt::from_str(if file_str == "_" { "" } else { file_str });
78                            (lang, LangFilePath::current_app(file))
79                        }
80                        // lang/deps/pkg-name/pkg-version/file.ftl
81                        5 => {
82                            if utf8_path[1] != "deps" {
83                                continue;
84                            }
85                            let lang = utf8_path[0];
86                            let pkg_name = Txt::from_str(utf8_path[2]);
87                            let pkg_version: Version = match utf8_path[3].parse() {
88                                Ok(v) => v,
89                                Err(e) => {
90                                    errors.push(Arc::new(e));
91                                    continue;
92                                }
93                            };
94                            let file_str = utf8_path[4].rsplit_once('.').unwrap().0;
95                            let file = Txt::from_str(if file_str == "_" { "" } else { file_str });
96
97                            (lang, LangFilePath::new(pkg_name, pkg_version, file))
98                        }
99                        _ => {
100                            continue;
101                        }
102                    };
103
104                    let lang = match Lang::from_str(lang) {
105                        Ok(l) => l,
106                        Err(e) => {
107                            errors.push(Arc::new(e));
108                            continue;
109                        }
110                    };
111
112                    if file.file == "_" {
113                        file.file = "".into();
114                    }
115
116                    set.get_exact_or_insert(lang, Default::default)
117                        .insert(file, entry.as_ref().to_owned());
118                }
119                for m in set.values_mut() {
120                    m.shrink_to_fit();
121                }
122                set.shrink_to_fit();
123                map.set(set);
124                Ok(errors)
125            })();
126            match r {
127                Ok(e) => {
128                    if e.is_empty() {
129                        status.set(LangResourceStatus::Loaded)
130                    } else {
131                        let e = LangResourceStatus::Errors(e);
132                        tracing::error!("'loading available' {e}");
133                        status.set(e)
134                    }
135                }
136                Err(e) => {
137                    tracing::error!("failed to load tar, {e}");
138                    status.set(LangResourceStatus::Errors(vec![Arc::new(e)]))
139                }
140            }
141        });
142    }
143}
144impl L10nSource for L10nTar {
145    fn available_langs(&mut self) -> Var<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>> {
146        self.available_langs.read_only()
147    }
148
149    fn available_langs_status(&mut self) -> Var<LangResourceStatus> {
150        self.available_langs_status.read_only()
151    }
152
153    fn lang_resource(&mut self, lang: Lang, file: LangFilePath) -> Var<Option<ArcEq<fluent::FluentResource>>> {
154        match self.res.entry((lang, file)) {
155            std::collections::hash_map::Entry::Occupied(mut e) => {
156                if let Some(out) = e.get().res.upgrade() {
157                    out.read_only()
158                } else {
159                    let (lang, file) = e.key();
160                    let out = resource_var(
161                        self.data.clone(),
162                        &self.available_langs,
163                        e.get().status.clone(),
164                        lang.clone(),
165                        file.clone(),
166                    );
167                    e.get_mut().res = out.downgrade();
168                    out
169                }
170            }
171            std::collections::hash_map::Entry::Vacant(e) => {
172                let mut f = L10nEntry::new();
173                let (lang, file) = e.key();
174                let out = resource_var(
175                    self.data.clone(),
176                    &self.available_langs,
177                    f.status.clone(),
178                    lang.clone(),
179                    file.clone(),
180                );
181                f.res = out.downgrade();
182                e.insert(f);
183                out
184            }
185        }
186    }
187
188    fn lang_resource_status(&mut self, lang: Lang, file: LangFilePath) -> Var<LangResourceStatus> {
189        self.res.entry((lang, file)).or_insert_with(L10nEntry::new).status.read_only()
190    }
191}
192
/// TAR data for [`L10nTar`].
///
/// The bytes can be a plain TAR or a GZIP compressed TAR. Equality compares the variant and the bytes.
#[derive(Clone, PartialEq, Eq)]
pub enum L10nTarData {
    /// Embedded data.
    Static(&'static [u8]),
    /// Loaded data.
    Arc(Arc<Vec<u8>>),
}
impl fmt::Debug for L10nTarData {
    /// Writes only the variant name, the bytes are elided as `..`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let variant = match self {
            Self::Static(_) => "Static",
            Self::Arc(_) => "Arc",
        };
        f.debug_tuple(variant).finish_non_exhaustive()
    }
}
impl From<&'static [u8]> for L10nTarData {
    fn from(value: &'static [u8]) -> Self {
        Self::Static(value)
    }
}
impl From<Arc<Vec<u8>>> for L10nTarData {
    fn from(value: Arc<Vec<u8>>) -> Self {
        Self::Arc(value)
    }
}
impl From<Vec<u8>> for L10nTarData {
    fn from(value: Vec<u8>) -> Self {
        // wrap once and reuse the shared-buffer conversion
        Self::from(Arc::new(value))
    }
}
224impl L10nTarData {
225    /// Reference the data.
226    pub fn bytes(&self) -> &[u8] {
227        match self {
228            L10nTarData::Static(b) => b,
229            L10nTarData::Arc(b) => b,
230        }
231    }
232
233    /// Check if the bytes have the GZIP magic number.
234    pub fn is_gz(&self) -> bool {
235        let bytes = self.bytes();
236        bytes.len() >= 2 && bytes[0..2] == [0x1F, 0x8B]
237    }
238
239    /// Decompress bytes.
240    pub fn decode_bytes(&self) -> std::io::Result<Cow<'_, [u8]>> {
241        if self.is_gz() {
242            let bytes = self.bytes();
243            let mut data = vec![];
244            let mut decoder = flate2::read::GzDecoder::new(bytes);
245            decoder.read_to_end(&mut data)?;
246            Ok(Cow::Owned(data))
247        } else {
248            Ok(Cow::Borrowed(self.bytes()))
249        }
250    }
251}
252
253struct L10nEntry {
254    res: WeakVar<Option<ArcEq<fluent::FluentResource>>>,
255    status: Var<LangResourceStatus>,
256}
257impl L10nEntry {
258    fn new() -> Self {
259        Self {
260            res: weak_var(),
261            status: var(LangResourceStatus::Loading),
262        }
263    }
264}
265
266fn resource_var(
267    data: L10nTarData,
268    available_langs: &Var<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>>,
269    status: Var<LangResourceStatus>,
270    lang: Lang,
271    file: LangFilePath,
272) -> Var<Option<ArcEq<fluent::FluentResource>>> {
273    available_langs
274        .map(move |w| w.get_file(&lang, &file).cloned())
275        .flat_map(move |p| match p {
276            Some(p) => {
277                status.set(LangResourceStatus::Loading);
278                let rsp = zng_task::wait_respond(clmv!(p, status, data, || {
279                    const E: &str = "already decoded ok once to get entries";
280                    let data = data.decode_bytes().expect(E);
281                    let data: &[u8] = &data;
282                    let mut archive = tar::Archive::new(std::io::Cursor::new(data));
283                    for entry in archive.entries_with_seek().expect(E) {
284                        let mut entry = entry.expect(E);
285                        if entry.path().map(|ep| ep == p).unwrap_or(false) {
286                            let mut flt = String::new();
287                            if let Err(e) = entry.read_to_string(&mut flt) {
288                                tracing::error!("error reading fluent resource, {e}");
289                                status.set(LangResourceStatus::Errors(vec![Arc::new(e)]));
290                            } else {
291                                match fluent::FluentResource::try_new(flt) {
292                                    Ok(flt) => {
293                                        // ok
294                                        // Loaded set by `r` to avoid race condition in waiter.
295                                        return Some(ArcEq::new(flt));
296                                    }
297                                    Err(e) => {
298                                        let e = FluentParserErrors(e.1);
299                                        tracing::error!("error parsing fluent resource, {e}");
300                                        status.set(LangResourceStatus::Errors(vec![Arc::new(e)]));
301                                    }
302                                }
303                            }
304                            return None;
305                        }
306                    }
307                    status.set(LangResourceStatus::NotAvailable);
308                    None
309                }));
310                rsp.bind_filter_map(&status, |r| r.done().and_then(|r| r.as_ref()).map(|_| LangResourceStatus::Loaded))
311                    .perm();
312                rsp.map(|r| r.done().cloned().flatten())
313            }
314            None => const_var(None),
315        })
316}