Skip to main content

zng_ext_l10n/sources/
tar.rs

1use std::{borrow::Cow, collections::HashMap, fmt, io::Read as _, path::PathBuf, str::FromStr as _, sync::Arc};
2
3use semver::Version;
4use zng_clone_move::clmv;
5use zng_txt::Txt;
6use zng_var::{ArcEq, Var, WeakVar, const_var, var, weak_var};
7
8use crate::{FluentParserErrors, L10nSource, Lang, LangFilePath, LangMap, LangResourceStatus};
9
10/// Represents localization resources loaded from a `.tar` or `.tar.gz` container.
11///
12/// The expected container layout is `root_dir/{lang}/{file}.ftl` app files and `root_dir/{lang}/deps/{pkg-name}/{pkg-version}/{file}.ftl`
13/// for dependencies, same as [`L10nDir`], `root_dir` can have any name.
14///
15/// [`L10nDir`]: crate::L10nDir
16pub struct L10nTar {
17    data: L10nTarData,
18    available_langs: Var<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>>,
19    available_langs_status: Var<LangResourceStatus>,
20    res: HashMap<(Lang, LangFilePath), L10nEntry>,
21}
22impl L10nTar {
23    /// Load from TAR data.
24    pub fn load(data: impl Into<L10nTarData>) -> Self {
25        Self::load_impl(data.into())
26    }
27    fn load_impl(data: L10nTarData) -> Self {
28        let r = Self {
29            data,
30            available_langs: var(Arc::new(LangMap::new())),
31            available_langs_status: var(LangResourceStatus::Loading),
32            res: HashMap::default(),
33        };
34        r.load_available_langs();
35        r
36    }
37    fn load_available_langs(&self) {
38        let status = self.available_langs_status.clone();
39        let map = self.available_langs.clone();
40        let data = self.data.clone();
41        zng_task::spawn_wait(move || {
42            let r = (|| -> std::io::Result<_> {
43                let mut set: LangMap<HashMap<LangFilePath, PathBuf>> = LangMap::new();
44                let mut errors: Vec<Arc<dyn std::error::Error + Send + Sync>> = vec![];
45                // resource_var expects the "fatal" errors here to not insert in map
46                let data = data.decode_bytes()?;
47                let data: &[u8] = &data;
48                let mut archive = tar::Archive::new(std::io::Cursor::new(data));
49                let entries = archive.entries_with_seek()?;
50                for entry in entries {
51                    let entry = entry?;
52                    let ty = entry.header().entry_type();
53                    let entry = entry.path()?;
54
55                    const EXT: unicase::Ascii<&'static str> = unicase::Ascii::new("ftl");
56
57                    let is_ftl = ty.is_file()
58                        && entry
59                            .file_name()
60                            .and_then(|s| s.to_str())
61                            .and_then(|n| n.rsplit_once('.'))
62                            .map(|(_, ext)| ext.is_ascii() && unicase::Ascii::new(ext) == EXT)
63                            .unwrap_or(false);
64
65                    if !is_ftl {
66                        continue;
67                    }
68
69                    let utf8_path: Vec<_> = entry.iter().take(6).map(|s| s.to_str().unwrap_or("")).collect();
70                    let utf8_path = &utf8_path[1..];
71
72                    let (lang, mut file) = match utf8_path.len() {
73                        // lang/file.ftl
74                        2 => {
75                            let lang = utf8_path[0];
76                            let file_str = utf8_path[1].rsplit_once('.').unwrap().0;
77                            let file = Txt::from_str(if file_str == "_" { "" } else { file_str });
78                            (lang, LangFilePath::current_app(file))
79                        }
80                        // lang/deps/pkg-name/pkg-version/file.ftl
81                        5 => {
82                            if utf8_path[1] != "deps" {
83                                continue;
84                            }
85                            let lang = utf8_path[0];
86                            let pkg_name = Txt::from_str(utf8_path[2]);
87                            let pkg_version: Version = match utf8_path[3].parse() {
88                                Ok(v) => v,
89                                Err(e) => {
90                                    errors.push(Arc::new(e));
91                                    continue;
92                                }
93                            };
94                            let file_str = utf8_path[4].rsplit_once('.').unwrap().0;
95                            let file = Txt::from_str(if file_str == "_" { "" } else { file_str });
96
97                            (lang, LangFilePath::new(pkg_name, pkg_version, file))
98                        }
99                        _ => {
100                            continue;
101                        }
102                    };
103
104                    let lang = match Lang::from_str(lang) {
105                        Ok(l) => l,
106                        Err(e) => {
107                            errors.push(Arc::new(e));
108                            continue;
109                        }
110                    };
111
112                    if file.file == "_" {
113                        file.file = "".into();
114                    }
115
116                    set.get_exact_or_insert(lang, Default::default)
117                        .insert(file, entry.as_ref().to_owned());
118                }
119                map.set(set);
120                Ok(errors)
121            })();
122            match r {
123                Ok(e) => {
124                    if e.is_empty() {
125                        status.set(LangResourceStatus::Loaded)
126                    } else {
127                        let e = LangResourceStatus::Errors(e);
128                        tracing::error!("'loading available' {e}");
129                        status.set(e)
130                    }
131                }
132                Err(e) => {
133                    tracing::error!("failed to load tar, {e}");
134                    status.set(LangResourceStatus::Errors(vec![Arc::new(e)]))
135                }
136            }
137        });
138    }
139}
140impl L10nSource for L10nTar {
141    fn available_langs(&mut self) -> Var<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>> {
142        self.available_langs.read_only()
143    }
144
145    fn available_langs_status(&mut self) -> Var<LangResourceStatus> {
146        self.available_langs_status.read_only()
147    }
148
149    fn lang_resource(&mut self, lang: Lang, file: LangFilePath) -> Var<Option<ArcEq<fluent::FluentResource>>> {
150        match self.res.entry((lang, file)) {
151            std::collections::hash_map::Entry::Occupied(mut e) => {
152                if let Some(out) = e.get().res.upgrade() {
153                    out.read_only()
154                } else {
155                    let (lang, file) = e.key();
156                    let out = resource_var(
157                        self.data.clone(),
158                        &self.available_langs,
159                        e.get().status.clone(),
160                        lang.clone(),
161                        file.clone(),
162                    );
163                    e.get_mut().res = out.downgrade();
164                    out
165                }
166            }
167            std::collections::hash_map::Entry::Vacant(e) => {
168                let mut f = L10nEntry::new();
169                let (lang, file) = e.key();
170                let out = resource_var(
171                    self.data.clone(),
172                    &self.available_langs,
173                    f.status.clone(),
174                    lang.clone(),
175                    file.clone(),
176                );
177                f.res = out.downgrade();
178                e.insert(f);
179                out
180            }
181        }
182    }
183
184    fn lang_resource_status(&mut self, lang: Lang, file: LangFilePath) -> Var<LangResourceStatus> {
185        self.res.entry((lang, file)).or_insert_with(L10nEntry::new).status.read_only()
186    }
187}
188
/// TAR data for [`L10nTar`].
///
/// Can be converted from `&'static [u8]`, `Arc<Vec<u8>>` and `Vec<u8>`.
#[derive(Clone, PartialEq, Eq)]
pub enum L10nTarData {
    /// Embedded data.
    Static(&'static [u8]),
    /// Loaded data.
    Arc(Arc<Vec<u8>>),
}
impl fmt::Debug for L10nTarData {
    /// Writes only the variant name, the bytes are not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let variant = match self {
            Self::Static(_) => "Static",
            Self::Arc(_) => "Arc",
        };
        f.debug_tuple(variant).finish_non_exhaustive()
    }
}
impl From<&'static [u8]> for L10nTarData {
    fn from(value: &'static [u8]) -> Self {
        Self::Static(value)
    }
}
impl From<Arc<Vec<u8>>> for L10nTarData {
    fn from(value: Arc<Vec<u8>>) -> Self {
        Self::Arc(value)
    }
}
impl From<Vec<u8>> for L10nTarData {
    fn from(value: Vec<u8>) -> Self {
        Self::from(Arc::new(value))
    }
}
220impl L10nTarData {
221    /// Reference the data.
222    pub fn bytes(&self) -> &[u8] {
223        match self {
224            L10nTarData::Static(b) => b,
225            L10nTarData::Arc(b) => b,
226        }
227    }
228
229    /// Check if the bytes have the GZIP magic number.
230    pub fn is_gz(&self) -> bool {
231        let bytes = self.bytes();
232        bytes.len() >= 2 && bytes[0..2] == [0x1F, 0x8B]
233    }
234
235    /// Decompress bytes.
236    pub fn decode_bytes(&self) -> std::io::Result<Cow<'_, [u8]>> {
237        if self.is_gz() {
238            let bytes = self.bytes();
239            let mut data = vec![];
240            let mut decoder = flate2::read::GzDecoder::new(bytes);
241            decoder.read_to_end(&mut data)?;
242            Ok(Cow::Owned(data))
243        } else {
244            Ok(Cow::Borrowed(self.bytes()))
245        }
246    }
247}
248
249struct L10nEntry {
250    res: WeakVar<Option<ArcEq<fluent::FluentResource>>>,
251    status: Var<LangResourceStatus>,
252}
253impl L10nEntry {
254    fn new() -> Self {
255        Self {
256            res: weak_var(),
257            status: var(LangResourceStatus::Loading),
258        }
259    }
260}
261
262fn resource_var(
263    data: L10nTarData,
264    available_langs: &Var<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>>,
265    status: Var<LangResourceStatus>,
266    lang: Lang,
267    file: LangFilePath,
268) -> Var<Option<ArcEq<fluent::FluentResource>>> {
269    available_langs
270        .map(move |w| w.get_file(&lang, &file).cloned())
271        .flat_map(move |p| match p {
272            Some(p) => {
273                status.set(LangResourceStatus::Loading);
274                let rsp = zng_task::wait_respond(clmv!(p, status, data, || {
275                    const E: &str = "already decoded ok once to get entries";
276                    let data = data.decode_bytes().expect(E);
277                    let data: &[u8] = &data;
278                    let mut archive = tar::Archive::new(std::io::Cursor::new(data));
279                    for entry in archive.entries_with_seek().expect(E) {
280                        let mut entry = entry.expect(E);
281                        if entry.path().map(|ep| ep == p).unwrap_or(false) {
282                            let mut flt = String::new();
283                            if let Err(e) = entry.read_to_string(&mut flt) {
284                                tracing::error!("error reading fluent resource, {e}");
285                                status.set(LangResourceStatus::Errors(vec![Arc::new(e)]));
286                            } else {
287                                match fluent::FluentResource::try_new(flt) {
288                                    Ok(flt) => {
289                                        // ok
290                                        // Loaded set by `r` to avoid race condition in waiter.
291                                        return Some(ArcEq::new(flt));
292                                    }
293                                    Err(e) => {
294                                        let e = FluentParserErrors(e.1);
295                                        tracing::error!("error parsing fluent resource, {e}");
296                                        status.set(LangResourceStatus::Errors(vec![Arc::new(e)]));
297                                    }
298                                }
299                            }
300                            return None;
301                        }
302                    }
303                    status.set(LangResourceStatus::NotAvailable);
304                    None
305                }));
306                rsp.bind_filter_map(&status, |r| r.done().and_then(|r| r.as_ref()).map(|_| LangResourceStatus::Loaded))
307                    .perm();
308                rsp.map(|r| r.done().cloned().flatten())
309            }
310            None => const_var(None),
311        })
312}