zng_ext_l10n/sources/
tar.rs

1use std::{borrow::Cow, collections::HashMap, fmt, io::Read as _, path::PathBuf, str::FromStr as _, sync::Arc};
2
3use semver::Version;
4use zng_clone_move::clmv;
5use zng_txt::Txt;
6use zng_var::{ArcEq, ArcVar, BoxedVar, BoxedWeakVar, LocalVar, Var as _, WeakVar as _, types::WeakArcVar, var};
7
8use crate::{FluentParserErrors, L10nSource, Lang, LangFilePath, LangMap, LangResourceStatus};
9
10/// Represents localization resources loaded from a `.tar` or `.tar.gz` container.
11///
12/// The expected container layout is `root_dir/{lang}/{file}.ftl` app files and `root_dir/{lang}/deps/{pkg-name}/{pkg-version}/{file}.ftl`
13/// for dependencies, same as [`L10nDir`], `root_dir` can have any name.
14///
15/// [`L10nDir`]: crate::L10nDir
16pub struct L10nTar {
17    data: L10nTarData,
18    available_langs: ArcVar<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>>,
19    available_langs_status: ArcVar<LangResourceStatus>,
20    res: HashMap<(Lang, LangFilePath), L10nEntry>,
21}
22impl L10nTar {
23    /// Load from TAR data.
24    pub fn load(data: impl Into<L10nTarData>) -> Self {
25        Self::load_impl(data.into())
26    }
27    fn load_impl(data: L10nTarData) -> Self {
28        let r = Self {
29            data,
30            available_langs: var(Arc::new(LangMap::new())),
31            available_langs_status: var(LangResourceStatus::Loading),
32            res: HashMap::default(),
33        };
34        r.load_available_langs();
35        r
36    }
37    fn load_available_langs(&self) {
38        let status = self.available_langs_status.clone();
39        let map = self.available_langs.clone();
40        let data = self.data.clone();
41        zng_task::spawn_wait(move || {
42            let r = (|| -> std::io::Result<_> {
43                let mut set: LangMap<HashMap<LangFilePath, PathBuf>> = LangMap::new();
44                let mut errors: Vec<Arc<dyn std::error::Error + Send + Sync>> = vec![];
45                // resource_var expects the "fatal" errors here to not insert in map
46                let data = data.decode_bytes()?;
47                let data: &[u8] = &data;
48                let mut archive = tar::Archive::new(std::io::Cursor::new(data));
49                let entries = archive.entries_with_seek()?;
50                for entry in entries {
51                    let entry = entry?;
52                    let ty = entry.header().entry_type();
53                    let entry = entry.path()?;
54
55                    const EXT: unicase::Ascii<&'static str> = unicase::Ascii::new("ftl");
56
57                    let is_ftl = ty.is_file()
58                        && entry
59                            .file_name()
60                            .and_then(|s| s.to_str())
61                            .and_then(|n| n.rsplit_once('.'))
62                            .map(|(_, ext)| ext.is_ascii() && unicase::Ascii::new(ext) == EXT)
63                            .unwrap_or(false);
64
65                    if !is_ftl {
66                        continue;
67                    }
68
69                    let utf8_path: Vec<_> = entry.iter().take(6).map(|s| s.to_str().unwrap_or("")).collect();
70                    let utf8_path = &utf8_path[1..];
71
72                    let (lang, mut file) = match utf8_path.len() {
73                        // lang/file.ftl
74                        2 => {
75                            let lang = utf8_path[0];
76                            let file = Txt::from_str(utf8_path[1].rsplit_once('.').unwrap().0);
77                            (lang, LangFilePath::current_app(file))
78                        }
79                        // lang/deps/pkg-name/pkg-version/file.ftl
80                        5 => {
81                            if utf8_path[1] != "deps" {
82                                continue;
83                            }
84                            let lang = utf8_path[0];
85                            let pkg_name = Txt::from_str(utf8_path[2]);
86                            let pkg_version: Version = match utf8_path[3].parse() {
87                                Ok(v) => v,
88                                Err(e) => {
89                                    errors.push(Arc::new(e));
90                                    continue;
91                                }
92                            };
93                            let file = Txt::from_str(utf8_path[4]);
94
95                            (lang, LangFilePath::new(pkg_name, pkg_version, file))
96                        }
97                        _ => {
98                            continue;
99                        }
100                    };
101
102                    let lang = match Lang::from_str(lang) {
103                        Ok(l) => l,
104                        Err(e) => {
105                            errors.push(Arc::new(e));
106                            continue;
107                        }
108                    };
109
110                    if file.file == "_" {
111                        file.file = "".into();
112                    }
113
114                    set.get_exact_or_insert(lang, Default::default)
115                        .insert(file, entry.as_ref().to_owned());
116                }
117                map.set(set);
118                Ok(errors)
119            })();
120            match r {
121                Ok(e) => {
122                    if e.is_empty() {
123                        status.set(LangResourceStatus::Loaded)
124                    } else {
125                        let e = LangResourceStatus::Errors(e);
126                        tracing::error!("'loading available' {e}");
127                        status.set(e)
128                    }
129                }
130                Err(e) => {
131                    tracing::error!("failed to load tar, {e}");
132                    status.set(LangResourceStatus::Errors(vec![Arc::new(e)]))
133                }
134            }
135        });
136    }
137}
138impl L10nSource for L10nTar {
139    fn available_langs(&mut self) -> BoxedVar<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>> {
140        self.available_langs.read_only().boxed()
141    }
142
143    fn available_langs_status(&mut self) -> BoxedVar<LangResourceStatus> {
144        self.available_langs_status.read_only().boxed()
145    }
146
147    fn lang_resource(&mut self, lang: Lang, file: LangFilePath) -> BoxedVar<Option<ArcEq<fluent::FluentResource>>> {
148        match self.res.entry((lang, file)) {
149            std::collections::hash_map::Entry::Occupied(mut e) => {
150                if let Some(out) = e.get().res.upgrade() {
151                    out.read_only().boxed()
152                } else {
153                    let (lang, file) = e.key();
154                    let out = resource_var(
155                        self.data.clone(),
156                        &self.available_langs,
157                        e.get().status.clone(),
158                        lang.clone(),
159                        file.clone(),
160                    );
161                    e.get_mut().res = out.downgrade();
162                    out
163                }
164            }
165            std::collections::hash_map::Entry::Vacant(e) => {
166                let mut f = L10nEntry::new();
167                let (lang, file) = e.key();
168                let out = resource_var(
169                    self.data.clone(),
170                    &self.available_langs,
171                    f.status.clone(),
172                    lang.clone(),
173                    file.clone(),
174                );
175                f.res = out.downgrade();
176                e.insert(f);
177                out
178            }
179        }
180    }
181
182    fn lang_resource_status(&mut self, lang: Lang, file: LangFilePath) -> BoxedVar<LangResourceStatus> {
183        self.res
184            .entry((lang, file))
185            .or_insert_with(L10nEntry::new)
186            .status
187            .read_only()
188            .boxed()
189    }
190}
191
/// TAR data for [`L10nTar`].
///
/// Two values are equal only if they are the same variant and hold equal bytes.
#[derive(Clone, Eq, PartialEq)]
pub enum L10nTarData {
    /// Embedded data.
    Static(&'static [u8]),
    /// Loaded data.
    Arc(Arc<Vec<u8>>),
}
200impl fmt::Debug for L10nTarData {
201    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
202        match self {
203            Self::Static(_) => f.debug_tuple("Static").finish_non_exhaustive(),
204            Self::Arc(_) => f.debug_tuple("Arc").finish_non_exhaustive(),
205        }
206    }
207}
208impl From<&'static [u8]> for L10nTarData {
209    fn from(value: &'static [u8]) -> Self {
210        L10nTarData::Static(value)
211    }
212}
213impl From<Arc<Vec<u8>>> for L10nTarData {
214    fn from(value: Arc<Vec<u8>>) -> Self {
215        L10nTarData::Arc(value)
216    }
217}
218impl From<Vec<u8>> for L10nTarData {
219    fn from(value: Vec<u8>) -> Self {
220        L10nTarData::Arc(Arc::new(value))
221    }
222}
223impl L10nTarData {
224    /// Reference the data.
225    pub fn bytes(&self) -> &[u8] {
226        match self {
227            L10nTarData::Static(b) => b,
228            L10nTarData::Arc(b) => b,
229        }
230    }
231
232    /// Check if the bytes have the GZIP magic number.
233    pub fn is_gz(&self) -> bool {
234        let bytes = self.bytes();
235        bytes.len() >= 2 && bytes[0..2] == [0x1F, 0x8B]
236    }
237
238    /// Decompress bytes.
239    pub fn decode_bytes(&self) -> std::io::Result<Cow<[u8]>> {
240        if self.is_gz() {
241            let bytes = self.bytes();
242            let mut data = vec![];
243            let mut decoder = flate2::read::GzDecoder::new(bytes);
244            decoder.read_to_end(&mut data)?;
245            Ok(Cow::Owned(data))
246        } else {
247            Ok(Cow::Borrowed(self.bytes()))
248        }
249    }
250}
251
252struct L10nEntry {
253    res: BoxedWeakVar<Option<ArcEq<fluent::FluentResource>>>,
254    status: ArcVar<LangResourceStatus>,
255}
256impl L10nEntry {
257    fn new() -> Self {
258        Self {
259            res: WeakArcVar::default().boxed(),
260            status: var(LangResourceStatus::Loading),
261        }
262    }
263}
264
265fn resource_var(
266    data: L10nTarData,
267    available_langs: &ArcVar<Arc<LangMap<HashMap<LangFilePath, PathBuf>>>>,
268    status: ArcVar<LangResourceStatus>,
269    lang: Lang,
270    file: LangFilePath,
271) -> BoxedVar<Option<ArcEq<fluent::FluentResource>>> {
272    available_langs
273        .map(move |w| w.get_file(&lang, &file).cloned())
274        .flat_map(move |p| match p {
275            Some(p) => {
276                status.set(LangResourceStatus::Loading);
277                let rsp = zng_task::wait_respond(clmv!(p, status, data, || {
278                    const E: &str = "already decoded ok once to get entries";
279                    let data = data.decode_bytes().expect(E);
280                    let data: &[u8] = &data;
281                    let mut archive = tar::Archive::new(std::io::Cursor::new(data));
282                    for entry in archive.entries_with_seek().expect(E) {
283                        let mut entry = entry.expect(E);
284                        if entry.path().map(|ep| ep == p).unwrap_or(false) {
285                            let mut flt = String::new();
286                            if let Err(e) = entry.read_to_string(&mut flt) {
287                                tracing::error!("error reading fluent resource, {e}");
288                                status.set(LangResourceStatus::Errors(vec![Arc::new(e)]));
289                            } else {
290                                match fluent::FluentResource::try_new(flt) {
291                                    Ok(flt) => {
292                                        // ok
293                                        // Loaded set by `r` to avoid race condition in waiter.
294                                        return Some(ArcEq::new(flt));
295                                    }
296                                    Err(e) => {
297                                        let e = FluentParserErrors(e.1);
298                                        tracing::error!("error parsing fluent resource, {e}");
299                                        status.set(LangResourceStatus::Errors(vec![Arc::new(e)]));
300                                    }
301                                }
302                            }
303                            return None;
304                        }
305                    }
306                    status.set(LangResourceStatus::NotAvailable);
307                    None
308                }));
309                rsp.bind_filter_map(&status, |r| r.done().and_then(|r| r.as_ref()).map(|_| LangResourceStatus::Loaded))
310                    .perm();
311                rsp.map(|r| r.done().cloned().flatten()).boxed()
312            }
313            None => LocalVar(None).boxed(),
314        })
315        .boxed()
316}