zng_txt/
lib.rs

1#![doc(html_favicon_url = "https://raw.githubusercontent.com/zng-ui/zng/main/examples/image/res/zng-logo-icon.png")]
2#![doc(html_logo_url = "https://raw.githubusercontent.com/zng-ui/zng/main/examples/image/res/zng-logo.png")]
3//!
4//! String type optimized for sharing.
5//!
6//! # Crate
7//!
8#![doc = include_str!(concat!("../", std::env!("CARGO_PKG_README")))]
9#![warn(unused_extern_crates)]
10#![warn(missing_docs)]
11
12use std::{borrow::Cow, fmt, hash::Hash, mem, ops::Deref, sync::Arc};
13
/// Maximum number of UTF-8 bytes that can be stored in the inlined representation.
const INLINE_MAX: usize = 3 * mem::size_of::<usize>();

/// Borrows the string stored in an inline buffer.
///
/// The string ends at the first `'\0'` byte, or spans the entire buffer if there is none.
///
/// # Panics
///
/// Panics if the selected bytes are not valid UTF-8.
fn inline_to_str(d: &[u8; INLINE_MAX]) -> &str {
    let end = d.iter().position(|&b| b == b'\0').unwrap_or(INLINE_MAX);
    std::str::from_utf8(&d[..end]).unwrap()
}

/// Copies `s` to the start of a new inline buffer, all remaining bytes are `'\0'`.
///
/// # Panics
///
/// Panics if `s` is longer than [`INLINE_MAX`] bytes.
fn str_to_inline(s: &str) -> [u8; INLINE_MAX] {
    let bytes = s.as_bytes();
    let mut inline = [0u8; INLINE_MAX];
    inline[..bytes.len()].copy_from_slice(bytes);
    inline
}
29
30#[derive(Clone)]
31enum TxtData {
32    Static(&'static str),
33    Inline([u8; INLINE_MAX]),
34    String(String),
35    Arc(Arc<str>),
36}
37impl fmt::Debug for TxtData {
38    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
39        if f.alternate() {
40            match self {
41                Self::Static(s) => write!(f, "Static({s:?})"),
42                Self::Inline(d) => write!(f, "Inline({:?})", inline_to_str(d)),
43                Self::String(s) => write!(f, "String({s:?})"),
44                Self::Arc(s) => write!(f, "Arc({s:?})"),
45            }
46        } else {
47            write!(f, "{:?}", self.deref())
48        }
49    }
50}
51impl fmt::Display for TxtData {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        write!(f, "{}", self.deref())
54    }
55}
56impl PartialEq for TxtData {
57    fn eq(&self, other: &Self) -> bool {
58        self.deref() == other.deref()
59    }
60}
61impl Eq for TxtData {}
62impl Hash for TxtData {
63    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
64        Hash::hash(&self.deref(), state)
65    }
66}
67impl Deref for TxtData {
68    type Target = str;
69
70    fn deref(&self) -> &str {
71        match self {
72            TxtData::Static(s) => s,
73            TxtData::Inline(d) => inline_to_str(d),
74            TxtData::String(s) => s,
75            TxtData::Arc(s) => s,
76        }
77    }
78}
79
/// Identifies the internal representation a [`Txt`] is currently using for the string data.
///
/// Use [`Txt::repr`] to retrieve.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TxtRepr {
    /// The string is a borrowed `&'static str`.
    Static,
    /// The string is small and stored in place, as a null terminated `[u8; {size_of::<usize>() * 3}]`.
    Inline,
    /// The string is an owned `String`.
    String,
    /// The string is a shared `Arc<str>`.
    Arc,
}
94
95/// Text string type, can be one of multiple internal representations, mostly optimized for sharing and one for editing.
96///
97/// This type dereferences to [`str`] so you can use all methods of that type.
98///
99/// For editing some mutable methods are provided, you can also call [`Txt::to_mut`]
100/// to access all mutating methods of [`String`]. After editing you can call [`Txt::end_mut`] to convert
101/// back to an inner representation optimized for sharing.
102///
103/// See [`Txt::repr`] for more details about the inner representations.
104#[derive(PartialEq, Eq, Hash)]
105pub struct Txt(TxtData);
106/// Clones the text.
107///
108/// If the inner representation is [`TxtRepr::String`] the returned value is in a representation optimized
109/// for sharing, either a static empty, an inlined short or an `Arc<str>` long string.
110impl Clone for Txt {
111    fn clone(&self) -> Self {
112        Self(match &self.0 {
113            TxtData::Static(s) => TxtData::Static(s),
114            TxtData::Inline(d) => TxtData::Inline(*d),
115            TxtData::String(s) => return Self::from_str(s),
116            TxtData::Arc(s) => TxtData::Arc(Arc::clone(s)),
117        })
118    }
119}
120impl Txt {
121    /// New text that is a `&'static str`.
122    pub const fn from_static(s: &'static str) -> Txt {
123        Txt(TxtData::Static(s))
124    }
125
126    /// New text from a [`String`] optimized for editing.
127    ///
128    /// If you don't plan to edit the text after this call consider using [`from_str`] instead.
129    ///
130    /// [`from_str`]: Self::from_str
131    pub const fn from_string(s: String) -> Txt {
132        Txt(TxtData::String(s))
133    }
134
135    /// New cloned from `s`.
136    ///
137    /// The text will be internally optimized for sharing, if you plan to edit the text after this call
138    /// consider using [`from_string`] instead.
139    ///
140    /// [`from_string`]: Self::from_string
141    #[expect(clippy::should_implement_trait)] // have implemented trait, this one is infallible.
142    pub fn from_str(s: &str) -> Txt {
143        if s.is_empty() {
144            Self::from_static("")
145        } else if s.len() <= INLINE_MAX && !s.contains('\0') {
146            Self(TxtData::Inline(str_to_inline(s)))
147        } else {
148            Self(TxtData::Arc(Arc::from(s)))
149        }
150    }
151
152    /// New from a shared arc str.
153    ///
154    /// Note that the text can outlive the `Arc`, by cloning the string data when modified or
155    /// to use a more optimal representation, you cannot use the reference count of `s` to track
156    /// the lifetime of the text.
157    ///
158    /// [`from_string`]: Self::from_string
159    pub fn from_arc(s: Arc<str>) -> Txt {
160        if s.is_empty() {
161            Self::from_static("")
162        } else if s.len() <= INLINE_MAX && !s.contains('\0') {
163            Self(TxtData::Inline(str_to_inline(&s)))
164        } else {
165            Self(TxtData::Arc(s))
166        }
167    }
168
169    /// New text that is an inlined `char`.
170    pub fn from_char(c: char) -> Txt {
171        #[allow(clippy::assertions_on_constants)]
172        const _: () = assert!(4 <= INLINE_MAX, "cannot inline char");
173
174        let mut buf = [0u8; 4];
175        let s = c.encode_utf8(&mut buf);
176
177        if s.contains('\0') {
178            return Txt(TxtData::Arc(Arc::from(&*s)));
179        }
180
181        Txt(TxtData::Inline(str_to_inline(s)))
182    }
183
184    /// New text from [`format_args!`], avoids allocation if the text is static (no args) or can fit the inlined representation.
185    pub fn from_fmt(args: std::fmt::Arguments) -> Txt {
186        if let Some(s) = args.as_str() {
187            Txt::from_static(s)
188        } else {
189            let mut r = Txt(TxtData::Inline([b'\0'; INLINE_MAX]));
190            std::fmt::write(&mut r, args).unwrap();
191            r
192        }
193    }
194
195    /// Identifies how the text is currently stored.
196    pub const fn repr(&self) -> TxtRepr {
197        match &self.0 {
198            TxtData::Static(_) => TxtRepr::Static,
199            TxtData::Inline(_) => TxtRepr::Inline,
200            TxtData::String(_) => TxtRepr::String,
201            TxtData::Arc(_) => TxtRepr::Arc,
202        }
203    }
204
205    /// Acquires a mutable reference to a [`String`] buffer.
206    ///
207    /// Converts the text to an internal representation optimized for editing, you can call [`end_mut`] after
208    /// editing to re-optimize the text for sharing.
209    ///
210    /// [`end_mut`]: Self::end_mut
211    pub fn to_mut(&mut self) -> &mut String {
212        self.0 = match mem::replace(&mut self.0, TxtData::Static("")) {
213            TxtData::String(s) => TxtData::String(s),
214            TxtData::Static(s) => TxtData::String(s.to_owned()),
215            TxtData::Inline(d) => TxtData::String(inline_to_str(&d).to_owned()),
216            TxtData::Arc(s) => TxtData::String((*s).to_owned()),
217        };
218
219        if let TxtData::String(s) = &mut self.0 { s } else { unreachable!() }
220    }
221
222    /// Convert the inner representation of the string to not be [`String`]. After
223    /// this call the text can be cheaply cloned.
224    pub fn end_mut(&mut self) {
225        match mem::replace(&mut self.0, TxtData::Static("")) {
226            TxtData::String(s) => {
227                *self = Self::from_str(&s);
228            }
229            already => self.0 = already,
230        }
231    }
232
233    /// Extracts the owned string.
234    ///
235    /// Turns the text to owned if it was borrowed.
236    pub fn into_owned(self) -> String {
237        match self.0 {
238            TxtData::String(s) => s,
239            TxtData::Static(s) => s.to_owned(),
240            TxtData::Inline(d) => inline_to_str(&d).to_owned(),
241            TxtData::Arc(s) => (*s).to_owned(),
242        }
243    }
244
245    /// Calls [`String::clear`] if the text is owned, otherwise
246    /// replaces `self` with an empty str (`""`).
247    pub fn clear(&mut self) {
248        match &mut self.0 {
249            TxtData::String(s) => s.clear(),
250            d => *d = TxtData::Static(""),
251        }
252    }
253
254    /// Removes the last character from the text and returns it.
255    ///
256    /// Returns None if this `Txt` is empty.
257    ///
258    /// This method only converts to [`TxtRepr::String`] if the
259    /// internal representation is [`TxtRepr::Arc`], other representations are reborrowed.
260    pub fn pop(&mut self) -> Option<char> {
261        match &mut self.0 {
262            TxtData::String(s) => s.pop(),
263            TxtData::Static(s) => {
264                if let Some((i, c)) = s.char_indices().last() {
265                    *s = &s[..i];
266                    Some(c)
267                } else {
268                    None
269                }
270            }
271            TxtData::Inline(d) => {
272                let s = inline_to_str(d);
273                if let Some((i, c)) = s.char_indices().last() {
274                    if !s.is_empty() {
275                        *d = str_to_inline(&s[..i]);
276                    } else {
277                        self.0 = TxtData::Static("");
278                    }
279                    Some(c)
280                } else {
281                    None
282                }
283            }
284            TxtData::Arc(_) => self.to_mut().pop(),
285        }
286    }
287
288    /// Shortens this `Txt` to the specified length.
289    ///
290    /// If `new_len` is greater than the text's current length, this has no
291    /// effect.
292    ///
293    /// This method only converts to [`TxtRepr::String`] if the
294    /// internal representation is [`TxtRepr::Arc`], other representations are reborrowed.
295    pub fn truncate(&mut self, new_len: usize) {
296        match &mut self.0 {
297            TxtData::String(s) => s.truncate(new_len),
298            TxtData::Static(s) => {
299                if new_len <= s.len() {
300                    assert!(s.is_char_boundary(new_len));
301                    *s = &s[..new_len];
302                }
303            }
304            TxtData::Inline(d) => {
305                if new_len == 0 {
306                    self.0 = TxtData::Static("");
307                } else {
308                    let s = inline_to_str(d);
309                    if new_len < s.len() {
310                        assert!(s.is_char_boundary(new_len));
311                        d[new_len..].iter_mut().for_each(|b| *b = b'\0');
312                    }
313                }
314            }
315            TxtData::Arc(_) => self.to_mut().truncate(new_len),
316        }
317    }
318
319    /// Splits the text into two at the given index.
320    ///
321    /// Returns a new `Txt`. `self` contains bytes `[0, at)`, and
322    /// the returned `Txt` contains bytes `[at, len)`. `at` must be on the
323    /// boundary of a UTF-8 code point.
324    ///
325    /// This method only converts to [`TxtRepr::String`] if the
326    /// internal representation is [`TxtRepr::Arc`], other representations are reborrowed.
327    pub fn split_off(&mut self, at: usize) -> Txt {
328        match &mut self.0 {
329            TxtData::String(s) => Txt::from_string(s.split_off(at)),
330            TxtData::Static(s) => {
331                assert!(s.is_char_boundary(at));
332                let other = &s[at..];
333                *s = &s[at..];
334                Txt(TxtData::Static(other))
335            }
336            TxtData::Inline(d) => {
337                let s = inline_to_str(d);
338                assert!(s.is_char_boundary(at));
339                let a_len = at;
340                let b_len = s.len() - at;
341
342                let r = Txt(if b_len == 0 {
343                    TxtData::Static("")
344                } else {
345                    TxtData::Inline(str_to_inline(&s[at..]))
346                });
347
348                if a_len == 0 {
349                    self.0 = TxtData::Static("");
350                } else {
351                    *d = str_to_inline(&s[..at]);
352                }
353
354                r
355            }
356            TxtData::Arc(_) => Txt::from_string(self.to_mut().split_off(at)),
357        }
358    }
359
360    /// Push the character to the end of the text.
361    ///
362    /// This method avoids converting to [`TxtRepr::String`] when the current text
363    /// plus char can fit inlined.
364    pub fn push(&mut self, c: char) {
365        match &mut self.0 {
366            TxtData::String(s) => s.push(c),
367            TxtData::Inline(inlined) => {
368                if let Some(len) = inlined.iter().position(|&c| c == b'\0') {
369                    let c_len = c.len_utf8();
370                    if len + c_len <= INLINE_MAX && c != '\0' {
371                        let mut buf = [0u8; 4];
372                        let s = c.encode_utf8(&mut buf);
373                        inlined[len..len + c_len].copy_from_slice(s.as_bytes());
374                        return;
375                    }
376                }
377                self.to_mut().push(c)
378            }
379            _ => {
380                let len = self.len();
381                let c_len = c.len_utf8();
382                if len + c_len <= INLINE_MAX && c != '\0' {
383                    let mut inlined = str_to_inline(self.as_str());
384                    let mut buf = [0u8; 4];
385                    let s = c.encode_utf8(&mut buf);
386                    inlined[len..len + c_len].copy_from_slice(s.as_bytes());
387
388                    self.0 = TxtData::Inline(inlined);
389                } else {
390                    self.to_mut().push(c)
391                }
392            }
393        }
394    }
395
396    /// Push the string to the end of the text.
397    ///
398    /// This method avoids converting to [`TxtRepr::String`] when the current text
399    /// plus char can fit inlined.
400    pub fn push_str(&mut self, s: &str) {
401        if s.is_empty() {
402            return;
403        }
404
405        match &mut self.0 {
406            TxtData::String(str) => str.push_str(s),
407            TxtData::Inline(inlined) => {
408                if let Some(len) = inlined.iter().position(|&c| c == b'\0') {
409                    if len + s.len() <= INLINE_MAX && !s.contains('\0') {
410                        inlined[len..len + s.len()].copy_from_slice(s.as_bytes());
411                        return;
412                    }
413                }
414                self.to_mut().push_str(s)
415            }
416            _ => {
417                let len = self.len();
418                if len + s.len() <= INLINE_MAX && !s.contains('\0') {
419                    let mut inlined = str_to_inline(self.as_str());
420                    inlined[len..len + s.len()].copy_from_slice(s.as_bytes());
421
422                    self.0 = TxtData::Inline(inlined);
423                } else {
424                    self.to_mut().push_str(s)
425                }
426            }
427        }
428    }
429
430    /// Borrow the text as a string slice.
431    pub fn as_str(&self) -> &str {
432        self.0.deref()
433    }
434
435    /// Copy the inner static `str` if this text represents one.
436    pub fn as_static_str(&self) -> Option<&'static str> {
437        match self.0 {
438            TxtData::Static(s) => Some(s),
439            _ => None,
440        }
441    }
442}
443impl fmt::Debug for Txt {
444    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
445        fmt::Debug::fmt(&self.0, f)
446    }
447}
448impl fmt::Display for Txt {
449    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
450        fmt::Display::fmt(&self.0, f)
451    }
452}
453impl Default for Txt {
454    /// Empty.
455    fn default() -> Self {
456        Self::from_static("")
457    }
458}
459impl std::str::FromStr for Txt {
460    type Err = ();
461
462    fn from_str(s: &str) -> Result<Self, Self::Err> {
463        Ok(Txt::from_str(s))
464    }
465}
466impl From<&'static str> for Txt {
467    fn from(value: &'static str) -> Self {
468        Txt(TxtData::Static(value))
469    }
470}
471impl From<String> for Txt {
472    fn from(value: String) -> Self {
473        Txt(TxtData::String(value))
474    }
475}
476impl From<Cow<'static, str>> for Txt {
477    fn from(value: Cow<'static, str>) -> Self {
478        match value {
479            Cow::Borrowed(s) => Txt(TxtData::Static(s)),
480            Cow::Owned(s) => Txt(TxtData::String(s)),
481        }
482    }
483}
484impl From<char> for Txt {
485    fn from(value: char) -> Self {
486        Txt::from_char(value)
487    }
488}
489impl From<Txt> for String {
490    fn from(value: Txt) -> Self {
491        value.into_owned()
492    }
493}
494impl From<Txt> for Cow<'static, str> {
495    fn from(value: Txt) -> Self {
496        match value.0 {
497            TxtData::Static(s) => Cow::Borrowed(s),
498            TxtData::String(s) => Cow::Owned(s),
499            TxtData::Inline(d) => Cow::Owned(inline_to_str(&d).to_owned()),
500            TxtData::Arc(s) => Cow::Owned((*s).to_owned()),
501        }
502    }
503}
504impl From<Txt> for std::path::PathBuf {
505    fn from(value: Txt) -> Self {
506        value.into_owned().into()
507    }
508}
509impl From<Txt> for Box<dyn std::error::Error> {
510    fn from(err: Txt) -> Self {
511        err.into_owned().into()
512    }
513}
514impl From<Txt> for Box<dyn std::error::Error + Send + Sync> {
515    fn from(err: Txt) -> Self {
516        err.into_owned().into()
517    }
518}
519impl From<Txt> for std::ffi::OsString {
520    fn from(value: Txt) -> Self {
521        String::from(value).into()
522    }
523}
524impl std::ops::Deref for Txt {
525    type Target = str;
526
527    fn deref(&self) -> &Self::Target {
528        self.0.deref()
529    }
530}
531impl AsRef<str> for Txt {
532    fn as_ref(&self) -> &str {
533        self.0.as_ref()
534    }
535}
536impl AsRef<std::path::Path> for Txt {
537    fn as_ref(&self) -> &std::path::Path {
538        self.0.as_ref()
539    }
540}
541impl AsRef<std::ffi::OsStr> for Txt {
542    fn as_ref(&self) -> &std::ffi::OsStr {
543        self.0.as_ref()
544    }
545}
546impl std::borrow::Borrow<str> for Txt {
547    fn borrow(&self) -> &str {
548        self.as_str()
549    }
550}
551impl<'a> std::ops::Add<&'a str> for Txt {
552    type Output = Txt;
553
554    fn add(mut self, rhs: &'a str) -> Self::Output {
555        self += rhs;
556        self
557    }
558}
559impl std::ops::AddAssign<&str> for Txt {
560    fn add_assign(&mut self, rhs: &str) {
561        self.push_str(rhs);
562    }
563}
564impl PartialEq<&str> for Txt {
565    fn eq(&self, other: &&str) -> bool {
566        self.as_str().eq(*other)
567    }
568}
569impl PartialEq<str> for Txt {
570    fn eq(&self, other: &str) -> bool {
571        self.as_str().eq(other)
572    }
573}
574impl PartialEq<String> for Txt {
575    fn eq(&self, other: &String) -> bool {
576        self.as_str().eq(other)
577    }
578}
579impl PartialEq<Txt> for &str {
580    fn eq(&self, other: &Txt) -> bool {
581        other.as_str().eq(*self)
582    }
583}
584impl PartialEq<Txt> for str {
585    fn eq(&self, other: &Txt) -> bool {
586        other.as_str().eq(self)
587    }
588}
589impl PartialEq<Txt> for String {
590    fn eq(&self, other: &Txt) -> bool {
591        other.as_str().eq(self)
592    }
593}
594impl serde::Serialize for Txt {
595    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
596    where
597        S: serde::Serializer,
598    {
599        serializer.serialize_str(self.as_str())
600    }
601}
602impl<'de> serde::Deserialize<'de> for Txt {
603    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
604    where
605        D: serde::Deserializer<'de>,
606    {
607        String::deserialize(deserializer).map(Txt::from)
608    }
609}
610impl AsRef<[u8]> for Txt {
611    fn as_ref(&self) -> &[u8] {
612        self.as_str().as_ref()
613    }
614}
615impl std::fmt::Write for Txt {
616    fn write_str(&mut self, s: &str) -> fmt::Result {
617        self.push_str(s);
618        Ok(())
619    }
620}
621impl PartialOrd for Txt {
622    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
623        Some(self.cmp(other))
624    }
625}
626impl Ord for Txt {
627    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
628        self.as_str().cmp(other.as_str())
629    }
630}
631
632/// A trait for converting a value to a [`Txt`].
633///
634/// This trait is automatically implemented for any type which implements the [`ToString`] trait.
635///
636/// You can use [`formatx!`](macro.formatx.html) to `format!` a text.
637pub trait ToTxt {
638    /// Converts the given value to an owned [`Txt`].
639    ///
640    /// # Examples
641    ///
642    /// Basic usage:
643    ///
644    /// ```
645    /// use zng_txt::*;
646    ///
647    /// let expected = formatx!("10");
648    /// let actual = 10.to_txt();
649    ///
650    /// assert_eq!(expected, actual);
651    /// ```
652    fn to_txt(&self) -> Txt;
653}
654impl<T: ToString> ToTxt for T {
655    fn to_txt(&self) -> Txt {
656        self.to_string().into()
657    }
658}
659
///<span data-del-macro-root></span> Creates a [`Txt`] by formatting using the [`format_args!`] syntax.
///
/// This behaves like a [`format!`] for [`Txt`], but it can be more performant because the
/// text type can represent `&'static str` and can inline small strings, no allocation is
/// needed in these cases.
///
/// # Examples
///
/// ```
/// # use zng_txt::formatx;
/// let text = formatx!("Hello {}", "World!");
/// ```
#[macro_export]
macro_rules! formatx {
    ($($args:tt)*) => {
        {
            // bound to a local first so the temporaries of `format_args!` end with this statement.
            let txt = $crate::Txt::from_fmt(format_args!($($args)*));
            txt
        }
    };
}