zng_ext_font/
segmenting.rs

Help
1use std::{collections::HashMap, ops};
2
3use crate::emoji_util;
4
5use super::Txt;
6use unicode_bidi::{BidiDataSource as _, BidiInfo};
7
8use zng_layout::context::LayoutDirection;
9pub use zng_layout::context::TextSegmentKind;
10
11pub use unicode_bidi::Level as BidiLevel;
12
/// Represents a single text segment in a [`SegmentedText`].
///
/// A segment does not store its own start, only the exclusive `end`. The start is the `end`
/// of the previous segment in the list, or `0` for the first segment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct TextSegment {
    /// Segment kind.
    pub kind: TextSegmentKind,
    /// Bidirectional level of the segment in the context of the other segments of the line.
    ///
    /// Use [`direction`](Self::direction) to get the layout direction that corresponds to this level.
    pub level: BidiLevel,

    /// Exclusive end index on the source text.
    ///
    /// The segment range starts from the `end` of the previous segment, or `0`, e.g: `prev_seg.end..self.end`.
    pub end: usize,
}
26impl TextSegment {
27    /// Direction of the glyphs in the segment.
28    ///
29    /// Segments iterate in the logical order, that is, the order the text is typed. If two segments
30    /// in the same line have direction `RTL` they must be layout the first to the right of the second.
31    pub fn direction(self) -> LayoutDirection {
32        from_unic_level(self.level)
33    }
34}
35
/// A string segmented in sequences of words, spaces, tabs and separated line breaks.
///
/// Each segment is tagged with a [`TextSegmentKind`] and is defined as
/// an offset from the last segment.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct SegmentedText {
    // the full source text.
    text: Txt,
    // segments in logical order, each one ends where the next starts.
    segments: Vec<TextSegment>,
    // direction requested when the text was segmented.
    base_direction: LayoutDirection,
}
46impl SegmentedText {
47    /// New segmented text from any text type.
48    pub fn new(text: impl Into<Txt>, base_direction: LayoutDirection) -> Self {
49        Self::new_text(text.into(), base_direction)
50    }
51    fn new_text(text: Txt, base_direction: LayoutDirection) -> Self {
52        let mut segs: Vec<TextSegment> = vec![];
53        let text_str: &str = &text;
54        let bidi = BidiInfo::new(text_str, Some(into_unic_level(base_direction)));
55
56        for (offset, kind) in unicode_linebreak::linebreaks(text_str) {
57            // a hard-break is a '\n', '\r', "\r\n" or text end.
58            if let unicode_linebreak::BreakOpportunity::Mandatory = kind {
59                // start of this segment.
60                let start = segs.last().map(|s| s.end).unwrap_or(0);
61
62                // The segment can have other characters before the line-break character(s).
63
64                let seg = &text_str[start..offset];
65
66                let break_start = if seg.ends_with("\r\n") {
67                    // the break was a "\r\n"
68                    offset - 2
69                } else if seg.ends_with('\n') || seg.ends_with('\r') || seg.ends_with('\u{85}') {
70                    // the break was a '\n', '\r' or NEL
71                    offset - 1
72                } else {
73                    // "break" at end of string
74                    debug_assert_eq!(offset, text_str.len());
75                    offset
76                };
77
78                if break_start > start {
79                    // the segment has more characters than the line-break character(s).
80                    Self::push_seg(text_str, &bidi, &mut segs, break_start);
81                }
82                if break_start < offset {
83                    // the line break character(s).
84                    segs.push(TextSegment {
85                        kind: TextSegmentKind::LineBreak,
86                        end: offset,
87                        level: bidi.levels[break_start],
88                    })
89                }
90            }
91            // else soft break, handled by our own segmentation
92        }
93        SegmentedText {
94            text,
95            segments: segs,
96            base_direction,
97        }
98    }
99
100    fn push_seg(text: &str, bidi: &BidiInfo, segs: &mut Vec<TextSegment>, end: usize) {
101        let start = segs.last().map(|s| s.end).unwrap_or(0);
102
103        let mut char_indices = text[start..end].char_indices().peekable();
104
105        let mut kind = TextSegmentKind::LeftToRight;
106        let mut level = BidiLevel::ltr();
107        for (i, c) in &mut char_indices {
108            const ZWJ: char = '\u{200D}'; // ZERO WIDTH JOINER
109            const VS16: char = '\u{FE0F}'; // VARIANT SELECTOR 16 - Emoji
110            const CEK: char = '\u{20E3}'; // COMBINING ENCLOSING KEYCAP
111
112            let is_emoji = (kind == TextSegmentKind::Emoji // maybe
113                && (
114                    c == VS16 // definitely, modifies prev. char into Emoji.
115                    || c == CEK // definitely, modified prev. char into keycap style.
116                    || c == ZWJ // definitely, ligature with the next Emoji or is ignored.
117                    || emoji_util::is_modifier(c) // definitely, has same effect as VS16.
118                    || emoji_util::is_component(c) // definitely, ligature data, like flag tags.
119                ))
120                || (emoji_util::maybe_emoji(c) // maybe
121                    && (emoji_util::definitely_emoji(c) // definitely
122                        // only if followed by VS16 or modifier
123                        || (text[start+i..].chars().nth(1).map(|c| c == VS16 || emoji_util::is_modifier(c)).unwrap_or(false))));
124
125            let (c_kind, c_level) = if is_emoji {
126                (TextSegmentKind::Emoji, level)
127            } else {
128                let k = match TextSegmentKind::from(bidi.original_classes[start + i]) {
129                    TextSegmentKind::OtherNeutral if unicode_bidi::HardcodedBidiData.bidi_matched_opening_bracket(c).is_some() => {
130                        TextSegmentKind::Bracket(c)
131                    }
132                    k => k,
133                };
134                (k, bidi.levels[start + i])
135            };
136
137            if c_kind != kind || c_level != level || !c_kind.can_merge() {
138                if i > 0 {
139                    segs.push(TextSegment {
140                        kind,
141                        end: i + start,
142                        level,
143                    });
144                }
145                level = c_level;
146                kind = c_kind;
147            }
148        }
149        segs.push(TextSegment { kind, end, level });
150    }
151
    /// The full text string that was segmented.
    ///
    /// The `end` of each segment is an index in this string.
    pub fn text(&self) -> &Txt {
        &self.text
    }
156
    /// The text segments, in logical (typed) order.
    pub fn segs(&self) -> &[TextSegment] {
        &self.segments
    }
161
162    /// Get segment index from a char index.
163    pub fn seg_from_char(&self, from: usize) -> usize {
164        match self.segments.binary_search_by_key(&from, |s| s.end) {
165            Ok(e) => e + 1,
166            Err(s) => s,
167        }
168    }
169
    /// Contextual direction.
    ///
    /// This is the direction given when the text was segmented.
    ///
    /// Note that each segment can override the direction, and even the entire text can be a sequence in
    /// the opposite direction.
    pub fn base_direction(&self) -> LayoutDirection {
        self.base_direction
    }
177
178    /// Gets if the text contains segments not in the base direction.
179    pub fn is_bidi(&self) -> bool {
180        for seg in self.segments.iter() {
181            if seg.direction() != self.base_direction {
182                return true;
183            }
184        }
185        false
186    }
187
188    /// Returns the text segment if `index` is in bounds.
189    pub fn get(&self, index: usize) -> Option<(&str, TextSegment)> {
190        if let Some(&seg) = self.segments.get(index) {
191            let text = if index == 0 {
192                &self.text[..seg.end]
193            } else {
194                &self.text[self.segments[index - 1].end..seg.end]
195            };
196
197            Some((text, seg))
198        } else {
199            None
200        }
201    }
202
203    /// Returns a clone of the text segment if `index` is in bounds.
204    pub fn get_clone(&self, index: usize) -> Option<SegmentedText> {
205        self.get(index).map(|(txt, seg)| SegmentedText {
206            text: txt.to_owned().into(),
207            segments: vec![TextSegment { end: txt.len(), ..seg }],
208            base_direction: self.base_direction,
209        })
210    }
211
    /// Returns `true` if text and segments are empty.
    ///
    /// Only the segments list is inspected, [`from_parts`](Self::from_parts) asserts that
    /// the text and the segments are either both empty or both not empty.
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }
216
    /// Destructs `self` into the text, the segments and the base direction.
    ///
    /// The parts can be converted back using [`from_parts`](Self::from_parts).
    pub fn into_parts(self) -> (Txt, Vec<TextSegment>, LayoutDirection) {
        (self.text, self.segments, self.base_direction)
    }
221
    /// New segmented text from [parts](Self::into_parts).
    ///
    /// The segments are not recomputed, only the basic validation described below is done.
    ///
    /// # Panics
    ///
    /// Some basic validation is done on the input:
    ///
    /// * If one of the inputs is empty but the other is not.
    /// * If text is not empty and the last segment does not end with the text.
    pub fn from_parts(text: Txt, segments: Vec<TextSegment>, base_direction: LayoutDirection) -> Self {
        // both empty or both not empty.
        assert_eq!(text.is_empty(), segments.is_empty());
        if !text.is_empty() {
            // `unwrap` cannot fail here, the previous assert ensures there is at least one segment.
            assert!(segments.last().unwrap().end == text.len());
        }

        SegmentedText {
            text,
            segments,
            base_direction,
        }
    }
242
243    /// Segments iterator.
244    ///
245    /// # Examples
246    ///
247    /// ```
248    /// # use zng_ext_font::SegmentedText;
249    /// # use zng_layout::context::LayoutDirection;
250    /// for (sub_str, seg) in SegmentedText::new("Foo bar!\nBaz.", LayoutDirection::LTR).iter() {
251    ///     println!("s: {sub_str:?} is a `{:?}`", seg.kind);
252    /// }
253    /// ```
254    pub fn iter(&self) -> SegmentedTextIter {
255        SegmentedTextIter {
256            text: &self.text,
257            start: 0,
258            segs_iter: self.segments.iter(),
259        }
260    }
261
262    /// Convert a segments range to a text bytes range.
263    pub fn text_range(&self, segs_range: ops::Range<usize>) -> ops::Range<usize> {
264        let start = if segs_range.start == 0 {
265            0
266        } else {
267            self.segments[segs_range.start - 1].end
268        };
269        let end = self.segments[..segs_range.end].last().map(|s| s.end).unwrap_or(0);
270        start..end
271    }
272
273    /// Compute a map of segments in `segs_range` to their final LTR display order.
274    ///
275    /// The `segs_range` must be the segments of a line after line wrap.
276    pub fn reorder_line_to_ltr(&self, segs_range: ops::Range<usize>) -> Vec<usize> {
277        let mut r = Vec::with_capacity(segs_range.len());
278        let offset = segs_range.start;
279        unicode_bidi_sort(
280            self.base_direction,
281            self.segments[segs_range].iter().map(|s| (s.kind, s.level)),
282            offset,
283            &mut r,
284        );
285        r
286    }
287
288    /// Find the nearest next char boundary from the byte index `i`.
289    ///
290    /// If `i` is larger than the text length, returns the text length, if `i` is
291    /// already a char boundary, returns `i`.
292    pub fn snap_char_boundary(&self, i: usize) -> usize {
293        if i >= self.text.len() {
294            self.text.len()
295        } else {
296            let mut next = i;
297            while !self.text.is_char_boundary(next) {
298                next += 1;
299            }
300            next
301        }
302    }
303
304    /// Find the nearest grapheme cluster boundary from the byte index `i`.
305    ///
306    /// If `i` is larger than the text length, returns the text length, if `i` is
307    /// already a grapheme boundary, returns `i`.
308    pub fn snap_grapheme_boundary(&self, i: usize) -> usize {
309        let i = self.snap_char_boundary(i);
310        if i == self.text.len() {
311            i
312        } else {
313            let mut seg_start = 0;
314            for seg in self.segments.iter() {
315                if seg.end > i {
316                    break;
317                }
318                seg_start = seg.end;
319            }
320            let s = &self.text[seg_start..];
321
322            let seg_i = i - seg_start;
323            let mut best_before = 0;
324            let mut best_after = s.len();
325            for (i, _) in unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true) {
326                if i > seg_i {
327                    best_after = i;
328                    break;
329                }
330                best_before = i;
331            }
332
333            let best = if best_after - seg_i > seg_i - best_before {
334                best_before
335            } else {
336                best_after
337            };
338            seg_start + best
339        }
340    }
341
342    /// Find the next grapheme cluster, after `from`.
343    ///
344    /// The `from` must be in a grapheme boundary or `0` or `len`. This operation is saturating.
345    ///
346    /// # Panics
347    ///
348    /// Panics if `from` is larger than the text length, or is not at a grapheme boundary.
349    pub fn next_insert_index(&self, from: usize) -> usize {
350        if from == self.text.len() {
351            from
352        } else {
353            let s = &self.text.as_str()[from..];
354            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true).map(|(i, _)| i + from);
355            assert_eq!(iter.next(), Some(from), "`from` was not a grapheme boundary");
356            iter.next().unwrap_or(self.text.len())
357        }
358    }
359
360    /// Find the previous grapheme cluster, before `from`.
361    ///
362    /// The `from` must be in a grapheme boundary or `0` or `len`. This operation is saturating.
363    ///
364    /// # Panics
365    ///
366    /// Panics if `from` is larger than the text length, or is not at a grapheme boundary.
367    pub fn prev_insert_index(&self, from: usize) -> usize {
368        if from == self.text.len() {
369            let s = &self.text.as_str()[..from];
370            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true)
371                .map(|(i, _)| i)
372                .rev();
373            iter.next().unwrap_or(0)
374        } else {
375            let s = self.text.as_str();
376
377            // from + 1_char, so that the `from` is the first yield in reverse if it is a valid grapheme boundary
378            let inclusive_from = s[from..].char_indices().nth(1).map(|(b, _)| from + b).unwrap_or_else(|| s.len());
379
380            let s = &self.text.as_str()[..inclusive_from];
381            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true)
382                .map(|(i, _)| i)
383                .rev();
384            assert_eq!(iter.next(), Some(from), "`from` was not a grapheme boundary");
385            iter.next().unwrap_or(0)
386        }
387    }
388
389    /// Find the start of the next word or the next line-break segment, after `from`.
390    ///
391    /// This operation is saturating.
392    pub fn next_word_index(&self, from: usize) -> usize {
393        let mut segs = self.segments[self.seg_from_char(from)..].iter();
394
395        if let Some(seg) = segs.next() {
396            if seg.kind.is_line_break() {
397                return seg.end;
398            }
399            let mut start = seg.end;
400            for seg in segs {
401                if seg.kind.is_word() || seg.kind.is_line_break() {
402                    return start;
403                }
404                start = seg.end;
405            }
406        }
407        self.text.len()
408    }
409
410    /// Find the next word segment end or the next line-break segment end, after `from`.
411    ///
412    /// This operation is saturating.
413    pub fn next_word_end_index(&self, from: usize) -> usize {
414        let mut segs = self.segments[self.seg_from_char(from)..].iter();
415        if let Some(seg) = segs.next() {
416            if seg.kind.is_word() || seg.kind.is_line_break() {
417                return seg.end;
418            }
419            for seg in segs {
420                if seg.kind.is_word() || seg.kind.is_line_break() {
421                    return seg.end;
422                }
423            }
424        }
425        self.text.len()
426    }
427
    /// Find the start of the previous word segment or the previous line-break segment, before `from`.
    ///
    /// The segments are visited in reverse, starting from the segment that contains `from`.
    ///
    /// This operation is saturating, returns `0` if no previous word or line-break is found.
    pub fn prev_word_index(&self, from: usize) -> usize {
        // index of the segment that contains `from`, or the segments length if `from` is at the text end.
        let seg_i = self.seg_from_char(from);
        let mut segs = if seg_i < self.segments.len() {
            // includes the segment that contains `from`.
            self.segments[..=seg_i].iter().rev()
        } else {
            self.segs().iter().rev()
        };
        // kind of the segment visited in the previous iteration, that is, the segment that
        // starts at `seg.end` in the text order.
        let mut seg_kind = TextSegmentKind::Space;
        for seg in &mut segs {
            if seg.end < from {
                // `seg` ends before `from`, so `seg.end` is the start of the segment visited before it.
                if seg_kind.is_word() || seg.kind.is_line_break() {
                    // last segment start or line-break end
                    return seg.end;
                }
                seg_kind = seg.kind;
                // continues on the same iterator, all remaining segments end before `from`.
                for seg in segs {
                    if seg_kind.is_word() || seg.kind.is_line_break() {
                        // last segment start or line-break end
                        return seg.end;
                    }
                    seg_kind = seg.kind;
                }
                break;
            } else if seg.end == from && seg.kind.is_line_break() {
                // line-break start
                return segs.next().map(|p| p.end).unwrap_or(0);
            }
            seg_kind = seg.kind;
        }
        // no word or line-break before `from`.
        0
    }
462
463    /// Find the start of the line that contains `from`.
464    ///
465    /// # Panics
466    ///
467    /// Panics if `from` is larger than the text length, or is not a char boundary.
468    pub fn line_start_index(&self, from: usize) -> usize {
469        let line_break = self.text.as_str()[..from]
470            .char_indices()
471            .rev()
472            .find(|(_, c)| "\n\r\u{85}".contains(*c));
473
474        match line_break {
475            Some((i, _)) => i + 1,
476            None => 0,
477        }
478    }
479
480    /// Find the end of the line that contains `from`.
481    ///
482    /// # Panics
483    ///
484    /// Panics if `from` is larger than the text length, or is not a char boundary.
485    pub fn line_end_index(&self, from: usize) -> usize {
486        if from == self.text.len() {
487            return from;
488        }
489
490        let line_break = self.text.as_str()[from..].char_indices().find(|(_, c)| "\n\r\u{85}".contains(*c));
491
492        match line_break {
493            Some((i, _)) => from + i,
494            None => self.text.len(),
495        }
496    }
497
498    /// Find the range that must be removed to delete starting by `from` a `count` number of times.
499    ///
500    /// Delete **Del** action removes the next grapheme cluster, this is different from
501    /// [`backspace_range`] that usually only removes one character.
502    ///
503    /// # Panics
504    ///
505    /// Panics if `from` is larger than the text length, or is not a grapheme boundary.
506    ///
507    /// [`backspace_range`]: Self::backspace_range
508    pub fn delete_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
509        let mut end = from;
510        for _ in 0..count {
511            let e = self.next_insert_index(end);
512            if e == end {
513                break;
514            }
515            end = e;
516        }
517
518        from..end
519    }
520
521    /// Find the range that must be removed to backspace before `from` a `count` number of times.
522    ///
523    /// The character at `from` is not included, only the previous char is selected, with some exceptions,
524    /// the selection includes any char before zero-width-joiner (ZWJ), it also includes `\r` before `\n`
525    /// and Emoji char before Emoji modifier or variation selector (VS16).
526    ///
527    /// # Panics
528    ///
529    /// Panics if `from` is larger than the text length, or is not a char boundary.
530    pub fn backspace_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
531        let mut start = from;
532        for _ in 0..count {
533            let s = self.backspace_start(start);
534            if s == start {
535                break;
536            }
537            start = s;
538        }
539        start..from
540    }
541    fn backspace_start(&self, from: usize) -> usize {
542        let text = &self.text[..from];
543        let mut start = from;
544        for (i, c) in text.char_indices().rev() {
545            start = i;
546            match c {
547                '\u{200D}' => continue, // ZWJ
548                '\n' => {
549                    if text[..i].ends_with('\r') {
550                        start = i - 1;
551                    }
552                }
553                c if c == '\u{FE0F}' || emoji_util::is_modifier(c) => {
554                    // VS16 || Emoji-Modifier
555                    if let Some((i, c)) = text[..i].char_indices().next_back() {
556                        if emoji_util::maybe_emoji(c) {
557                            start = i;
558                        }
559                    }
560                }
561                _ => {}
562            }
563            break;
564        }
565        start
566    }
567
568    /// Find the range that must be removed to backspace words before `from` a `count` number of times.
569    ///
570    /// The character at `from` is not included, only the previous word is selected.
571    pub fn backspace_word_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
572        let mut start = from;
573        for _ in 0..count {
574            let s = self.prev_word_index(start);
575            if s == start {
576                break;
577            }
578            start = s;
579        }
580        start..from
581    }
582
583    /// Find the range that must be removed to delete words starting by `from` a `count` number of times.
584    pub fn delete_word_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
585        let mut end = from;
586        for _ in 0..count {
587            let e = self.next_word_end_index(end);
588            if e == end {
589                break;
590            }
591            end = e;
592        }
593
594        from..end
595    }
596}
597
598/// Compute initial bidirectional levels of each segment of a `line`.
599///
600/// The result is set in `levels`.
601pub fn unicode_bidi_levels(base_direction: LayoutDirection, line: impl Iterator<Item = TextSegmentKind>, levels: &mut Vec<BidiLevel>) {
602    let mut original_classes = Vec::with_capacity(line.size_hint().0);
603    let mut brackets = HashMap::default();
604    for (i, k) in line.enumerate() {
605        original_classes.push(k.into());
606        if let TextSegmentKind::Bracket(c) = k {
607            brackets.insert(i, c);
608        }
609    }
610
611    unicode_bidi_levels_impl(levels, base_direction, original_classes, brackets);
612}
613fn unicode_bidi_levels_impl(
614    levels: &mut Vec<BidiLevel>,
615    base_direction: LayoutDirection,
616    original_classes: Vec<unicode_bidi::BidiClass>,
617    brackets: HashMap<usize, char>,
618) {
619    levels.clear();
620    let para_level = into_unic_level(base_direction);
621    levels.resize(original_classes.len(), para_level);
622
623    if !original_classes.is_empty() {
624        let mut processing_classes = original_classes.clone();
625
626        super::unicode_bidi_util::explicit_compute(para_level, &original_classes, levels, &mut processing_classes);
627
628        let sequences = super::unicode_bidi_util::prepare_isolating_run_sequences(para_level, &original_classes, levels);
629        for sequence in &sequences {
630            super::unicode_bidi_util::implicit_resolve_weak(sequence, &mut processing_classes);
631            super::unicode_bidi_util::implicit_resolve_neutral(sequence, levels, &original_classes, &mut processing_classes, &brackets);
632        }
633        super::unicode_bidi_util::implicit_resolve_levels(&processing_classes, levels);
634
635        super::unicode_bidi_util::assign_levels_to_removed_chars(para_level, &original_classes, levels);
636    }
637}
638
639/// Compute a map of segments in `line` to their final LTR display order.
640///
641/// The result is set in `sort_map`.
642pub fn unicode_bidi_sort(
643    base_direction: LayoutDirection,
644    line: impl Iterator<Item = (TextSegmentKind, BidiLevel)>,
645    idx_offset: usize,
646    sort_map: &mut Vec<usize>,
647) {
648    sort_map.clear();
649
650    let cap = line.size_hint().0;
651    let mut line_classes = Vec::with_capacity(cap);
652    let mut levels = Vec::with_capacity(cap);
653    for (kind, level) in line {
654        line_classes.push(kind.into());
655        levels.push(level);
656    }
657
658    if !levels.is_empty() {
659        let (directions, vis_ranges) = super::unicode_bidi_util::visual_runs(levels, line_classes, into_unic_level(base_direction));
660
661        for vis_range in vis_ranges {
662            if directions[vis_range.start].is_rtl() {
663                for i in vis_range.rev() {
664                    sort_map.push(idx_offset + i);
665                }
666            } else {
667                for i in vis_range {
668                    sort_map.push(idx_offset + i);
669                }
670            }
671        }
672    }
673}
674
/// Segmented text iterator.
///
/// Yields the sub-string and the [`TextSegment`] of each segment.
///
/// This `struct` is created by the [`SegmentedText::iter`] method.
pub struct SegmentedTextIter<'a> {
    // the full text.
    text: &'a str,
    // start of the next segment, the `end` of the last yielded segment.
    start: usize,
    // remaining segments.
    segs_iter: std::slice::Iter<'a, TextSegment>,
}
683impl<'a> Iterator for SegmentedTextIter<'a> {
684    type Item = (&'a str, TextSegment);
685    fn next(&mut self) -> Option<Self::Item> {
686        if let Some(&seg) = self.segs_iter.next() {
687            let r = Some((&self.text[self.start..seg.end], seg));
688            self.start = seg.end;
689            r
690        } else {
691            None
692        }
693    }
694}
695
696fn from_unic_level(d: unicode_bidi::Level) -> LayoutDirection {
697    if d.is_ltr() { LayoutDirection::LTR } else { LayoutDirection::RTL }
698}
699fn into_unic_level(d: LayoutDirection) -> unicode_bidi::Level {
700    match d {
701        LayoutDirection::LTR => unicode_bidi::Level::ltr(),
702        LayoutDirection::RTL => unicode_bidi::Level::rtl(),
703    }
704}
705
706#[cfg(test)]
707mod tests {
708    use zng_layout::context::{LayoutDirection, TextSegmentKind};
709    use zng_txt::ToTxt;
710
711    use crate::{BidiLevel, SegmentedText, TextSegment};
712
713    #[test]
714    fn segments() {
715        let test = "a\nb\r\nc\td ";
716        let actual = SegmentedText::new(test, LayoutDirection::LTR);
717
718        fn seg(kind: TextSegmentKind, end: usize) -> TextSegment {
719            TextSegment {
720                kind,
721                end,
722                level: BidiLevel::ltr(),
723            }
724        }
725        use TextSegmentKind::*;
726
727        let expected = SegmentedText {
728            text: test.to_txt(),
729            segments: vec![
730                seg(LeftToRight, 1),
731                seg(LineBreak, 2),
732                seg(LeftToRight, 3),
733                seg(LineBreak, 5),
734                seg(LeftToRight, 6),
735                seg(Tab, 7),
736                seg(LeftToRight, 8),
737                seg(Space, 9),
738            ],
739            base_direction: LayoutDirection::LTR,
740        };
741
742        assert_eq!(expected, actual);
743    }
744
745    #[test]
746    fn reorder_line() {
747        let test = "0 2 4";
748        let txt = SegmentedText::new(test, LayoutDirection::RTL);
749
750        let expected = vec![4, 3, 2, 1, 0];
751        let actual = txt.reorder_line_to_ltr(0..test.len());
752
753        assert_eq!(expected, actual);
754    }
755
756    #[test]
757    fn reorder_line_issue() {
758        let test = "      المادة 1";
759        let txt = SegmentedText::new(test, LayoutDirection::RTL);
760
761        let expected = vec![3, 2, 1, 0];
762        let actual = txt.reorder_line_to_ltr(0..4);
763
764        assert_eq!(expected, actual);
765    }
766
    #[test]
    fn emoji_seg() {
        // quoted Emoji sequence, followed by plain `1#` and then the same chars as keycap Emoji.
        let test = "'🙎🏻‍♀️'1# 1️⃣#️⃣";
        let txt = SegmentedText::new(test, LayoutDirection::LTR);
        let k: Vec<_> = txt.segs().iter().map(|s| s.kind).collect();

        assert_eq!(
            vec![
                TextSegmentKind::OtherNeutral,       // '
                TextSegmentKind::Emoji,              // 🙎🏻‍♀️
                TextSegmentKind::OtherNeutral,       // '
                TextSegmentKind::EuropeanNumber,     // 1
                TextSegmentKind::EuropeanTerminator, // #
                TextSegmentKind::Space,
                TextSegmentKind::Emoji, // 1️⃣#️⃣
            ],
            k
        );
    }
786
    #[test]
    fn emoji_issues() {
        // NOTE(review): the literal appears to be a flag followed by invisible tag chars, confirm.
        let test = "🏴󠁧󠁢󠁥󠁮󠁧󠁿";
        let txt = SegmentedText::new(test, LayoutDirection::LTR);
        // every segment of the sequence must be Emoji.
        for (t, seg) in txt.iter() {
            assert_eq!(seg.kind, TextSegmentKind::Emoji, "text: {t:?}");
        }
    }
795}
zng_ext_font/segmenting.rs

zng_ext_font/
segmenting.rs