zng_ext_font/
segmenting.rs

1use std::{collections::HashMap, ops};
2
3use crate::emoji_util;
4
5use super::Txt;
6use unicode_bidi::{BidiDataSource as _, BidiInfo};
7
8use zng_layout::context::LayoutDirection;
9pub use zng_layout::context::TextSegmentKind;
10
11pub use unicode_bidi::Level as BidiLevel;
12
/// Represents a single text segment in a [`SegmentedText`].
///
/// A segment does not store its own start, only the exclusive `end`. The start is the
/// `end` of the segment that precedes it, or `0` for the first segment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct TextSegment {
    /// Segment kind.
    pub kind: TextSegmentKind,
    /// Bidirectional level of the segment in the context of other segments of the line.
    ///
    /// See [`direction`](Self::direction) for the layout direction this level maps to.
    pub level: BidiLevel,

    /// Exclusive end byte index on the source text.
    ///
    /// The segment range starts from the `end` of the previous segment, or `0`, e.g: `prev_seg.end..self.end`.
    pub end: usize,
}
impl TextSegment {
    /// Direction of the glyphs in the segment.
    ///
    /// Segments iterate in the logical order, that is, the order the text is typed. If two segments
    /// in the same line have direction `RTL`, the first must be laid out to the right of the second.
    pub fn direction(self) -> LayoutDirection {
        from_unic_level(self.level)
    }
}
36
/// A string segmented in sequences of words, spaces, tabs and separated line breaks.
///
/// Each segment is tagged with a [`TextSegmentKind`] and is defined as
/// an offset from the last segment.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct SegmentedText {
    // The full source text.
    text: Txt,
    // Segments in logical order, each one stores only its exclusive end byte index in `text`.
    segments: Vec<TextSegment>,
    // Direction given when the text was segmented.
    base_direction: LayoutDirection,
}
47impl SegmentedText {
    /// New segmented text from any text type.
    ///
    /// The `base_direction` is given to the bidirectional analysis as the paragraph level and
    /// is the value returned by [`base_direction`](Self::base_direction).
    pub fn new(text: impl Into<Txt>, base_direction: LayoutDirection) -> Self {
        Self::new_text(text.into(), base_direction)
    }
52    fn new_text(text: Txt, base_direction: LayoutDirection) -> Self {
53        let mut segs: Vec<TextSegment> = vec![];
54        let text_str: &str = &text;
55        let bidi = BidiInfo::new(text_str, Some(into_unic_level(base_direction)));
56
57        for (offset, kind) in unicode_linebreak::linebreaks(text_str) {
58            // a hard-break is a '\n', '\r', "\r\n" or text end.
59            if let unicode_linebreak::BreakOpportunity::Mandatory = kind {
60                // start of this segment.
61                let start = segs.last().map(|s| s.end).unwrap_or(0);
62
63                // The segment can have other characters before the line-break character(s).
64
65                let seg = &text_str[start..offset];
66
67                let break_start = if seg.ends_with("\r\n") {
68                    // the break was a "\r\n"
69                    offset - 2
70                } else if seg.ends_with('\n') || seg.ends_with('\r') || seg.ends_with('\u{85}') {
71                    // the break was a '\n', '\r' or NEL
72                    offset - 1
73                } else {
74                    // "break" at end of string
75                    debug_assert_eq!(offset, text_str.len());
76                    offset
77                };
78
79                if break_start > start {
80                    // the segment has more characters than the line-break character(s).
81                    Self::push_seg(text_str, &bidi, &mut segs, break_start);
82                }
83                if break_start < offset {
84                    // the line break character(s).
85                    segs.push(TextSegment {
86                        kind: TextSegmentKind::LineBreak,
87                        end: offset,
88                        level: bidi.levels[break_start],
89                    })
90                }
91            }
92            // else soft break, handled by our own segmentation
93        }
94        SegmentedText {
95            text,
96            segments: segs,
97            base_direction,
98        }
99    }
100
    /// Segments the text from the end of the last segment in `segs` up to `end` (exclusive) and
    /// pushes the new segments in `segs`.
    ///
    /// Chars are grouped by kind and bidi level, a new segment starts every time the kind or level
    /// changes, or when the kind of the char is one that `can_merge` rejects. Emoji sequences are
    /// detected here and tagged [`TextSegmentKind::Emoji`], other chars take the kind from their
    /// original bidi class, except matched opening brackets that are tagged `Bracket`.
    ///
    /// At least one segment ending at `end` is always pushed.
    fn push_seg(text: &str, bidi: &BidiInfo, segs: &mut Vec<TextSegment>, end: usize) {
        let start = segs.last().map(|s| s.end).unwrap_or(0);

        // `i` is relative to `start`.
        let mut char_indices = text[start..end].char_indices().peekable();

        // kind and level of the segment being accumulated.
        let mut kind = TextSegmentKind::LeftToRight;
        let mut level = BidiLevel::ltr();
        for (i, c) in &mut char_indices {
            const ZWJ: char = '\u{200D}'; // ZERO WIDTH JOINER
            const VS16: char = '\u{FE0F}'; // VARIANT SELECTOR 16 - Emoji
            const CEK: char = '\u{20E3}'; // COMBINING ENCLOSING KEYCAP

            // the char continues the current Emoji segment, or starts an Emoji.
            let is_emoji = (kind == TextSegmentKind::Emoji // maybe
                && (
                    c == VS16 // definitely, modifies prev. char into Emoji.
                    || c == CEK // definitely, modified prev. char into keycap style.
                    || c == ZWJ // definitely, ligature with the next Emoji or is ignored.
                    || emoji_util::is_modifier(c) // definitely, has same effect as VS16.
                    || emoji_util::is_component(c) // definitely, ligature data, like flag tags.
                ))
                || (emoji_util::maybe_emoji(c) // maybe
                    && (emoji_util::definitely_emoji(c) // definitely
                        // only if followed by VS16 or modifier
                        || (text[start+i..].chars().nth(1).map(|c| c == VS16 || emoji_util::is_modifier(c)).unwrap_or(false))));

            let (c_kind, c_level) = if is_emoji {
                // Emoji chars keep the level of the segment being accumulated.
                (TextSegmentKind::Emoji, level)
            } else {
                let k = match TextSegmentKind::from(bidi.original_classes[start + i]) {
                    TextSegmentKind::OtherNeutral if unicode_bidi::HardcodedBidiData.bidi_matched_opening_bracket(c).is_some() => {
                        TextSegmentKind::Bracket(c)
                    }
                    k => k,
                };
                (k, bidi.levels[start + i])
            };

            if c_kind != kind || c_level != level || !c_kind.can_merge() {
                // the first char only sets the initial kind and level, there is no segment to close yet.
                if i > 0 {
                    segs.push(TextSegment {
                        kind,
                        end: i + start,
                        level,
                    });
                }
                level = c_level;
                kind = c_kind;
            }
        }
        // close the last segment.
        segs.push(TextSegment { kind, end, level });
    }
152
    /// The full text string that was segmented.
    pub fn text(&self) -> &Txt {
        &self.text
    }
157
    /// The text segments, in logical order.
    ///
    /// Each segment covers the text from the `end` of the previous segment up to its own `end`.
    pub fn segs(&self) -> &[TextSegment] {
        &self.segments
    }
162
163    /// Get segment index from a char index.
164    pub fn seg_from_char(&self, from: usize) -> usize {
165        match self.segments.binary_search_by_key(&from, |s| s.end) {
166            Ok(e) => e + 1,
167            Err(s) => s,
168        }
169    }
170
    /// Contextual direction.
    ///
    /// This is the direction given when the text was created.
    ///
    /// Note that each segment can override the direction, and even the entire text can be a sequence in
    /// the opposite direction.
    pub fn base_direction(&self) -> LayoutDirection {
        self.base_direction
    }
178
179    /// Gets if the text contains segments not in the base direction.
180    pub fn is_bidi(&self) -> bool {
181        for seg in self.segments.iter() {
182            if seg.direction() != self.base_direction {
183                return true;
184            }
185        }
186        false
187    }
188
189    /// Returns the text segment if `index` is in bounds.
190    pub fn get(&self, index: usize) -> Option<(&str, TextSegment)> {
191        if let Some(&seg) = self.segments.get(index) {
192            let text = if index == 0 {
193                &self.text[..seg.end]
194            } else {
195                &self.text[self.segments[index - 1].end..seg.end]
196            };
197
198            Some((text, seg))
199        } else {
200            None
201        }
202    }
203
204    /// Returns a clone of the text segment if `index` is in bounds.
205    pub fn get_clone(&self, index: usize) -> Option<SegmentedText> {
206        self.get(index).map(|(txt, seg)| SegmentedText {
207            text: txt.to_owned().into(),
208            segments: vec![TextSegment { end: txt.len(), ..seg }],
209            base_direction: self.base_direction,
210        })
211    }
212
    /// Returns `true` if text and segments are empty.
    ///
    /// Only the segments are checked, [`from_parts`](Self::from_parts) asserts that the
    /// text is empty when the segments are.
    pub fn is_empty(&self) -> bool {
        self.segments.is_empty()
    }
217
    /// Destructs `self` into the text, segments and base direction.
    ///
    /// The parts can be converted back using [`from_parts`](Self::from_parts).
    pub fn into_parts(self) -> (Txt, Vec<TextSegment>, LayoutDirection) {
        (self.text, self.segments, self.base_direction)
    }
222
223    /// New segmented text from [parts](Self::into_parts).
224    ///
225    /// # Panics
226    ///
227    /// Some basic validation is done on the input:
228    ///
229    /// * If one of the inputs is empty but the other is not.
230    /// * If text is not empty and the last segment does not end with the text.
231    pub fn from_parts(text: Txt, segments: Vec<TextSegment>, base_direction: LayoutDirection) -> Self {
232        assert_eq!(text.is_empty(), segments.is_empty());
233        if !text.is_empty() {
234            assert!(segments.last().unwrap().end == text.len());
235        }
236
237        SegmentedText {
238            text,
239            segments,
240            base_direction,
241        }
242    }
243
    /// Segments iterator.
    ///
    /// Yields the text slice of each segment together with the segment, in logical order.
    ///
    /// # Examples
    ///
    /// ```
    /// # use zng_ext_font::SegmentedText;
    /// # use zng_layout::context::LayoutDirection;
    /// for (sub_str, seg) in SegmentedText::new("Foo bar!\nBaz.", LayoutDirection::LTR).iter() {
    ///     println!("s: {sub_str:?} is a `{:?}`", seg.kind);
    /// }
    /// ```
    pub fn iter(&self) -> SegmentedTextIter<'_> {
        SegmentedTextIter {
            text: &self.text,
            start: 0,
            segs_iter: self.segments.iter(),
        }
    }
262
263    /// Convert a segments range to a text bytes range.
264    pub fn text_range(&self, segs_range: ops::Range<usize>) -> ops::Range<usize> {
265        let start = if segs_range.start == 0 {
266            0
267        } else {
268            self.segments[segs_range.start - 1].end
269        };
270        let end = self.segments[..segs_range.end].last().map(|s| s.end).unwrap_or(0);
271        start..end
272    }
273
274    /// Compute a map of segments in `segs_range` to their final LTR display order.
275    ///
276    /// The `segs_range` must be the segments of a line after line wrap.
277    pub fn reorder_line_to_ltr(&self, segs_range: ops::Range<usize>) -> Vec<usize> {
278        let mut r = Vec::with_capacity(segs_range.len());
279        let offset = segs_range.start;
280        unicode_bidi_sort(
281            self.base_direction,
282            self.segments[segs_range].iter().map(|s| (s.kind, s.level)),
283            offset,
284            &mut r,
285        );
286        r
287    }
288
289    /// Find the nearest next char boundary from the byte index `i`.
290    ///
291    /// If `i` is larger than the text length, returns the text length, if `i` is
292    /// already a char boundary, returns `i`.
293    pub fn snap_char_boundary(&self, i: usize) -> usize {
294        if i >= self.text.len() {
295            self.text.len()
296        } else {
297            let mut next = i;
298            while !self.text.is_char_boundary(next) {
299                next += 1;
300            }
301            next
302        }
303    }
304
305    /// Find the nearest grapheme cluster boundary from the byte index `i`.
306    ///
307    /// If `i` is larger than the text length, returns the text length, if `i` is
308    /// already a grapheme boundary, returns `i`.
309    pub fn snap_grapheme_boundary(&self, i: usize) -> usize {
310        let i = self.snap_char_boundary(i);
311        if i == self.text.len() {
312            i
313        } else {
314            let mut seg_start = 0;
315            for seg in self.segments.iter() {
316                if seg.end > i {
317                    break;
318                }
319                seg_start = seg.end;
320            }
321            let s = &self.text[seg_start..];
322
323            let seg_i = i - seg_start;
324            let mut best_before = 0;
325            let mut best_after = s.len();
326            for (i, _) in unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true) {
327                if i > seg_i {
328                    best_after = i;
329                    break;
330                }
331                best_before = i;
332            }
333
334            let best = if best_after - seg_i > seg_i - best_before {
335                best_before
336            } else {
337                best_after
338            };
339            seg_start + best
340        }
341    }
342
343    /// Find the next grapheme cluster, after `from`.
344    ///
345    /// The `from` must be in a grapheme boundary or `0` or `len`. This operation is saturating.
346    ///
347    /// # Panics
348    ///
349    /// Panics if `from` is larger than the text length, or is not at a grapheme boundary.
350    pub fn next_insert_index(&self, from: usize) -> usize {
351        if from == self.text.len() {
352            from
353        } else {
354            let s = &self.text.as_str()[from..];
355            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true).map(|(i, _)| i + from);
356            assert_eq!(iter.next(), Some(from), "`from` was not a grapheme boundary");
357            iter.next().unwrap_or(self.text.len())
358        }
359    }
360
361    /// Find the previous grapheme cluster, before `from`.
362    ///
363    /// The `from` must be in a grapheme boundary or `0` or `len`. This operation is saturating.
364    ///
365    /// # Panics
366    ///
367    /// Panics if `from` is larger than the text length, or is not at a grapheme boundary.
368    pub fn prev_insert_index(&self, from: usize) -> usize {
369        if from == self.text.len() {
370            let s = &self.text.as_str()[..from];
371            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true)
372                .map(|(i, _)| i)
373                .rev();
374            iter.next().unwrap_or(0)
375        } else {
376            let s = self.text.as_str();
377
378            // from + 1_char, so that the `from` is the first yield in reverse if it is a valid grapheme boundary
379            let inclusive_from = s[from..].char_indices().nth(1).map(|(b, _)| from + b).unwrap_or_else(|| s.len());
380
381            let s = &self.text.as_str()[..inclusive_from];
382            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true)
383                .map(|(i, _)| i)
384                .rev();
385            assert_eq!(iter.next(), Some(from), "`from` was not a grapheme boundary");
386            iter.next().unwrap_or(0)
387        }
388    }
389
390    /// Find the start of the next word or the next line-break segment, after `from`.
391    ///
392    /// This operation is saturating.
393    pub fn next_word_index(&self, from: usize) -> usize {
394        let mut segs = self.segments[self.seg_from_char(from)..].iter();
395
396        if let Some(seg) = segs.next() {
397            if seg.kind.is_line_break() {
398                return seg.end;
399            }
400            let mut start = seg.end;
401            for seg in segs {
402                if seg.kind.is_word() || seg.kind.is_line_break() {
403                    return start;
404                }
405                start = seg.end;
406            }
407        }
408        self.text.len()
409    }
410
411    /// Find the next word segment end or the next line-break segment end, after `from`.
412    ///
413    /// This operation is saturating.
414    pub fn next_word_end_index(&self, from: usize) -> usize {
415        let mut segs = self.segments[self.seg_from_char(from)..].iter();
416        if let Some(seg) = segs.next() {
417            if seg.kind.is_word() || seg.kind.is_line_break() {
418                return seg.end;
419            }
420            for seg in segs {
421                if seg.kind.is_word() || seg.kind.is_line_break() {
422                    return seg.end;
423                }
424            }
425        }
426        self.text.len()
427    }
428
    /// Find the start of the previous word segment or the previous line-break segment, before `from`.
    ///
    /// The segments are visited in reverse, starting from the segment that contains `from`.
    ///
    /// This operation is saturating.
    pub fn prev_word_index(&self, from: usize) -> usize {
        let seg_i = self.seg_from_char(from);
        let mut segs = if seg_i < self.segments.len() {
            self.segments[..=seg_i].iter().rev()
        } else {
            // `from` is at or after the text end, visit all segments.
            self.segs().iter().rev()
        };
        // kind of the segment visited before the current `seg`, that is, the segment after it in the text.
        let mut seg_kind = TextSegmentKind::Space;
        for seg in &mut segs {
            if seg.end < from {
                // `seg.end` is the start of the segment of kind `seg_kind`.
                if seg_kind.is_word() || seg.kind.is_line_break() {
                    // last segment start or line-break end
                    return seg.end;
                }
                seg_kind = seg.kind;
                // continue the same search in the rest of the segments.
                for seg in segs {
                    if seg_kind.is_word() || seg.kind.is_line_break() {
                        // last segment start or line-break end
                        return seg.end;
                    }
                    seg_kind = seg.kind;
                }
                break;
            } else if seg.end == from && seg.kind.is_line_break() {
                // line-break start
                return segs.next().map(|p| p.end).unwrap_or(0);
            }
            seg_kind = seg.kind;
        }
        0
    }
463
464    /// Find the start of the line that contains `from`.
465    ///
466    /// # Panics
467    ///
468    /// Panics if `from` is larger than the text length, or is not a char boundary.
469    pub fn line_start_index(&self, from: usize) -> usize {
470        let line_break = self.text.as_str()[..from]
471            .char_indices()
472            .rev()
473            .find(|(_, c)| "\n\r\u{85}".contains(*c));
474
475        match line_break {
476            Some((i, _)) => i + 1,
477            None => 0,
478        }
479    }
480
481    /// Find the end of the line that contains `from`.
482    ///
483    /// # Panics
484    ///
485    /// Panics if `from` is larger than the text length, or is not a char boundary.
486    pub fn line_end_index(&self, from: usize) -> usize {
487        if from == self.text.len() {
488            return from;
489        }
490
491        let line_break = self.text.as_str()[from..].char_indices().find(|(_, c)| "\n\r\u{85}".contains(*c));
492
493        match line_break {
494            Some((i, _)) => from + i,
495            None => self.text.len(),
496        }
497    }
498
499    /// Find the range that must be removed to delete starting by `from` a `count` number of times.
500    ///
501    /// Delete **Del** action removes the next grapheme cluster, this is different from
502    /// [`backspace_range`] that usually only removes one character.
503    ///
504    /// # Panics
505    ///
506    /// Panics if `from` is larger than the text length, or is not a grapheme boundary.
507    ///
508    /// [`backspace_range`]: Self::backspace_range
509    pub fn delete_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
510        let mut end = from;
511        for _ in 0..count {
512            let e = self.next_insert_index(end);
513            if e == end {
514                break;
515            }
516            end = e;
517        }
518
519        from..end
520    }
521
522    /// Find the range that must be removed to backspace before `from` a `count` number of times.
523    ///
524    /// The character at `from` is not included, only the previous char is selected, with some exceptions,
525    /// the selection includes any char before zero-width-joiner (ZWJ), it also includes `\r` before `\n`
526    /// and Emoji char before Emoji modifier or variation selector (VS16).
527    ///
528    /// # Panics
529    ///
530    /// Panics if `from` is larger than the text length, or is not a char boundary.
531    pub fn backspace_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
532        let mut start = from;
533        for _ in 0..count {
534            let s = self.backspace_start(start);
535            if s == start {
536                break;
537            }
538            start = s;
539        }
540        start..from
541    }
    /// Find the start of the range removed by a single backspace before `from`.
    ///
    /// Returns `from` if there is no char before it.
    fn backspace_start(&self, from: usize) -> usize {
        let text = &self.text[..from];
        let mut start = from;
        // visits the chars before `from` in reverse, only ZWJ continues to a previous char.
        for (i, c) in text.char_indices().rev() {
            start = i;
            match c {
                '\u{200D}' => continue, // ZWJ
                '\n' => {
                    // "\r\n" is removed as a unit.
                    if text[..i].ends_with('\r') {
                        start = i - 1;
                    }
                }
                c if c == '\u{FE0F}' || emoji_util::is_modifier(c) => {
                    // VS16 || Emoji-Modifier
                    // also removes the char it modifies, if that char can be an Emoji.
                    if let Some((i, c)) = text[..i].char_indices().next_back()
                        && emoji_util::maybe_emoji(c)
                    {
                        start = i;
                    }
                }
                _ => {}
            }
            break;
        }
        start
    }
568
569    /// Find the range that must be removed to backspace words before `from` a `count` number of times.
570    ///
571    /// The character at `from` is not included, only the previous word is selected.
572    pub fn backspace_word_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
573        let mut start = from;
574        for _ in 0..count {
575            let s = self.prev_word_index(start);
576            if s == start {
577                break;
578            }
579            start = s;
580        }
581        start..from
582    }
583
584    /// Find the range that must be removed to delete words starting by `from` a `count` number of times.
585    pub fn delete_word_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
586        let mut end = from;
587        for _ in 0..count {
588            let e = self.next_word_end_index(end);
589            if e == end {
590                break;
591            }
592            end = e;
593        }
594
595        from..end
596    }
597}
598
599/// Compute initial bidirectional levels of each segment of a `line`.
600///
601/// The result is set in `levels`.
602pub fn unicode_bidi_levels(base_direction: LayoutDirection, line: impl Iterator<Item = TextSegmentKind>, levels: &mut Vec<BidiLevel>) {
603    let mut original_classes = Vec::with_capacity(line.size_hint().0);
604    let mut brackets = HashMap::default();
605    for (i, k) in line.enumerate() {
606        original_classes.push(k.into());
607        if let TextSegmentKind::Bracket(c) = k {
608            brackets.insert(i, c);
609        }
610    }
611
612    unicode_bidi_levels_impl(levels, base_direction, original_classes, brackets);
613}
614fn unicode_bidi_levels_impl(
615    levels: &mut Vec<BidiLevel>,
616    base_direction: LayoutDirection,
617    original_classes: Vec<unicode_bidi::BidiClass>,
618    brackets: HashMap<usize, char>,
619) {
620    levels.clear();
621    let para_level = into_unic_level(base_direction);
622    levels.resize(original_classes.len(), para_level);
623
624    if !original_classes.is_empty() {
625        let mut processing_classes = original_classes.clone();
626
627        super::unicode_bidi_util::explicit_compute(para_level, &original_classes, levels, &mut processing_classes);
628
629        let sequences = super::unicode_bidi_util::prepare_isolating_run_sequences(para_level, &original_classes, levels);
630        for sequence in &sequences {
631            super::unicode_bidi_util::implicit_resolve_weak(sequence, &mut processing_classes);
632            super::unicode_bidi_util::implicit_resolve_neutral(sequence, levels, &original_classes, &mut processing_classes, &brackets);
633        }
634        super::unicode_bidi_util::implicit_resolve_levels(&processing_classes, levels);
635
636        super::unicode_bidi_util::assign_levels_to_removed_chars(para_level, &original_classes, levels);
637    }
638}
639
640/// Compute a map of segments in `line` to their final LTR display order.
641///
642/// The result is set in `sort_map`.
643pub fn unicode_bidi_sort(
644    base_direction: LayoutDirection,
645    line: impl Iterator<Item = (TextSegmentKind, BidiLevel)>,
646    idx_offset: usize,
647    sort_map: &mut Vec<usize>,
648) {
649    sort_map.clear();
650
651    let cap = line.size_hint().0;
652    let mut line_classes = Vec::with_capacity(cap);
653    let mut levels = Vec::with_capacity(cap);
654    for (kind, level) in line {
655        line_classes.push(kind.into());
656        levels.push(level);
657    }
658
659    if !levels.is_empty() {
660        let (directions, vis_ranges) = super::unicode_bidi_util::visual_runs(levels, line_classes, into_unic_level(base_direction));
661
662        for vis_range in vis_ranges {
663            if directions[vis_range.start].is_rtl() {
664                for i in vis_range.rev() {
665                    sort_map.push(idx_offset + i);
666                }
667            } else {
668                for i in vis_range {
669                    sort_map.push(idx_offset + i);
670                }
671            }
672        }
673    }
674}
675
/// Segmented text iterator.
///
/// Yields the text slice and the [`TextSegment`] of each segment.
///
/// This `struct` is created by the [`SegmentedText::iter`] method.
pub struct SegmentedTextIter<'a> {
    // The full source text.
    text: &'a str,
    // Byte index in `text` where the next segment starts.
    start: usize,
    // Remaining segments.
    segs_iter: std::slice::Iter<'a, TextSegment>,
}
684impl<'a> Iterator for SegmentedTextIter<'a> {
685    type Item = (&'a str, TextSegment);
686    fn next(&mut self) -> Option<Self::Item> {
687        if let Some(&seg) = self.segs_iter.next() {
688            let r = Some((&self.text[self.start..seg.end], seg));
689            self.start = seg.end;
690            r
691        } else {
692            None
693        }
694    }
695}
696
697fn from_unic_level(d: unicode_bidi::Level) -> LayoutDirection {
698    if d.is_ltr() { LayoutDirection::LTR } else { LayoutDirection::RTL }
699}
700fn into_unic_level(d: LayoutDirection) -> unicode_bidi::Level {
701    match d {
702        LayoutDirection::LTR => unicode_bidi::Level::ltr(),
703        LayoutDirection::RTL => unicode_bidi::Level::rtl(),
704    }
705}
706
707#[cfg(test)]
708mod tests {
709    use zng_layout::context::{LayoutDirection, TextSegmentKind};
710    use zng_txt::ToTxt;
711
712    use crate::{BidiLevel, SegmentedText, TextSegment};
713
714    #[test]
715    fn segments() {
716        let test = "a\nb\r\nc\td ";
717        let actual = SegmentedText::new(test, LayoutDirection::LTR);
718
719        fn seg(kind: TextSegmentKind, end: usize) -> TextSegment {
720            TextSegment {
721                kind,
722                end,
723                level: BidiLevel::ltr(),
724            }
725        }
726        use TextSegmentKind::*;
727
728        let expected = SegmentedText {
729            text: test.to_txt(),
730            segments: vec![
731                seg(LeftToRight, 1),
732                seg(LineBreak, 2),
733                seg(LeftToRight, 3),
734                seg(LineBreak, 5),
735                seg(LeftToRight, 6),
736                seg(Tab, 7),
737                seg(LeftToRight, 8),
738                seg(Space, 9),
739            ],
740            base_direction: LayoutDirection::LTR,
741        };
742
743        assert_eq!(expected, actual);
744    }
745
746    #[test]
747    fn reorder_line() {
748        let test = "0 2 4";
749        let txt = SegmentedText::new(test, LayoutDirection::RTL);
750
751        let expected = vec![4, 3, 2, 1, 0];
752        let actual = txt.reorder_line_to_ltr(0..test.len());
753
754        assert_eq!(expected, actual);
755    }
756
757    #[test]
758    fn reorder_line_issue() {
759        let test = "      المادة 1";
760        let txt = SegmentedText::new(test, LayoutDirection::RTL);
761
762        let expected = vec![3, 2, 1, 0];
763        let actual = txt.reorder_line_to_ltr(0..4);
764
765        assert_eq!(expected, actual);
766    }
767
    // Emoji sequences (modifier, ZWJ, VS16 and keycap) are merged in a single Emoji segment,
    // digits and '#' not followed by VS16 keep their bidi kind.
    #[test]
    fn emoji_seg() {
        let test = "'🙎🏻‍♀️'1# 1️⃣#️⃣";
        let txt = SegmentedText::new(test, LayoutDirection::LTR);
        let k: Vec<_> = txt.segs().iter().map(|s| s.kind).collect();

        assert_eq!(
            vec![
                TextSegmentKind::OtherNeutral,       // '
                TextSegmentKind::Emoji,              // 🙎🏻‍♀️
                TextSegmentKind::OtherNeutral,       // '
                TextSegmentKind::EuropeanNumber,     // 1
                TextSegmentKind::EuropeanTerminator, // #
                TextSegmentKind::Space,
                TextSegmentKind::Emoji, // 1️⃣#️⃣
            ],
            k
        );
    }
787
    // Regression test, all segments of a flag sequence must be Emoji.
    #[test]
    fn emoji_issues() {
        let test = "🏴󠁧󠁢󠁥󠁮󠁧󠁿";
        let txt = SegmentedText::new(test, LayoutDirection::LTR);
        // NOTE(review): passes vacuously if no segment is produced.
        for (t, seg) in txt.iter() {
            assert_eq!(seg.kind, TextSegmentKind::Emoji, "text: {t:?}");
        }
    }
796}