zng_ext_font/
segmenting.rs

1use std::{collections::HashMap, ops};
2
3use crate::emoji_util;
4
5use super::Txt;
6use unicode_bidi::{BidiDataSource as _, BidiInfo};
7
8use zng_layout::context::LayoutDirection;
9pub use zng_layout::context::TextSegmentKind;
10
11pub use unicode_bidi::Level as BidiLevel;
12
/// Represents a single text segment in a [`SegmentedText`].
///
/// A segment does not own or reference the text, it only records where it ends, the start
/// is the `end` of the segment before it.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct TextSegment {
    /// Segment kind.
    pub kind: TextSegmentKind,
    /// Bidirectional level, it defines the direction of the segment in the context of
    /// other segments of the line.
    pub level: BidiLevel,

    /// Exclusive end byte index on the source text.
    ///
    /// The segment range starts from the `end` of the previous segment, or `0`, e.g: `prev_seg.end..self.end`.
    pub end: usize,
}
27impl TextSegment {
28    /// Direction of the glyphs in the segment.
29    ///
30    /// Segments iterate in the logical order, that is, the order the text is typed. If two segments
31    /// in the same line have direction `RTL` they must be layout the first to the right of the second.
32    pub fn direction(self) -> LayoutDirection {
33        from_unic_level(self.level)
34    }
35}
36
/// A string segmented in sequences of words, spaces, tabs and separated line breaks.
///
/// Each segment is tagged with a [`TextSegmentKind`] and is defined as
/// an offset from the last segment.
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct SegmentedText {
    // The full source text.
    text: Txt,
    // Segments in logical order, each one covers the text from the previous `end` to its own `end`.
    segments: Vec<TextSegment>,
    // Direction given at construction, it is the paragraph level used to resolve the segment levels.
    base_direction: LayoutDirection,
}
47impl SegmentedText {
48    /// New segmented text from any text type.
49    pub fn new(text: impl Into<Txt>, base_direction: LayoutDirection) -> Self {
50        Self::new_text(text.into(), base_direction)
51    }
52    fn new_text(text: Txt, base_direction: LayoutDirection) -> Self {
53        let mut segs: Vec<TextSegment> = vec![];
54        let text_str: &str = &text;
55        let bidi = BidiInfo::new(text_str, Some(into_unic_level(base_direction)));
56
57        for (offset, kind) in unicode_linebreak::linebreaks(text_str) {
58            // a hard-break is a '\n', '\r', "\r\n" or text end.
59            if let unicode_linebreak::BreakOpportunity::Mandatory = kind {
60                // start of this segment.
61                let start = segs.last().map(|s| s.end).unwrap_or(0);
62
63                // The segment can have other characters before the line-break character(s).
64
65                let seg = &text_str[start..offset];
66
67                let break_start = if seg.ends_with("\r\n") {
68                    // the break was a "\r\n"
69                    offset - 2
70                } else if seg.ends_with('\n') || seg.ends_with('\r') || seg.ends_with('\u{85}') {
71                    // the break was a '\n', '\r' or NEL
72                    offset - 1
73                } else {
74                    // "break" at end of string
75                    debug_assert_eq!(offset, text_str.len());
76                    offset
77                };
78
79                if break_start > start {
80                    // the segment has more characters than the line-break character(s).
81                    Self::push_seg(text_str, &bidi, &mut segs, break_start);
82                }
83                if break_start < offset {
84                    // the line break character(s).
85                    segs.push(TextSegment {
86                        kind: TextSegmentKind::LineBreak,
87                        end: offset,
88                        level: bidi.levels[break_start],
89                    })
90                }
91            }
92            // else soft break, handled by our own segmentation
93        }
94        SegmentedText {
95            text,
96            segments: segs,
97            base_direction,
98        }
99    }
100
101    fn push_seg(text: &str, bidi: &BidiInfo, segs: &mut Vec<TextSegment>, end: usize) {
102        let start = segs.last().map(|s| s.end).unwrap_or(0);
103
104        let mut char_indices = text[start..end].char_indices().peekable();
105
106        let mut kind = TextSegmentKind::LeftToRight;
107        let mut level = BidiLevel::ltr();
108        for (i, c) in &mut char_indices {
109            const ZWJ: char = '\u{200D}'; // ZERO WIDTH JOINER
110            const VS16: char = '\u{FE0F}'; // VARIANT SELECTOR 16 - Emoji
111            const CEK: char = '\u{20E3}'; // COMBINING ENCLOSING KEYCAP
112
113            let is_emoji = (kind == TextSegmentKind::Emoji // maybe
114                && (
115                    c == VS16 // definitely, modifies prev. char into Emoji.
116                    || c == CEK // definitely, modified prev. char into keycap style.
117                    || c == ZWJ // definitely, ligature with the next Emoji or is ignored.
118                    || emoji_util::is_modifier(c) // definitely, has same effect as VS16.
119                    || emoji_util::is_component(c) // definitely, ligature data, like flag tags.
120                ))
121                || (emoji_util::maybe_emoji(c) // maybe
122                    && (emoji_util::definitely_emoji(c) // definitely
123                        // only if followed by VS16 or modifier
124                        || (text[start+i..].chars().nth(1).map(|c| c == VS16 || emoji_util::is_modifier(c)).unwrap_or(false))));
125
126            let (c_kind, c_level) = if is_emoji {
127                (TextSegmentKind::Emoji, level)
128            } else {
129                let raw_k = TextSegmentKind::from(bidi.original_classes[start + i]);
130                if i > 0
131                    && let TextSegmentKind::NonSpacingMark = raw_k
132                {
133                    // special char that modifies the previous char (depending on font)
134                    // we just assume it is placed after a valid base char here, there is no
135                    // Unicode data that validates this
136                    (kind, level)
137                } else {
138                    let mut k = raw_k;
139                    if let TextSegmentKind::OtherNeutral = raw_k
140                        && unicode_bidi::HardcodedBidiData.bidi_matched_opening_bracket(c).is_some()
141                    {
142                        k = TextSegmentKind::Bracket(c);
143                    }
144                    (k, bidi.levels[start + i])
145                }
146            };
147
148            if c_kind != kind || c_level != level || !c_kind.can_merge() {
149                if i > 0 {
150                    segs.push(TextSegment {
151                        kind,
152                        end: i + start,
153                        level,
154                    });
155                }
156                level = c_level;
157                kind = c_kind;
158            }
159        }
160        segs.push(TextSegment { kind, end, level });
161    }
162
163    /// The text string.
164    pub fn text(&self) -> &Txt {
165        &self.text
166    }
167
168    /// The text segments.
169    pub fn segs(&self) -> &[TextSegment] {
170        &self.segments
171    }
172
173    /// Get segment index from a char index.
174    pub fn seg_from_char(&self, from: usize) -> usize {
175        match self.segments.binary_search_by_key(&from, |s| s.end) {
176            Ok(e) => e + 1,
177            Err(s) => s,
178        }
179    }
180
181    /// Contextual direction.
182    ///
183    /// Note that each segment can override the direction, and even the entire text can be a sequence in
184    /// the opposite direction.
185    pub fn base_direction(&self) -> LayoutDirection {
186        self.base_direction
187    }
188
189    /// Gets if the text contains segments not in the base direction.
190    pub fn is_bidi(&self) -> bool {
191        for seg in self.segments.iter() {
192            if seg.direction() != self.base_direction {
193                return true;
194            }
195        }
196        false
197    }
198
199    /// Returns the text segment if `index` is in bounds.
200    pub fn get(&self, index: usize) -> Option<(&str, TextSegment)> {
201        if let Some(&seg) = self.segments.get(index) {
202            let text = if index == 0 {
203                &self.text[..seg.end]
204            } else {
205                &self.text[self.segments[index - 1].end..seg.end]
206            };
207
208            Some((text, seg))
209        } else {
210            None
211        }
212    }
213
214    /// Returns a clone of the text segment if `index` is in bounds.
215    pub fn get_clone(&self, index: usize) -> Option<SegmentedText> {
216        self.get(index).map(|(txt, seg)| SegmentedText {
217            text: txt.to_owned().into(),
218            segments: vec![TextSegment { end: txt.len(), ..seg }],
219            base_direction: self.base_direction,
220        })
221    }
222
223    /// Returns `true` if text and segments are empty.
224    pub fn is_empty(&self) -> bool {
225        self.segments.is_empty()
226    }
227
228    /// Destructs `self` into the text and segments.
229    pub fn into_parts(self) -> (Txt, Vec<TextSegment>, LayoutDirection) {
230        (self.text, self.segments, self.base_direction)
231    }
232
233    /// New segmented text from [parts](Self::into_parts).
234    ///
235    /// # Panics
236    ///
237    /// Some basic validation is done on the input:
238    ///
239    /// * If one of the inputs is empty but the other is not.
240    /// * If text is not empty and the last segment does not end with the text.
241    pub fn from_parts(text: Txt, segments: Vec<TextSegment>, base_direction: LayoutDirection) -> Self {
242        assert_eq!(text.is_empty(), segments.is_empty());
243        if !text.is_empty() {
244            assert!(segments.last().unwrap().end == text.len());
245        }
246
247        SegmentedText {
248            text,
249            segments,
250            base_direction,
251        }
252    }
253
254    /// Segments iterator.
255    ///
256    /// # Examples
257    ///
258    /// ```
259    /// # use zng_ext_font::SegmentedText;
260    /// # use zng_layout::context::LayoutDirection;
261    /// for (sub_str, seg) in SegmentedText::new("Foo bar!\nBaz.", LayoutDirection::LTR).iter() {
262    ///     println!("s: {sub_str:?} is a `{:?}`", seg.kind);
263    /// }
264    /// ```
265    pub fn iter(&self) -> SegmentedTextIter<'_> {
266        SegmentedTextIter {
267            text: &self.text,
268            start: 0,
269            segs_iter: self.segments.iter(),
270        }
271    }
272
273    /// Convert a segments range to a text bytes range.
274    pub fn text_range(&self, segs_range: ops::Range<usize>) -> ops::Range<usize> {
275        let start = if segs_range.start == 0 {
276            0
277        } else {
278            self.segments[segs_range.start - 1].end
279        };
280        let end = self.segments[..segs_range.end].last().map(|s| s.end).unwrap_or(0);
281        start..end
282    }
283
284    /// Compute a map of segments in `segs_range` to their final LTR display order.
285    ///
286    /// The `segs_range` must be the segments of a line after line wrap.
287    pub fn reorder_line_to_ltr(&self, segs_range: ops::Range<usize>) -> Vec<usize> {
288        let mut r = Vec::with_capacity(segs_range.len());
289        let offset = segs_range.start;
290        unicode_bidi_sort(
291            self.base_direction,
292            self.segments[segs_range].iter().map(|s| (s.kind, s.level)),
293            offset,
294            &mut r,
295        );
296        r
297    }
298
299    /// Find the nearest next char boundary from the byte index `i`.
300    ///
301    /// If `i` is larger than the text length, returns the text length, if `i` is
302    /// already a char boundary, returns `i`.
303    pub fn snap_char_boundary(&self, i: usize) -> usize {
304        if i >= self.text.len() {
305            self.text.len()
306        } else {
307            let mut next = i;
308            while !self.text.is_char_boundary(next) {
309                next += 1;
310            }
311            next
312        }
313    }
314
315    /// Find the nearest grapheme cluster boundary from the byte index `i`.
316    ///
317    /// If `i` is larger than the text length, returns the text length, if `i` is
318    /// already a grapheme boundary, returns `i`.
319    pub fn snap_grapheme_boundary(&self, i: usize) -> usize {
320        let i = self.snap_char_boundary(i);
321        if i == self.text.len() {
322            i
323        } else {
324            let mut seg_start = 0;
325            for seg in self.segments.iter() {
326                if seg.end > i {
327                    break;
328                }
329                seg_start = seg.end;
330            }
331            let s = &self.text[seg_start..];
332
333            let seg_i = i - seg_start;
334            let mut best_before = 0;
335            let mut best_after = s.len();
336            for (i, _) in unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true) {
337                if i > seg_i {
338                    best_after = i;
339                    break;
340                }
341                best_before = i;
342            }
343
344            let best = if best_after - seg_i > seg_i - best_before {
345                best_before
346            } else {
347                best_after
348            };
349            seg_start + best
350        }
351    }
352
353    /// Find the next grapheme cluster, after `from`.
354    ///
355    /// The `from` must be in a grapheme boundary or `0` or `len`. This operation is saturating.
356    ///
357    /// # Panics
358    ///
359    /// Panics if `from` is larger than the text length, or is not at a grapheme boundary.
360    pub fn next_insert_index(&self, from: usize) -> usize {
361        if from == self.text.len() {
362            from
363        } else {
364            let s = &self.text.as_str()[from..];
365            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true).map(|(i, _)| i + from);
366            assert_eq!(iter.next(), Some(from), "`from` was not a grapheme boundary");
367            iter.next().unwrap_or(self.text.len())
368        }
369    }
370
371    /// Find the previous grapheme cluster, before `from`.
372    ///
373    /// The `from` must be in a grapheme boundary or `0` or `len`. This operation is saturating.
374    ///
375    /// # Panics
376    ///
377    /// Panics if `from` is larger than the text length, or is not at a grapheme boundary.
378    pub fn prev_insert_index(&self, from: usize) -> usize {
379        if from == self.text.len() {
380            let s = &self.text.as_str()[..from];
381            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true)
382                .map(|(i, _)| i)
383                .rev();
384            iter.next().unwrap_or(0)
385        } else {
386            let s = self.text.as_str();
387
388            // from + 1_char, so that the `from` is the first yield in reverse if it is a valid grapheme boundary
389            let inclusive_from = s[from..].char_indices().nth(1).map(|(b, _)| from + b).unwrap_or_else(|| s.len());
390
391            let s = &self.text.as_str()[..inclusive_from];
392            let mut iter = unicode_segmentation::UnicodeSegmentation::grapheme_indices(s, true)
393                .map(|(i, _)| i)
394                .rev();
395            assert_eq!(iter.next(), Some(from), "`from` was not a grapheme boundary");
396            iter.next().unwrap_or(0)
397        }
398    }
399
400    /// Find the start of the next word or the next line-break segment, after `from`.
401    ///
402    /// This operation is saturating.
403    pub fn next_word_index(&self, from: usize) -> usize {
404        let mut segs = self.segments[self.seg_from_char(from)..].iter();
405
406        if let Some(seg) = segs.next() {
407            if seg.kind.is_line_break() {
408                return seg.end;
409            }
410            let mut start = seg.end;
411            for seg in segs {
412                if seg.kind.is_word() || seg.kind.is_line_break() {
413                    return start;
414                }
415                start = seg.end;
416            }
417        }
418        self.text.len()
419    }
420
421    /// Find the next word segment end or the next line-break segment end, after `from`.
422    ///
423    /// This operation is saturating.
424    pub fn next_word_end_index(&self, from: usize) -> usize {
425        let mut segs = self.segments[self.seg_from_char(from)..].iter();
426        if let Some(seg) = segs.next() {
427            if seg.kind.is_word() || seg.kind.is_line_break() {
428                return seg.end;
429            }
430            for seg in segs {
431                if seg.kind.is_word() || seg.kind.is_line_break() {
432                    return seg.end;
433                }
434            }
435        }
436        self.text.len()
437    }
438
439    /// Find the start of the previous word segment or the previous line-break segment, before `from`.
440    ///
441    /// This operation is saturating.
442    pub fn prev_word_index(&self, from: usize) -> usize {
443        let seg_i = self.seg_from_char(from);
444        let mut segs = if seg_i < self.segments.len() {
445            self.segments[..=seg_i].iter().rev()
446        } else {
447            self.segs().iter().rev()
448        };
449        let mut seg_kind = TextSegmentKind::Space;
450        for seg in &mut segs {
451            if seg.end < from {
452                if seg_kind.is_word() || seg.kind.is_line_break() {
453                    // last segment start or line-break end
454                    return seg.end;
455                }
456                seg_kind = seg.kind;
457                for seg in segs {
458                    if seg_kind.is_word() || seg.kind.is_line_break() {
459                        // last segment start or line-break end
460                        return seg.end;
461                    }
462                    seg_kind = seg.kind;
463                }
464                break;
465            } else if seg.end == from && seg.kind.is_line_break() {
466                // line-break start
467                return segs.next().map(|p| p.end).unwrap_or(0);
468            }
469            seg_kind = seg.kind;
470        }
471        0
472    }
473
474    /// Find the start of the line that contains `from`.
475    ///
476    /// # Panics
477    ///
478    /// Panics if `from` is larger than the text length, or is not a char boundary.
479    pub fn line_start_index(&self, from: usize) -> usize {
480        let line_break = self.text.as_str()[..from]
481            .char_indices()
482            .rev()
483            .find(|(_, c)| "\n\r\u{85}".contains(*c));
484
485        match line_break {
486            Some((i, _)) => i + 1,
487            None => 0,
488        }
489    }
490
491    /// Find the end of the line that contains `from`.
492    ///
493    /// # Panics
494    ///
495    /// Panics if `from` is larger than the text length, or is not a char boundary.
496    pub fn line_end_index(&self, from: usize) -> usize {
497        if from == self.text.len() {
498            return from;
499        }
500
501        let line_break = self.text.as_str()[from..].char_indices().find(|(_, c)| "\n\r\u{85}".contains(*c));
502
503        match line_break {
504            Some((i, _)) => from + i,
505            None => self.text.len(),
506        }
507    }
508
509    /// Find the range that must be removed to delete starting by `from` a `count` number of times.
510    ///
511    /// Delete **Del** action removes the next grapheme cluster, this is different from
512    /// [`backspace_range`] that usually only removes one character.
513    ///
514    /// # Panics
515    ///
516    /// Panics if `from` is larger than the text length, or is not a grapheme boundary.
517    ///
518    /// [`backspace_range`]: Self::backspace_range
519    pub fn delete_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
520        let mut end = from;
521        for _ in 0..count {
522            let e = self.next_insert_index(end);
523            if e == end {
524                break;
525            }
526            end = e;
527        }
528
529        from..end
530    }
531
532    /// Find the range that must be removed to backspace before `from` a `count` number of times.
533    ///
534    /// The character at `from` is not included, only the previous char is selected, with some exceptions,
535    /// the selection includes any char before zero-width-joiner (ZWJ), it also includes `\r` before `\n`
536    /// and Emoji char before Emoji modifier or variation selector (VS16).
537    ///
538    /// # Panics
539    ///
540    /// Panics if `from` is larger than the text length, or is not a char boundary.
541    pub fn backspace_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
542        let mut start = from;
543        for _ in 0..count {
544            let s = self.backspace_start(start);
545            if s == start {
546                break;
547            }
548            start = s;
549        }
550        start..from
551    }
552    fn backspace_start(&self, from: usize) -> usize {
553        let text = &self.text[..from];
554        let mut start = from;
555        for (i, c) in text.char_indices().rev() {
556            start = i;
557            match c {
558                '\u{200D}' => continue, // ZWJ
559                '\n' => {
560                    if text[..i].ends_with('\r') {
561                        start = i - 1;
562                    }
563                }
564                c if c == '\u{FE0F}' || emoji_util::is_modifier(c) => {
565                    // VS16 || Emoji-Modifier
566                    if let Some((i, c)) = text[..i].char_indices().next_back()
567                        && emoji_util::maybe_emoji(c)
568                    {
569                        start = i;
570                    }
571                }
572                _ => {}
573            }
574            break;
575        }
576        start
577    }
578
579    /// Find the range that must be removed to backspace words before `from` a `count` number of times.
580    ///
581    /// The character at `from` is not included, only the previous word is selected.
582    pub fn backspace_word_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
583        let mut start = from;
584        for _ in 0..count {
585            let s = self.prev_word_index(start);
586            if s == start {
587                break;
588            }
589            start = s;
590        }
591        start..from
592    }
593
594    /// Find the range that must be removed to delete words starting by `from` a `count` number of times.
595    pub fn delete_word_range(&self, from: usize, count: u32) -> std::ops::Range<usize> {
596        let mut end = from;
597        for _ in 0..count {
598            let e = self.next_word_end_index(end);
599            if e == end {
600                break;
601            }
602            end = e;
603        }
604
605        from..end
606    }
607}
608
609/// Compute initial bidirectional levels of each segment of a `line`.
610///
611/// The result is set in `levels`.
612pub fn unicode_bidi_levels(base_direction: LayoutDirection, line: impl Iterator<Item = TextSegmentKind>, levels: &mut Vec<BidiLevel>) {
613    let mut original_classes = Vec::with_capacity(line.size_hint().0);
614    let mut brackets = HashMap::default();
615    for (i, k) in line.enumerate() {
616        original_classes.push(k.into());
617        if let TextSegmentKind::Bracket(c) = k {
618            brackets.insert(i, c);
619        }
620    }
621
622    unicode_bidi_levels_impl(levels, base_direction, original_classes, brackets);
623}
624fn unicode_bidi_levels_impl(
625    levels: &mut Vec<BidiLevel>,
626    base_direction: LayoutDirection,
627    original_classes: Vec<unicode_bidi::BidiClass>,
628    brackets: HashMap<usize, char>,
629) {
630    levels.clear();
631    let para_level = into_unic_level(base_direction);
632    levels.resize(original_classes.len(), para_level);
633
634    if !original_classes.is_empty() {
635        let mut processing_classes = original_classes.clone();
636
637        super::unicode_bidi_util::explicit_compute(para_level, &original_classes, levels, &mut processing_classes);
638
639        let sequences = super::unicode_bidi_util::prepare_isolating_run_sequences(para_level, &original_classes, levels);
640        for sequence in &sequences {
641            super::unicode_bidi_util::implicit_resolve_weak(sequence, &mut processing_classes);
642            super::unicode_bidi_util::implicit_resolve_neutral(sequence, levels, &original_classes, &mut processing_classes, &brackets);
643        }
644        super::unicode_bidi_util::implicit_resolve_levels(&processing_classes, levels);
645
646        super::unicode_bidi_util::assign_levels_to_removed_chars(para_level, &original_classes, levels);
647    }
648}
649
650/// Compute a map of segments in `line` to their final LTR display order.
651///
652/// The result is set in `sort_map`.
653pub fn unicode_bidi_sort(
654    base_direction: LayoutDirection,
655    line: impl Iterator<Item = (TextSegmentKind, BidiLevel)>,
656    idx_offset: usize,
657    sort_map: &mut Vec<usize>,
658) {
659    sort_map.clear();
660
661    let cap = line.size_hint().0;
662    let mut line_classes = Vec::with_capacity(cap);
663    let mut levels = Vec::with_capacity(cap);
664    for (kind, level) in line {
665        line_classes.push(kind.into());
666        levels.push(level);
667    }
668
669    if !levels.is_empty() {
670        let (directions, vis_ranges) = super::unicode_bidi_util::visual_runs(levels, line_classes, into_unic_level(base_direction));
671
672        for vis_range in vis_ranges {
673            if directions[vis_range.start].is_rtl() {
674                for i in vis_range.rev() {
675                    sort_map.push(idx_offset + i);
676                }
677            } else {
678                for i in vis_range {
679                    sort_map.push(idx_offset + i);
680                }
681            }
682        }
683    }
684}
685
/// Segmented text iterator.
///
/// Yields each segment with the text it covers.
///
/// This `struct` is created by the [`SegmentedText::iter`] method.
pub struct SegmentedTextIter<'a> {
    // The full source text.
    text: &'a str,
    // Start of the next segment, that is the end of the last segment yielded.
    start: usize,
    // Segments not yielded yet.
    segs_iter: std::slice::Iter<'a, TextSegment>,
}
694impl<'a> Iterator for SegmentedTextIter<'a> {
695    type Item = (&'a str, TextSegment);
696    fn next(&mut self) -> Option<Self::Item> {
697        if let Some(&seg) = self.segs_iter.next() {
698            let r = Some((&self.text[self.start..seg.end], seg));
699            self.start = seg.end;
700            r
701        } else {
702            None
703        }
704    }
705}
706
707fn from_unic_level(d: unicode_bidi::Level) -> LayoutDirection {
708    if d.is_ltr() { LayoutDirection::LTR } else { LayoutDirection::RTL }
709}
710fn into_unic_level(d: LayoutDirection) -> unicode_bidi::Level {
711    match d {
712        LayoutDirection::LTR => unicode_bidi::Level::ltr(),
713        LayoutDirection::RTL => unicode_bidi::Level::rtl(),
714    }
715}
716
#[cfg(test)]
mod tests {
    use zng_layout::context::{LayoutDirection, TextSegmentKind};
    use zng_txt::ToTxt;

    use crate::{BidiLevel, SegmentedText, TextSegment};

    /// Line-breaks, tabs and spaces are separated from the words.
    #[test]
    fn segments() {
        use TextSegmentKind::*;

        let source = "a\nb\r\nc\td ";

        // kind and end of each expected segment, all left-to-right.
        let kinds_and_ends = [
            (LeftToRight, 1),
            (LineBreak, 2),
            (LeftToRight, 3),
            (LineBreak, 5),
            (LeftToRight, 6),
            (Tab, 7),
            (LeftToRight, 8),
            (Space, 9),
        ];
        let expected = SegmentedText {
            text: source.to_txt(),
            segments: kinds_and_ends
                .into_iter()
                .map(|(kind, end)| TextSegment {
                    kind,
                    end,
                    level: BidiLevel::ltr(),
                })
                .collect(),
            base_direction: LayoutDirection::LTR,
        };

        let actual = SegmentedText::new(source, LayoutDirection::LTR);

        assert_eq!(expected, actual);
    }

    /// Numbers separated by spaces are fully reversed in a right-to-left context.
    #[test]
    fn reorder_line() {
        let txt = SegmentedText::new("0 2 4", LayoutDirection::RTL);

        // each char is a segment.
        let actual = txt.reorder_line_to_ltr(0..txt.segs().len());

        assert_eq!(vec![4, 3, 2, 1, 0], actual);
    }

    /// Leading spaces before right-to-left text are also reordered.
    #[test]
    fn reorder_line_issue() {
        let txt = SegmentedText::new("      المادة 1", LayoutDirection::RTL);

        let actual = txt.reorder_line_to_ltr(0..4);

        assert_eq!(vec![3, 2, 1, 0], actual);
    }

    /// Emoji sequences and keycap sequences are each a single Emoji segment.
    #[test]
    fn emoji_seg() {
        let txt = SegmentedText::new("'🙎🏻‍♀️'1# 1️⃣#️⃣", LayoutDirection::LTR);

        let expected = vec![
            TextSegmentKind::OtherNeutral,       // '
            TextSegmentKind::Emoji,              // 🙎🏻‍♀️
            TextSegmentKind::OtherNeutral,       // '
            TextSegmentKind::EuropeanNumber,     // 1
            TextSegmentKind::EuropeanTerminator, // #
            TextSegmentKind::Space,
            TextSegmentKind::Emoji, // 1️⃣#️⃣
        ];
        let actual: Vec<_> = txt.segs().iter().map(|seg| seg.kind).collect();

        assert_eq!(expected, actual);
    }

    /// Flag tag sequences are segmented as Emoji.
    #[test]
    fn emoji_issues() {
        let txt = SegmentedText::new("🏴󠁧󠁢󠁥󠁮󠁧󠁿", LayoutDirection::LTR);

        for (t, seg) in txt.iter() {
            assert_eq!(seg.kind, TextSegmentKind::Emoji, "text: {t:?}");
        }
    }
}
806}