diff --git a/shared-lib/lib-ot/src/core/delta/delta.rs b/shared-lib/lib-ot/src/core/delta/delta.rs index 2193730898..333014b4eb 100644 --- a/shared-lib/lib-ot/src/core/delta/delta.rs +++ b/shared-lib/lib-ot/src/core/delta/delta.rs @@ -150,17 +150,17 @@ where return Err(ErrorBuilder::new(OTErrorCode::IncompatibleLength).build()); } let mut new_s = String::new(); - let utf16_iter = &mut s.utf16_iter(); + let code_point_iter = &mut s.code_point_iter(); for op in &self.ops { match &op { Operation::Retain(retain) => { - for c in utf16_iter.take(retain.n as usize) { - new_s.push_str(&c); + for c in code_point_iter.take(retain.n as usize) { + new_s.push_str(str::from_utf8(c.0).unwrap_or("")); } }, Operation::Delete(delete) => { for _ in 0..*delete { - utf16_iter.next(); + code_point_iter.next(); } }, Operation::Insert(insert) => { @@ -187,7 +187,7 @@ where } }, Operation::Insert(insert) => { - inverted.delete(insert.count_of_code_units()); + inverted.delete(insert.count_of_utf16_code_units()); }, Operation::Delete(delete) => { inverted.insert(&chars.take(*delete as usize).collect::(), op.get_attributes()); @@ -294,12 +294,12 @@ where (Some(Operation::Insert(insert)), _) => { // let composed_attrs = transform_attributes(&next_op1, &next_op2, true); a_prime.insert(&insert.s, insert.attributes.clone()); - b_prime.retain(insert.count_of_code_units(), insert.attributes.clone()); + b_prime.retain(insert.count_of_utf16_code_units(), insert.attributes.clone()); next_op1 = ops1.next(); }, (_, Some(Operation::Insert(o_insert))) => { let composed_attrs = transform_op_attribute(&next_op1, &next_op2)?; - a_prime.retain(o_insert.count_of_code_units(), composed_attrs.clone()); + a_prime.retain(o_insert.count_of_utf16_code_units(), composed_attrs.clone()); b_prime.insert(&o_insert.s, composed_attrs); next_op2 = ops2.next(); }, diff --git a/shared-lib/lib-ot/src/core/delta/iterator.rs b/shared-lib/lib-ot/src/core/delta/iterator.rs index 5a45e21ca8..1818ed84d1 100644 --- a/shared-lib/lib-ot/src/core/delta/iterator.rs +++ b/shared-lib/lib-ot/src/core/delta/iterator.rs @@ -181,7 +181,7 @@ where Operation::::Insert(insert) => { tracing::trace!("extend insert attributes with {} ", &insert.attributes); attributes.extend_other(insert.attributes.clone()); - length = insert.count_of_code_units(); + length = insert.count_of_utf16_code_units(); }, } diff --git a/shared-lib/lib-ot/src/core/flowy_str.rs b/shared-lib/lib-ot/src/core/flowy_str.rs index 6039319a4b..d0e4fe3b93 100644 --- a/shared-lib/lib-ot/src/core/flowy_str.rs +++ b/shared-lib/lib-ot/src/core/flowy_str.rs @@ -5,11 +5,12 @@ use std::{fmt, fmt::Formatter, slice}; pub struct FlowyStr(pub String); impl FlowyStr { + // https://stackoverflow.com/questions/2241348/what-is-unicode-utf-8-utf-16 pub fn count_utf16_code_units(&self) -> usize { count_utf16_code_units(&self.0) } - pub fn utf16_iter(&self) -> FlowyUtf16Iterator { FlowyUtf16Iterator::new(self, 0) } + pub fn utf16_iter(&self) -> FlowyUtf16CodePointIterator { FlowyUtf16CodePointIterator::new(self, 0) } - pub fn code_point_iter(&self) -> CodePointIterator { CodePointIterator::new(self) } + pub fn code_point_iter(&self) -> Utf16CodeUnitIterator { Utf16CodeUnitIterator::new(self) } pub fn sub_str(&self, interval: Interval) -> String { match self.with_interval(interval) { @@ -19,7 +20,7 @@ impl FlowyStr { } pub fn with_interval(&self, interval: Interval) -> Option { - let mut iter = CodePointIterator::new(self); + let mut iter = Utf16CodeUnitIterator::new(self); let mut buf = vec![]; while let Some((byte, _len)) = iter.next() { if interval.start < iter.code_point_offset && interval.end >= iter.code_point_offset { @@ -38,7 +39,7 @@ impl FlowyStr { } } -pub struct CodePointIterator<'a> { +pub struct Utf16CodeUnitIterator<'a> { s: &'a FlowyStr, bytes_offset: usize, code_point_offset: usize, @@ -46,9 +47,9 @@ pub struct CodePointIterator<'a> { iter: slice::Iter<'a, u8>, } -impl<'a> CodePointIterator<'a> { +impl<'a> Utf16CodeUnitIterator<'a> { pub fn new(s: &'a FlowyStr) -> Self { - CodePointIterator { + Utf16CodeUnitIterator { s, bytes_offset: 0, code_point_offset: 0, @@ -58,7 +59,7 @@ impl<'a> CodePointIterator<'a> { } } -impl<'a> Iterator for CodePointIterator<'a> { +impl<'a> Iterator for Utf16CodeUnitIterator<'a> { type Item = (&'a [u8], usize); fn next(&mut self) -> Option { @@ -168,19 +169,19 @@ impl<'de> Deserialize<'de> for FlowyStr { } } -pub struct FlowyUtf16Iterator<'a> { +pub struct FlowyUtf16CodePointIterator<'a> { s: &'a FlowyStr, offset: usize, } -impl<'a> FlowyUtf16Iterator<'a> { - pub fn new(s: &'a FlowyStr, offset: usize) -> Self { FlowyUtf16Iterator { s, offset } } +impl<'a> FlowyUtf16CodePointIterator<'a> { + pub fn new(s: &'a FlowyStr, offset: usize) -> Self { FlowyUtf16CodePointIterator { s, offset } } } use crate::core::Interval; use std::str; -impl<'a> Iterator for FlowyUtf16Iterator<'a> { +impl<'a> Iterator for FlowyUtf16CodePointIterator<'a> { type Item = String; fn next(&mut self) -> Option { diff --git a/shared-lib/lib-ot/src/core/operation/operation.rs b/shared-lib/lib-ot/src/core/operation/operation.rs index 13a775e356..ca2bb87734 100644 --- a/shared-lib/lib-ot/src/core/operation/operation.rs +++ b/shared-lib/lib-ot/src/core/operation/operation.rs @@ -67,7 +67,7 @@ where match self { Operation::Delete(n) => *n, Operation::Retain(r) => r.n, - Operation::Insert(i) => i.count_of_code_units(), + Operation::Insert(i) => i.count_of_utf16_code_units(), } } @@ -95,7 +95,7 @@ where .build(), ); right = Some( - OpBuilder::::insert(&insert.s[index..insert.count_of_code_units()]) + OpBuilder::::insert(&insert.s[index..insert.count_of_utf16_code_units()]) .attributes(attributes) .build(), ); @@ -112,7 +112,7 @@ where .attributes(retain.attributes.clone()) .build(), Operation::Insert(insert) => { - if interval.start > insert.count_of_code_units() { + if interval.start > insert.count_of_utf16_code_units() { OpBuilder::insert("").build() } else { // let s = &insert @@ -291,7 +291,7 @@ impl Insert where T: Attributes, { - pub fn count_of_code_units(&self) -> usize { self.s.count_utf16_code_units() } + pub fn count_of_utf16_code_units(&self) -> usize { self.s.count_utf16_code_units() } pub fn merge_or_new_op(&mut self, s: &str, attributes: T) -> Option> { if self.attributes == attributes {