actually, remove normalization entirely

Normalization has some weird edge cases that cause incorrect behavior, because we don't implement full grapheme segmentation. For instance, a single codepoint can be normalized into three different codepoints, and some codepoints are combining characters but still have width != 0.
author: Jesse Luehrs <doy@tozt.net> 2019-12-07 02:01:53 -0500
committer: Jesse Luehrs <doy@tozt.net> 2019-12-07 02:07:11 -0500
commit: 1075c2888dffb261164d1f3c5a5d216c4a3f0d35 (patch)
tree: f91d45e28da8ef705e00d826c121a81851c32698
parent: c4d890090e20a2c3b7c6eb03feb2f6e6033c9caf (diff)
download: vt100-rust-1075c2888dffb261164d1f3c5a5d216c4a3f0d35.tar.gz
vt100-rust-1075c2888dffb261164d1f3c5a5d216c4a3f0d35.zip
11 files changed, 24 insertions, 54 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 9e60834..14d7774 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,14 +12,10 @@ keywords = ["terminal", "vt100"]
 categories = ["command-line-interface", "encoding"]
 license = "MIT"
 
-[features]
-default = ["unicode-normalization"]
-
 [dependencies]
 itoa = "0.4"
 enumset = "0.4"
 log = "0.4"
-unicode-normalization = { version = "0.1", optional = true }
 unicode-width = "0.1"
 vte = "0.3"
 
diff --git a/src/cell.rs b/src/cell.rs
index f9f9056..45e795b 100644
--- a/src/cell.rs
+++ b/src/cell.rs
@@ -51,45 +51,6 @@ impl Cell {
 
         self.contents[self.len()] = c;
         self.len += 1;
-
-        self.normalize();
-    }
-
-    #[cfg(not(feature = "unicode-normalization"))]
-    #[inline]
-    fn normalize(&mut self) {}
-
-    #[cfg(feature = "unicode-normalization")]
-    #[inline]
-    fn normalize(&mut self) {
-        use unicode_normalization::UnicodeNormalization as _;
-
-        // some fonts have combined characters but can't render combining
-        // characters correctly, so try to prefer precombined characters when
-        // possible
-        if unicode_normalization::is_nfc_quick(
-            self.contents.iter().copied().take(CODEPOINTS_IN_CELL),
-        ) == unicode_normalization::IsNormalized::Yes
-        {
-            return;
-        }
-
-        let mut new_contents = ['\x00'; CODEPOINTS_IN_CELL];
-        let mut new_len = 0;
-        for c in self
-            .contents
-            .iter()
-            .copied()
-            .take(self.len())
-            .nfc()
-            .take(CODEPOINTS_IN_CELL)
-        {
-            new_contents[new_len as usize] = c;
-            new_len += 1;
-        }
-        self.contents = new_contents;
-        self.len = new_len;
-        self.set_wide(new_contents[0].width().unwrap_or(0) > 1);
     }
 
     pub(crate) fn clear(&mut self, attrs: crate::attrs::Attrs) {
diff --git a/tests/data/fixtures/combining/11.json b/tests/data/fixtures/combining/11.json
index e973c78..6757b45 100644
--- a/tests/data/fixtures/combining/11.json
+++ b/tests/data/fixtures/combining/11.json
@@ -1,5 +1,5 @@
 {
-  "contents": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaá",
+  "contents": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaá",
   "cells": {
     "0,0": {
       "contents": "a"
@@ -233,7 +233,7 @@
       "contents": "a"
     },
     "0,79": {
-      "contents": "á"
+      "contents": "á"
     },
     "0,8": {
       "contents": "a"
diff --git a/tests/data/fixtures/combining/2.json b/tests/data/fixtures/combining/2.json
index 6dce798..8317208 100644
--- a/tests/data/fixtures/combining/2.json
+++ b/tests/data/fixtures/combining/2.json
@@ -1,8 +1,8 @@
 {
-  "contents": "á",
+  "contents": "á",
   "cells": {
     "0,0": {
-      "contents": "á"
+      "contents": "á"
     }
   },
   "cursor_position": [
diff --git a/tests/data/fixtures/combining/4.json b/tests/data/fixtures/combining/4.json
index 4044d4d..74a34b1 100644
--- a/tests/data/fixtures/combining/4.json
+++ b/tests/data/fixtures/combining/4.json
@@ -1,5 +1,5 @@
 {
-  "contents": "abcdéfg",
+  "contents": "abcdéfg",
   "cells": {
     "0,0": {
       "contents": "a"
@@ -14,7 +14,7 @@
       "contents": "d"
     },
     "0,4": {
-      "contents": "é"
+      "contents": "é"
     },
     "0,5": {
       "contents": "f"
diff --git a/tests/data/fixtures/combining/5.json b/tests/data/fixtures/combining/5.json
index 67600d7..f655d0f 100644
--- a/tests/data/fixtures/combining/5.json
+++ b/tests/data/fixtures/combining/5.json
@@ -1,5 +1,5 @@
 {
-  "contents": "abcdéfg\n\n\n\n\n\n\n\n\n                                                                             aaa",
+  "contents": "abcdéfg\n\n\n\n\n\n\n\n\n                                                                             aaa",
   "cells": {
     "0,0": {
       "contents": "a"
@@ -14,7 +14,7 @@
       "contents": "d"
     },
     "0,4": {
-      "contents": "é"
+      "contents": "é"
     },
     "0,5": {
       "contents": "f"
diff --git a/tests/data/fixtures/combining/6.json b/tests/data/fixtures/combining/6.json
index cbb481a..c0ed465 100644
--- a/tests/data/fixtures/combining/6.json
+++ b/tests/data/fixtures/combining/6.json
@@ -1,5 +1,5 @@
 {
-  "contents": "abcdéfg\n\n\n\n\n\n\n\n\n                                                                             aaa",
+  "contents": "abcdéfg\n\n\n\n\n\n\n\n\n                                                                             aaa",
   "cells": {
     "0,0": {
       "contents": "a"
@@ -14,7 +14,7 @@
       "contents": "d"
     },
     "0,4": {
-      "contents": "é"
+      "contents": "é"
     },
     "0,5": {
       "contents": "f"
diff --git a/tests/data/fixtures/utf8.in b/tests/data/fixtures/utf8.in
index 572eb43..c7efa1e 100644
--- a/tests/data/fixtures/utf8.in
+++ b/tests/data/fixtures/utf8.in
@@ -1 +1,2 @@
 café
+\x1bc\xf0\x9d\x87\x80\xe1\x9c\x92
diff --git a/tests/data/fixtures/utf8/2.json b/tests/data/fixtures/utf8/2.json
new file mode 100644
index 0000000..faaa824
--- /dev/null
+++ b/tests/data/fixtures/utf8/2.json
@@ -0,0 +1,12 @@
+{
+  "contents": "𝆺𝅥𝅯ᜒ",
+  "cells": {
+    "0,0": {
+      "contents": "𝆺𝅥𝅯ᜒ"
+    }
+  },
+  "cursor_position": [
+    0,
+    1
+  ]
+}
+\ No newline at end of file
diff --git a/tests/data/fixtures/utf8/2.typescript b/tests/data/fixtures/utf8/2.typescript
new file mode 100644
index 0000000..14d4c1f
--- /dev/null
+++ b/tests/data/fixtures/utf8/2.typescript
@@ -0,0 +1 @@
+c𝆺𝅥𝅯ᜒ
+\ No newline at end of file
diff --git a/tests/text.rs b/tests/text.rs
index 7e53317..e413100 100644
--- a/tests/text.rs
+++ b/tests/text.rs
@@ -20,7 +20,6 @@ fn wide() {
     helpers::fixture("wide");
 }
 
-#[cfg(feature = "unicode-normalization")]
 #[test]
 fn combining() {
     helpers::fixture("combining");
author	Jesse Luehrs <doy@tozt.net>	2019-12-07 02:01:53 -0500
committer	Jesse Luehrs <doy@tozt.net>	2019-12-07 02:07:11 -0500
commit	1075c2888dffb261164d1f3c5a5d216c4a3f0d35 (patch)
tree	f91d45e28da8ef705e00d826c121a81851c32698
parent	c4d890090e20a2c3b7c6eb03feb2f6e6033c9caf (diff)
download	vt100-rust-1075c2888dffb261164d1f3c5a5d216c4a3f0d35.tar.gz vt100-rust-1075c2888dffb261164d1f3c5a5d216c4a3f0d35.zip