From 1075c2888dffb261164d1f3c5a5d216c4a3f0d35 Mon Sep 17 00:00:00 2001 From: Jesse Luehrs Date: Sat, 7 Dec 2019 02:01:53 -0500 Subject: actually, remove normalization entirely normalization has some weird edge cases that cause incorrect behavior, since we aren't implementing full grapheme segmentation (for instance, a single codepoint can be normalized into three different codepoints, and there are codepoints that are combining characters but have width != 0) --- Cargo.toml | 4 ---- src/cell.rs | 39 ----------------------------------- tests/data/fixtures/combining/11.json | 4 ++-- tests/data/fixtures/combining/2.json | 4 ++-- tests/data/fixtures/combining/4.json | 4 ++-- tests/data/fixtures/combining/5.json | 4 ++-- tests/data/fixtures/combining/6.json | 4 ++-- tests/data/fixtures/utf8.in | 1 + tests/data/fixtures/utf8/2.json | 12 +++++++++++ tests/data/fixtures/utf8/2.typescript | 1 + tests/text.rs | 1 - 11 files changed, 24 insertions(+), 54 deletions(-) create mode 100644 tests/data/fixtures/utf8/2.json create mode 100644 tests/data/fixtures/utf8/2.typescript diff --git a/Cargo.toml b/Cargo.toml index 9e60834..14d7774 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,14 +12,10 @@ keywords = ["terminal", "vt100"] categories = ["command-line-interface", "encoding"] license = "MIT" -[features] -default = ["unicode-normalization"] - [dependencies] itoa = "0.4" enumset = "0.4" log = "0.4" -unicode-normalization = { version = "0.1", optional = true } unicode-width = "0.1" vte = "0.3" diff --git a/src/cell.rs b/src/cell.rs index f9f9056..45e795b 100644 --- a/src/cell.rs +++ b/src/cell.rs @@ -51,45 +51,6 @@ impl Cell { self.contents[self.len()] = c; self.len += 1; - - self.normalize(); - } - - #[cfg(not(feature = "unicode-normalization"))] - #[inline] - fn normalize(&mut self) {} - - #[cfg(feature = "unicode-normalization")] - #[inline] - fn normalize(&mut self) { - use unicode_normalization::UnicodeNormalization as _; - - // some fonts have combined characters but can't render combining - // characters correctly, so try to prefer precombined characters when - // possible - if unicode_normalization::is_nfc_quick( - self.contents.iter().copied().take(CODEPOINTS_IN_CELL), - ) == unicode_normalization::IsNormalized::Yes - { - return; - } - - let mut new_contents = ['\x00'; CODEPOINTS_IN_CELL]; - let mut new_len = 0; - for c in self - .contents - .iter() - .copied() - .take(self.len()) - .nfc() - .take(CODEPOINTS_IN_CELL) - { - new_contents[new_len as usize] = c; - new_len += 1; - } - self.contents = new_contents; - self.len = new_len; - self.set_wide(new_contents[0].width().unwrap_or(0) > 1); } pub(crate) fn clear(&mut self, attrs: crate::attrs::Attrs) { diff --git a/tests/data/fixtures/combining/11.json b/tests/data/fixtures/combining/11.json index e973c78..6757b45 100644 --- a/tests/data/fixtures/combining/11.json +++ b/tests/data/fixtures/combining/11.json @@ -1,5 +1,5 @@ { - "contents": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaá", + "contents": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaá", "cells": { "0,0": { "contents": "a" @@ -233,7 +233,7 @@ "contents": "a" }, "0,79": { - "contents": "á" + "contents": "á" }, "0,8": { "contents": "a" diff --git a/tests/data/fixtures/combining/2.json b/tests/data/fixtures/combining/2.json index 6dce798..8317208 100644 --- a/tests/data/fixtures/combining/2.json +++ b/tests/data/fixtures/combining/2.json @@ -1,8 +1,8 @@ { - "contents": "á", + "contents": "á", "cells": { "0,0": { - "contents": "á" + "contents": "á" } }, "cursor_position": [ diff --git a/tests/data/fixtures/combining/4.json b/tests/data/fixtures/combining/4.json index 4044d4d..74a34b1 100644 --- a/tests/data/fixtures/combining/4.json +++ b/tests/data/fixtures/combining/4.json @@ -1,5 +1,5 @@ { - "contents": "abcdéfg", + "contents": "abcdéfg", "cells": { "0,0": { "contents": "a" @@ -14,7 +14,7 @@ "contents": "d" }, "0,4": { - "contents": "é" + "contents": "é" }, "0,5": { "contents": "f" diff --git a/tests/data/fixtures/combining/5.json b/tests/data/fixtures/combining/5.json index 67600d7..f655d0f 100644 --- a/tests/data/fixtures/combining/5.json +++ b/tests/data/fixtures/combining/5.json @@ -1,5 +1,5 @@ { - "contents": "abcdéfg\n\n\n\n\n\n\n\n\n aaa", + "contents": "abcdéfg\n\n\n\n\n\n\n\n\n aaa", "cells": { "0,0": { "contents": "a" @@ -14,7 +14,7 @@ "contents": "d" }, "0,4": { - "contents": "é" + "contents": "é" }, "0,5": { "contents": "f" diff --git a/tests/data/fixtures/combining/6.json b/tests/data/fixtures/combining/6.json index cbb481a..c0ed465 100644 --- a/tests/data/fixtures/combining/6.json +++ b/tests/data/fixtures/combining/6.json @@ -1,5 +1,5 @@ { - "contents": "abcdéfg\n\n\n\n\n\n\n\n\n aaa", + "contents": "abcdéfg\n\n\n\n\n\n\n\n\n aaa", "cells": { "0,0": { "contents": "a" @@ -14,7 +14,7 @@ "contents": "d" }, "0,4": { - "contents": "é" + "contents": "é" }, "0,5": { "contents": "f" diff --git a/tests/data/fixtures/utf8.in b/tests/data/fixtures/utf8.in index 572eb43..c7efa1e 100644 --- a/tests/data/fixtures/utf8.in +++ b/tests/data/fixtures/utf8.in @@ -1 +1,2 @@ café +\x1bc\xf0\x9d\x87\x80\xe1\x9c\x92 diff --git a/tests/data/fixtures/utf8/2.json b/tests/data/fixtures/utf8/2.json new file mode 100644 index 0000000..faaa824 --- /dev/null +++ b/tests/data/fixtures/utf8/2.json @@ -0,0 +1,12 @@ +{ + "contents": "𝆺𝅥𝅯ᜒ", + "cells": { + "0,0": { + "contents": "𝆺𝅥𝅯ᜒ" + } + }, + "cursor_position": [ + 0, + 1 + ] +} \ No newline at end of file diff --git a/tests/data/fixtures/utf8/2.typescript b/tests/data/fixtures/utf8/2.typescript new file mode 100644 index 0000000..14d4c1f --- /dev/null +++ b/tests/data/fixtures/utf8/2.typescript @@ -0,0 +1 @@ +c𝆺𝅥𝅯ᜒ \ No newline at end of file diff --git a/tests/text.rs b/tests/text.rs index 7e53317..e413100 100644 --- a/tests/text.rs +++ b/tests/text.rs @@ -20,7 +20,6 @@ fn wide() { helpers::fixture("wide"); } -#[cfg(feature = "unicode-normalization")] #[test] fn combining() { helpers::fixture("combining"); -- cgit v1.2.3-54-g00ecf