From f45eaee6e00af151f4c6ae32e7bdadbefa0b81b4 Mon Sep 17 00:00:00 2001
From: gotmor
Date: Thu, 12 Feb 2009 23:32:53 +0000
Subject: reverted strlen_utf8() change. now we use default strlen() again

git-svn-id: http://dzen.googlecode.com/svn/trunk@248 f2baff5b-bf2c-0410-a398-912abdc3d8b2
---
 util.c | 54 ------------------------------------------------------
 1 file changed, 54 deletions(-)

(limited to 'util.c')

diff --git a/util.c b/util.c
index b931b35..4569aa9 100644
--- a/util.c
+++ b/util.c
@@ -64,57 +64,3 @@ spawn(const char *arg) {
 	wait(0);
 }
 
-/* http://www.daemonology.net/blog/2008-06-05-faster-utf8-strlen.html */
-size_t
-strlen_utf8(const char * _s)
-{
-	const char * s;
-	size_t count = 0;
-	size_t u;
-	unsigned char b;
-
-	/* Handle any initial misaligned bytes. */
-	for (s = _s; (uintptr_t)(s) & (sizeof(size_t) - 1); s++) {
-		b = *s;
-
-		/* Exit if we hit a zero byte. */
-		if (b == '\0')
-			goto done;
-
-		/* Is this byte NOT the first byte of a character? */
-		count += (b >> 7) & ((~b) >> 6);
-	}
-
-	/* Handle complete blocks. */
-	for (; ; s += sizeof(size_t)) {
-		/* Prefetch 256 bytes ahead. */
-		__builtin_prefetch(&s[256], 0, 0);
-
-		/* Grab 4 or 8 bytes of UTF-8 data. */
-		u = *(size_t *)(s);
-
-		/* Exit the loop if there are any zero bytes. */
-		if ((u - ONEMASK) & (~u) & (ONEMASK * 0x80))
-			break;
-
-		/* Count bytes which are NOT the first byte of a character. */
-		u = ((u & (ONEMASK * 0x80)) >> 7) & ((~u) >> 6);
-		count += (u * ONEMASK) >> ((sizeof(size_t) - 1) * 8);
-	}
-
-	/* Take care of any left-over bytes. */
-	for (; ; s++) {
-		b = *s;
-
-		/* Exit if we hit a zero byte. */
-		if (b == '\0')
-			break;
-
-		/* Is this byte NOT the first byte of a character? */
-		count += (b >> 7) & ((~b) >> 6);
-	}
-
-done:
-	return ((s - _s) - count);
-}
-
-- 
cgit v1.2.3-54-g00ecf