From 7f11c68a439e10e97bee7964efe844818cff9bd5 Mon Sep 17 00:00:00 2001
From: gotmor <gotmor@f2baff5b-bf2c-0410-a398-912abdc3d8b2>
Date: Thu, 12 Feb 2009 11:52:16 +0000
Subject: replaced libc strlen() with optimized strlen_utf8() version

git-svn-id: http://dzen.googlecode.com/svn/trunk@246 f2baff5b-bf2c-0410-a398-912abdc3d8b2
---
 draw.c | 56 ++++++++++++++++++++++++++++----------------------------
 dzen.h |  1 +
 main.c |  4 ++--
 util.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/draw.c b/draw.c
index 8c65078..54275aa 100644
--- a/draw.c
+++ b/draw.c
@@ -72,7 +72,7 @@ textnw(Fnt *font, const char *text, unsigned int len) {
 	}
 	return XTextWidth(font->xfont, text, len);
 #else
-	XftTextExtentsUtf8(dzen.dpy, dzen.font.xftfont, (unsigned const char *) text, strlen(text), dzen.font.extents);
+	XftTextExtentsUtf8(dzen.dpy, dzen.font.xftfont, (unsigned const char *) text, strlen_utf8(text), dzen.font.extents);
 	if(dzen.font.extents->height > dzen.font.height)
 		dzen.font.height = dzen.font.extents->height;
 	return dzen.font.extents->width;
@@ -153,9 +153,9 @@ setfont(const char *fontstr) {
 	if(!dzen.font.xftfont)
 	   fprintf(stderr, "error, cannot load font: '%s'\n", fontstr);
 	dzen.font.extents = malloc(sizeof(XGlyphInfo));
-	XftTextExtentsUtf8(dzen.dpy, dzen.font.xftfont, (unsigned const char *) fontstr, strlen(fontstr), dzen.font.extents);
+	XftTextExtentsUtf8(dzen.dpy, dzen.font.xftfont, (unsigned const char *) fontstr, strlen_utf8(fontstr), dzen.font.extents);
 	dzen.font.height = dzen.font.xftfont->ascent + dzen.font.xftfont->descent;
-	dzen.font.width = (dzen.font.extents->width)/strlen(fontstr);
+	dzen.font.width = (dzen.font.extents->width)/strlen_utf8(fontstr);
 #endif
 }
 
@@ -441,7 +441,7 @@ parse_line(const char *line, int lnr, int align, int reverse, int nodraw) {
 	}
 
 
-	for(i=0; (unsigned)i < strlen(line); i++) {
+	for(i=0; (unsigned)i < strlen_utf8(line); i++) {
 		if(*(line+i) == ESC_CHAR) {
 			lbuf[j] = '\0';
 
@@ -664,10 +664,10 @@ parse_line(const char *line, int lnr, int align, int reverse, int nodraw) {
 				}
 
 				/* check if text is longer than window's width */
-				ow = j; tw = textnw(cur_fnt, lbuf, strlen(lbuf));
+				ow = j; tw = textnw(cur_fnt, lbuf, strlen_utf8(lbuf));
 				while( ((tw + px) > (dzen.w - h)) && j>=0) {
 					lbuf[--j] = '\0';
-					tw = textnw(cur_fnt, lbuf, strlen(lbuf));
+					tw = textnw(cur_fnt, lbuf, strlen_utf8(lbuf));
 				}
 				if(j < ow) {
 					if(j > 1)
@@ -684,15 +684,15 @@ parse_line(const char *line, int lnr, int align, int reverse, int nodraw) {
 #ifndef DZEN_XFT
 				if(cur_fnt->set)
 					XmbDrawString(dzen.dpy, pm, cur_fnt->set,
-							dzen.tgc, px, py + cur_fnt->ascent, lbuf, strlen(lbuf));
+							dzen.tgc, px, py + cur_fnt->ascent, lbuf, strlen_utf8(lbuf));
 				else
-					XDrawString(dzen.dpy, pm, dzen.tgc, px, py+dzen.font.ascent, lbuf, strlen(lbuf));
+					XDrawString(dzen.dpy, pm, dzen.tgc, px, py+dzen.font.ascent, lbuf, strlen_utf8(lbuf));
 #else
 				XftColorAllocName(dzen.dpy, DefaultVisual(dzen.dpy, dzen.screen),
 						DefaultColormap(dzen.dpy, dzen.screen),  xftcs,  &xftc);
 
 				XftDrawStringUtf8(xftd, &xftc, 
-						cur_fnt->xftfont, px, py + dzen.font.xftfont->ascent, lbuf, strlen(lbuf));
+						cur_fnt->xftfont, px, py + dzen.font.xftfont->ascent, lbuf, strlen_utf8(lbuf));
 
 				if(xftcs_f) {
 					free(xftcs);
@@ -934,10 +934,10 @@ parse_line(const char *line, int lnr, int align, int reverse, int nodraw) {
 		}
 
 		/* check if text is longer than window's width */
-		ow = j; tw = textnw(cur_fnt, lbuf, strlen(lbuf));
+		ow = j; tw = textnw(cur_fnt, lbuf, strlen_utf8(lbuf));
 		while( ((tw + px) > (dzen.w - h)) && j>=0) {
 			lbuf[--j] = '\0';
-			tw = textnw(cur_fnt, lbuf, strlen(lbuf));
+			tw = textnw(cur_fnt, lbuf, strlen_utf8(lbuf));
 		}
 		if(j < ow) {
 			if(j > 1)
@@ -954,15 +954,15 @@ parse_line(const char *line, int lnr, int align, int reverse, int nodraw) {
 #ifndef DZEN_XFT
 		if(cur_fnt->set)
 			XmbDrawString(dzen.dpy, pm, cur_fnt->set,
-					dzen.tgc, px, py + cur_fnt->ascent, lbuf, strlen(lbuf));
+					dzen.tgc, px, py + cur_fnt->ascent, lbuf, strlen_utf8(lbuf));
 		else
-			XDrawString(dzen.dpy, pm, dzen.tgc, px, py+dzen.font.ascent, lbuf, strlen(lbuf));
+			XDrawString(dzen.dpy, pm, dzen.tgc, px, py+dzen.font.ascent, lbuf, strlen_utf8(lbuf));
 #else
 		XftColorAllocName(dzen.dpy, DefaultVisual(dzen.dpy, dzen.screen),
 				DefaultColormap(dzen.dpy, dzen.screen), xftcs,  &xftc);
 
 		XftDrawStringUtf8(xftd, &xftc, 
-				cur_fnt->xftfont, px, py + dzen.font.xftfont->ascent, lbuf, strlen(lbuf));
+				cur_fnt->xftfont, px, py + dzen.font.xftfont->ascent, lbuf, strlen_utf8(lbuf));
 
 		if(xftcs_f) {
 			free(xftcs);
@@ -1038,66 +1038,66 @@ parse_non_drawing_commands(char * text) {
 	if(!text)
 		return 1;
 
-	if(!strncmp(text, "^togglecollapse()", strlen("^togglecollapse()"))) {
+	if(!strncmp(text, "^togglecollapse()", strlen_utf8("^togglecollapse()"))) {
 		a_togglecollapse(NULL);
 		return 0;
 	}
-	if(!strncmp(text, "^collapse()", strlen("^collapse()"))) {
+	if(!strncmp(text, "^collapse()", strlen_utf8("^collapse()"))) {
 		a_collapse(NULL);
 		return 0;
 	}
-	if(!strncmp(text, "^uncollapse()", strlen("^uncollapse()"))) {
+	if(!strncmp(text, "^uncollapse()", strlen_utf8("^uncollapse()"))) {
 		a_uncollapse(NULL);
 		return 0;
 	}
 
-	if(!strncmp(text, "^togglestick()", strlen("^togglestick()"))) {
+	if(!strncmp(text, "^togglestick()", strlen_utf8("^togglestick()"))) {
 		a_togglestick(NULL);
 		return 0;
 	}
-	if(!strncmp(text, "^stick()", strlen("^stick()"))) {
+	if(!strncmp(text, "^stick()", strlen_utf8("^stick()"))) {
 		a_stick(NULL);
 		return 0;
 	}
-	if(!strncmp(text, "^unstick()", strlen("^unstick()"))) {
+	if(!strncmp(text, "^unstick()", strlen_utf8("^unstick()"))) {
 		a_unstick(NULL);
 		return 0;
 	}
 
-	if(!strncmp(text, "^togglehide()", strlen("^togglehide()"))) {
+	if(!strncmp(text, "^togglehide()", strlen_utf8("^togglehide()"))) {
 		a_togglehide(NULL);
 		return 0;
 	}
-	if(!strncmp(text, "^hide()", strlen("^ide()"))) {
+	if(!strncmp(text, "^hide()", strlen_utf8("^ide()"))) {
 		a_hide(NULL);
 		return 0;
 	}
-	if(!strncmp(text, "^unhide()", strlen("^unhide()"))) {
+	if(!strncmp(text, "^unhide()", strlen_utf8("^unhide()"))) {
 		a_unhide(NULL);
 		return 0;
 	}
 
-	if(!strncmp(text, "^raise()", strlen("^raise()"))) {
+	if(!strncmp(text, "^raise()", strlen_utf8("^raise()"))) {
 		a_raise(NULL);
 		return 0;
 	}
 
-	if(!strncmp(text, "^lower()", strlen("^lower()"))) {
+	if(!strncmp(text, "^lower()", strlen_utf8("^lower()"))) {
 		a_lower(NULL);
 		return 0;
 	}
 
-	if(!strncmp(text, "^scrollhome()", strlen("^scrollhome()"))) {
+	if(!strncmp(text, "^scrollhome()", strlen_utf8("^scrollhome()"))) {
 		a_scrollhome(NULL);
 		return 0;
 	}
 
-	if(!strncmp(text, "^scrollend()", strlen("^scrollend()"))) {
+	if(!strncmp(text, "^scrollend()", strlen_utf8("^scrollend()"))) {
 		a_scrollend(NULL);
 		return 0;
 	}
 
-	if(!strncmp(text, "^exit()", strlen("^exit()"))) {
+	if(!strncmp(text, "^exit()", strlen_utf8("^exit()"))) {
 		a_exit(NULL);
 		return 0;
 	}
diff --git a/dzen.h b/dzen.h
index 9d17502..294953a 100644
--- a/dzen.h
+++ b/dzen.h
@@ -162,3 +162,4 @@ extern void *emalloc(unsigned int size);		/* allocates memory, exits on error */
 extern void eprint(const char *errstr, ...);	/* prints errstr and exits with 1 */
 extern char *estrdup(const char *str);			/* duplicates str, exits on allocation error */
 extern void spawn(const char *arg);             /* execute arg */
+extern size_t strlen_utf8(const char * _s);
diff --git a/main.c b/main.c
index f9bb44a..2dcedfd 100644
--- a/main.c
+++ b/main.c
@@ -104,8 +104,8 @@ chomp(char *inbuf, char *outbuf, int start, int len) {
 	int off=start;
 
 	if(rem) {
-		strncpy(outbuf, rem, strlen(rem));
-		i += strlen(rem);
+		strncpy(outbuf, rem, strlen_utf8(rem));
+		i += strlen_utf8(rem);
 		free(rem);
 		rem = NULL;
 	}
diff --git a/util.c b/util.c
index ced6368..b931b35 100644
--- a/util.c
+++ b/util.c
@@ -13,6 +13,8 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#define ONEMASK ((size_t)(-1) / 0xFF)
+
 void *
 emalloc(unsigned int size) {
 	void *res = malloc(size);
@@ -62,3 +64,57 @@ spawn(const char *arg) {
 	wait(0);
 }
 
+/* Count the characters (not bytes) in the NUL-terminated UTF-8 string _s; algorithm from http://www.daemonology.net/blog/2008-06-05-faster-utf8-strlen.html */
+size_t
+strlen_utf8(const char * _s)
+{
+	const char * s;		/* read cursor into _s */
+	size_t count = 0;	/* continuation bytes (10xxxxxx) seen so far */
+	size_t u;		/* one size_t-sized word of string data */
+	unsigned char b;	/* one byte of string data */
+	/* NOTE(review): for non-ASCII input the result is smaller than strlen(_s), so it is not usable as a byte length. */
+	/* Step byte-by-byte until s is aligned to sizeof(size_t). */
+	for (s = _s; (uintptr_t)(s) & (sizeof(size_t) - 1); s++) {
+		b = *s;
+
+		/* Terminator reached before alignment: skip the word loop entirely. */
+		if (b == '\0')
+			goto done;
+
+		/* Adds 1 when the top two bits of b are 10, i.e. b is a continuation byte. */
+		count += (b >> 7) & ((~b) >> 6);
+	}
+
+	/* Process one aligned size_t word per iteration until a word contains a zero byte. */
+	for (; ; s += sizeof(size_t)) {
+		/* Prefetch hint for the data 256 bytes ahead (compiler builtin; arguments 0, 0 = read access, no temporal locality). */
+		__builtin_prefetch(&s[256], 0, 0);
+
+		/* Load sizeof(size_t) bytes at once; the cast drops const and reads the chars as a size_t. */
+		u = *(size_t *)(s);
+
+		/* Non-zero when some byte of u is 0x00; s is left at the start of this word. */
+		if ((u - ONEMASK) & (~u) & (ONEMASK * 0x80))
+			break;
+
+		/* Set bit 0 of every byte that is 10xxxxxx, then sum those bits into the top byte via the multiply and shift it down. */
+		u = ((u & (ONEMASK * 0x80)) >> 7) & ((~u) >> 6);
+		count += (u * ONEMASK) >> ((sizeof(size_t) - 1) * 8);
+	}
+
+	/* Walk the remaining bytes one at a time, starting at the word where the zero byte was detected. */
+	for (; ; s++) {
+		b = *s;
+
+		/* Stop at the terminator; s then points at the NUL. */
+		if (b == '\0')
+			break;
+
+		/* Adds 1 when the top two bits of b are 10, i.e. b is a continuation byte. */
+		count += (b >> 7) & ((~b) >> 6);
+	}
+	/* Bytes scanned (s - _s) minus continuation bytes = number of characters. */
+done:
+	return ((s - _s) - count);
+}
+
-- 
cgit v1.2.3