update to Unicode 5

SVN=126184
2008-07-07 14:07:46 -07:00 · 2008-07-07 14:07:46 -07:00 · 5b904a3bde
commit 5b904a3bde
parent 0d079a5362
24 changed files with 1520 additions and 1256 deletions
--- a/src/lib/math/asin.go
+++ b/src/lib/math/asin.go
@ -34,7 +34,7 @@ asin(arg double)double
 		sign = true;
 	}
 	if arg > 1 {
-		panic "return sys.NaN()";
+		return sys.NaN();
 	}
 	temp = sqrt(1 - x*x);
@ -54,7 +54,7 @@ func
 acos(arg double)double
 {
 	if(arg > 1 || arg < -1) {
-		panic "return sys.NaN()";
+		return sys.NaN();
 	}
 	return pio2 - asin(arg);
 }
--- a/src/lib/math/exp.go
+++ b/src/lib/math/exp.go
@ -40,7 +40,7 @@ exp(arg double) double
 		return 0.;
 	}
 	if arg > maxf {
-		panic "return sys.Inf(1)"
+		return sys.Inf(1)
 	}
 	x = arg*log2e;
--- a/src/lib/math/log.go
+++ b/src/lib/math/log.go
@ -36,7 +36,7 @@ log(arg double) double
 	var exp int;
 	if arg <= 0 {
-		panic "return sys.NaN()";
+		return sys.NaN();
 	}
 	exp,x = sys.frexp(arg);
@ -63,7 +63,7 @@ log10(arg double) double
 {
 	if arg <= 0 {
-		panic "return sys.NaN()";
+		return sys.NaN();
 	}
 	return log(arg) * ln10o1;
 }
--- a/src/lib/math/main.go
+++ b/src/lib/math/main.go
@ -5,7 +5,25 @@
 package main
-import math "math"
+//import math "math"
 //////////////////
 import math "asin"
 import math "atan"
 import math "atan2"
 import math "exp"
 import math "fabs"
 import math "floor"
 import math "fmod"
 import math "hypot"
 import math "log"
 import math "pow"
 import math "pow10"
 import math "sin"
 import math "sinh"
 import math "sqrt"
 import math "tan"
 import math "tanh"
 const
 (
--- a/src/lib/math/pow.go
+++ b/src/lib/math/pow.go
@ -26,14 +26,14 @@ pow(arg1,arg2 double) double
 	if arg1 <= 0 {
 		if(arg1 == 0) {
 			if arg2 <= 0 {
-				panic "return sys.NaN()";
+				return sys.NaN();
 			}
 			return 0;
 		}
 		temp = floor(arg2);
 		if temp != arg2 {
-			panic "return sys.NaN()";
+			panic sys.NaN();
 		}
 		l = long(temp);
--- a/src/lib/math/sinh.go
+++ b/src/lib/math/sinh.go
@ -48,7 +48,7 @@ sinh(arg double) double
 		temp = exp(arg)/2;
 	case arg > 0.5:
-//		temp = (exp(arg) - exp(-arg))/2;
+		temp = (exp(arg) - exp(-arg))/2;
 	default:
 		argsq = arg*arg;
@ -71,5 +71,5 @@ cosh(arg double) double
 	if arg > 21 {
 		return exp(arg)/2;
 	}
-//	return (exp(arg) + exp(-arg))/2;
+	return (exp(arg) + exp(-arg))/2;
 }
--- a/src/lib/math/sqrt.go
+++ b/src/lib/math/sqrt.go
@ -19,11 +19,10 @@ sqrt(arg double) double
 	var x, temp double;
 	var exp, i int;
 /* BUG: NO isINF
 	if sys.isInf(arg, 1) {
 		return arg;
 	}
-*/
+
 	if arg <= 0 {
 		if arg < 0 {
 			panic "return sys.NaN()"
--- a/src/lib/math/tan.go
+++ b/src/lib/math/tan.go
@ -62,7 +62,7 @@ tan(arg double) double
 	if flag {
 		if(temp == 0) {
-			panic "return sys.NaN()";
+			panic sys.NaN();
 		}
 		temp = 1/temp;
 	}
--- a/src/lib9/utf/mkrunetype.c
+++ b/src/lib9/utf/mkrunetype.c
@ -0,0 +1,733 @@
 // Copyright 2009 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 /*
 * make is(upper|lower|title|space|alpha)rune and
 * to(upper|lower|title)rune from a UnicodeData.txt file.
 * these can be found at unicode.org
 *
 * with -c, runs a check of the existing runetype functions vs.
 * those extracted from UnicodeData.
 *
 * with -p, generates tables for pairs of chars, as well as for ranges
 * and singletons.
 *
 * UnicodeData defines 4 fields of interest:
 * 1) a category
 * 2) an upper case mapping
 * 3) a lower case mapping
 * 4) a title case mapping
 *
 * toupper, tolower, and totitle are defined directly from the mapping.
 *
 * isalpharune(c) is true iff c is a "letter" category
 * isupperrune(c) is true iff c is the target of toupperrune,
 *	or is in the uppercase letter category
 * similarly for islowerrune and istitlerune.
 * isspacerune is true for space category chars, "C" locale white space chars,
 *	and two additions:
 *	0085	"next line" control char
 *	feff	"zero-width non-break space"
 * isdigitrune is true iff c is a numeric-digit category.
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>
 #include <libgen.h>
 #include "utf.h"
 #include "utfdef.h"
 enum {
 	/*
 	 * fields in the unicode data file
 	 */
 	FIELD_CODE,
 	FIELD_NAME,
 	FIELD_CATEGORY,
 	FIELD_COMBINING,
 	FIELD_BIDIR,
 	FIELD_DECOMP,
 	FIELD_DECIMAL_DIG,
 	FIELD_DIG,
 	FIELD_NUMERIC_VAL,
 	FIELD_MIRRORED,
 	FIELD_UNICODE_1_NAME,
 	FIELD_COMMENT,
 	FIELD_UPPER,
 	FIELD_LOWER,
 	FIELD_TITLE,
 	/* number of fields; getunicodeline rejects lines with any other count */
 	NFIELDS,
 	/* size of the line buffers handed to fgets */
 	MAX_LINE	= 1024,
 	/* bias added to every mapping delta by TO_DELTA; the generated to*rune code subtracts it again */
 	TO_OFFSET	= 1 << 20,
 	/* number of entries in each property and mapping table; codes >= NRUNES are rejected */
 	NRUNES		= 1 << 21,
 };
 /* biased difference between a code and its mapping, as stored in the generated __to* tables */
 #define TO_DELTA(xmapped,x)	(TO_OFFSET + (xmapped) - (x))
 /* property tables indexed by code: nonzero when the code has the property */
 static char	myisspace[NRUNES];
 static char	myisalpha[NRUNES];
 static char	myisdigit[NRUNES];
 static char	myisupper[NRUNES];
 static char	myislower[NRUNES];
 static char	myistitle[NRUNES];
 /* case mapping tables indexed by code: main initializes each entry to its own index (no mapping) */
 static int	mytoupper[NRUNES];
 static int	mytolower[NRUNES];
 static int	mytotitle[NRUNES];
 static void	check(void);
 static void	mktables(char *src, int usepairs);
 static void	fatal(const char *fmt, ...);
 static int	mygetfields(char **fields, int nfields, char *str, const char *delim);
 static int	getunicodeline(FILE *in, char **fields, char *buf);
 static int	getcode(char *s);
 /*
 * print the command line synopsis on stderr and exit with status 1.
 */
 static void
 usage(void)
 {
 	fputs("usage: mktables [-cp] <UnicodeData.txt>\n", stderr);
 	exit(1);
 }
 /*
 * parse an optional case mapping field of a UnicodeData line.
 * returns -1 when the field is empty; otherwise the mapped code,
 * after checking that it can index the NRUNES-sized tables.
 */
 static int
 getmapping(char *field)
 {
 	int mapped;
 	if(field[0] == '\0')
 		return -1;
 	mapped = getcode(field);
 	if(mapped < 0 || mapped >= NRUNES)
 		fatal("mapped code-point value out of range: %x", mapped);
 	return mapped;
 }
 /*
 * read the UnicodeData file named by the single non-flag argument,
 * fill in the myis* and myto* tables, then either compare them with
 * the existing runetype functions (-c) or print newly generated
 * tables on standard output (-p adds pair tables).
 */
 int
 main(int argc, char *argv[]){
 	FILE *in;
 	char buf[MAX_LINE], buf2[MAX_LINE];
 	char *fields[NFIELDS + 1], *fields2[NFIELDS + 1];
 	char *p;
 	int i, code, last, upper, lower, title, docheck, usepairs;
 	docheck = 0;
 	usepairs = 0;
 	ARGBEGIN{
 	case 'c':
 		docheck = 1;
 		break;
 	case 'p':
 		usepairs = 1;
 		break;
 	default:
 		usage();
 	}ARGEND
 	if(argc != 1){
 		usage();
 	}
 	in = fopen(argv[0], "r");
 	if(in == NULL){
 		fatal("can't open %s", argv[0]);
 	}
 	/*
 	 * start with every code mapping to itself
 	 */
 	for(i = 0; i < NRUNES; i++){
 		mytoupper[i] = i;
 		mytolower[i] = i;
 		mytotitle[i] = i;
 	}
 	/*
 	 * make sure isspace has all of the "C" locale whitespace chars
 	 */
 	myisspace['\t'] = 1;
 	myisspace['\n'] = 1;
 	myisspace['\r'] = 1;
 	myisspace['\f'] = 1;
 	myisspace['\v'] = 1;
 	/*
 	 * a couple of other exceptions
 	 */
 	myisspace[0x85] = 1;	/* control char, "next line" */
 	myisspace[0xfeff] = 1;	/* zero-width non-break space */
 	last = -1;
 	while(getunicodeline(in, fields, buf)){
 		code = getcode(fields[FIELD_CODE]);
 		if(code < 0 || code >= NRUNES)
 			fatal("code-point value too big: %x", code);
 		if(code <= last)
 			fatal("bad code sequence: %x then %x", last, code);
 		last = code;
 		/*
 		 * check for ranges
 		 */
 		p = fields[FIELD_CATEGORY];
 		if(strstr(fields[FIELD_NAME], ", First>") != NULL){
 			if(!getunicodeline(in, fields2, buf2))
 				fatal("range start at eof");
 			if(strstr(fields2[FIELD_NAME], ", Last>") == NULL)
 				fatal("range start not followed by range end");
 			last = getcode(fields2[FIELD_CODE]);
 			if(last <= code)
 				fatal("range out of sequence: %x then %x", code, last);
 			/*
 			 * every code up to last indexes the tables below,
 			 * so the end of the range must fit as well.
 			 */
 			if(last >= NRUNES)
 				fatal("code-point value too big: %x", last);
 			if(strcmp(p, fields2[FIELD_CATEGORY]) != 0)
 				fatal("range with mismatched category");
 		}
 		/*
 		 * the mappings are the same for every code of a range;
 		 * parse and bounds-check them once, since they are used
 		 * as table indices further down.
 		 */
 		upper = getmapping(fields[FIELD_UPPER]);
 		lower = getmapping(fields[FIELD_LOWER]);
 		title = getmapping(fields[FIELD_TITLE]);
 		/*
 		 * set properties and conversions
 		 */
 		for(; code <= last; code++){
 			if(p[0] == 'L')
 				myisalpha[code] = 1;
 			if(p[0] == 'Z')
 				myisspace[code] = 1;
 			if(strcmp(p, "Lu") == 0)
 				myisupper[code] = 1;
 			if(strcmp(p, "Ll") == 0)
 				myislower[code] = 1;
 			if(strcmp(p, "Lt") == 0)
 				myistitle[code] = 1;
 			if(strcmp(p, "Nd") == 0)
 				myisdigit[code] = 1;
 			/*
 			 * when finding conversions, also need to mark
 			 * upper/lower case, since some chars, like
 			 * "III" (0x2162), aren't defined as letters but have a
 			 * lower case mapping ("iii" (0x2172)).
 			 */
 			if(upper >= 0)
 				mytoupper[code] = upper;
 			if(lower >= 0)
 				mytolower[code] = lower;
 			if(title >= 0)
 				mytotitle[code] = title;
 		}
 	}
 	fclose(in);
 	/*
 	 * check for codes with no totitle mapping but a toupper mapping.
 	 * these appear in UnicodeData-2.0.14.txt, but are almost certainly
 	 * erroneous.
 	 */
 	for(i = 0; i < NRUNES; i++){
 		if(mytotitle[i] == i
 		&& mytoupper[i] != i
 		&& !myistitle[i])
 			fprintf(stderr, "warning: code=%.4x not istitle, totitle is same, toupper=%.4x\n", i, mytoupper[i]);
 	}
 	/*
 	 * make sure isupper[c] is true if toupper[x] == c for some x;
 	 * ditto for islower and istitle
 	 */
 	for(i = 0; i < NRUNES; i++) {
 		if(mytoupper[i] != i)
 			myisupper[mytoupper[i]] = 1;
 		if(mytolower[i] != i)
 			myislower[mytolower[i]] = 1;
 		if(mytotitle[i] != i)
 			myistitle[mytotitle[i]] = 1;
 	}
 	if(docheck){
 		check();
 	}else{
 		mktables(argv[0], usepairs);
 	}
 	return 0;
 }
 /*
 * print the table __is<label>r of [first, last] pairs, one per run of
 * consecutive codes set in prop, and clear the entries it printed.
 * runs of length one are printed only if force is set; otherwise they
 * are left in prop.  returns 1 if a table was printed, else 0.
 */
 static int
 mkisrange(const char* label, char* prop, int force)
 {
 	int lo, hi, printed;
 	printed = 0;
 	lo = 0;
 	while(lo < NRUNES){
 		if(!prop[lo]){
 			lo++;
 			continue;
 		}
 		/*
 		 * scan the rest of the run, clearing it on the way;
 		 * hi ends one past the last code of the run.
 		 */
 		hi = lo + 1;
 		while(hi < NRUNES && prop[hi]){
 			prop[hi] = 0;
 			hi++;
 		}
 		if(force || hi - lo != 1){
 			if(!printed){
 				printf("static Rune __is%sr[] = {\n", label);
 				printed = 1;
 			}
 			prop[lo] = 0;
 			printf("\t0x%.4x, 0x%.4x,\n", lo, hi - 1);
 		}
 		lo = hi;
 	}
 	if(printed)
 		printf("};\n\n");
 	return printed;
 }
 /*
 * print the table __is<label>p of [first, last] pairs describing
 * sequences of set codes two apart (first, first+2, ..., last),
 * and clear the entries it printed.  a set code whose neighbour two
 * further on is clear is left in prop.
 * returns 1 if a table was printed, else 0.
 */
 static int
 mkispair(const char *label, char *prop)
 {
 	int lo, hi, printed;
 	printed = 0;
 	lo = 0;
 	while(lo + 2 < NRUNES){
 		if(!prop[lo]){
 			lo++;
 			continue;
 		}
 		/*
 		 * follow the sequence in steps of two, clearing as we go;
 		 * hi ends one step past its last member.
 		 */
 		hi = lo + 2;
 		while(hi < NRUNES && prop[hi]){
 			prop[hi] = 0;
 			hi += 2;
 		}
 		if(hi - lo != 2){
 			if(!printed){
 				printf("static Rune __is%sp[] = {\n", label);
 				printed = 1;
 			}
 			prop[lo] = 0;
 			printf("\t0x%.4x, 0x%.4x,\n", lo, hi - 2);
 		}
 		lo = hi;
 	}
 	if(printed)
 		printf("};\n\n");
 	return printed;
 }
 /*
 * print the table __is<label>s listing every code still set in prop,
 * clearing each one as it is printed.
 * returns 1 if a table was printed, else 0.
 */
 static int
 mkissingle(const char *label, char *prop)
 {
 	int c, printed;
 	printed = 0;
 	for(c = 0; c < NRUNES; c++){
 		if(prop[c]){
 			if(!printed){
 				printf("static Rune __is%ss[] = {\n", label);
 				printed = 1;
 			}
 			prop[c] = 0;
 			printf("\t0x%.4x,\n", c);
 		}
 	}
 	if(printed)
 		printf("};\n\n");
 	return printed;
 }
 /*
 * print the tables for prop followed by the function is<label>rune
 * that searches them.  the lookup code for a table is printed only if
 * that table was actually generated.
 */
 static void
 mkis(const char* label, char* prop, int usepairs)
 {
 	int hasranges, haspairs, hassingles;
 	/*
 	 * each generator clears the entries it prints, so the order
 	 * ranges, pairs, singletons decides which table a code lands in.
 	 */
 	hasranges = mkisrange(label, prop, 0);
 	haspairs = usepairs ? mkispair(label, prop) : 0;
 	hassingles = mkissingle(label, prop);
 	printf(
 		"int\n"
 		"is%srune(Rune c)\n"
 		"{\n"
 		"	Rune *p;\n"
 		"\n",
 		label);
 	if(hasranges){
 		printf(
 			"	p = rbsearch(c, __is%sr, nelem(__is%sr)/2, 2);\n"
 			"	if(p && c >= p[0] && c <= p[1])\n"
 			"		return 1;\n",
 			label, label);
 	}
 	if(haspairs){
 		printf(
 			"	p = rbsearch(c, __is%sp, nelem(__is%sp)/2, 2);\n"
 			"	if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
 			"		return 1;\n",
 			label, label);
 	}
 	if(hassingles){
 		printf(
 			"	p = rbsearch(c, __is%ss, nelem(__is%ss), 1);\n"
 			"	if(p && c == p[0])\n"
 			"		return 1;\n",
 			label, label);
 	}
 	printf(
 		"	return 0;\n"
 		"}\n"
 		"\n"
 	);
 }
 /*
 * print the table __to<label>r of [first, last, delta] triples, one per
 * run of consecutive codes that share the same biased delta (TO_DELTA),
 * and reset the printed entries of map to the identity.
 * runs of length one are printed only if force is set; otherwise they
 * stay in map for a later pass.
 * returns 1 if a table was printed, else 0.
 */
 static int
 mktorange(const char* label, int* map, int force)
 {
 	int start, stop, delta, some;
 	some = 0;
 	for(start = 0; start < NRUNES; ) {
 		if(map[start] == start){
 			start++;
 			continue;
 		}
 		delta = TO_DELTA(map[start], start);
 		/* the delta is stored in a Rune table, so it must survive the conversion */
 		if(delta != (Rune)delta)
 			fatal("bad map delta %d", delta);
 		/* extend the run while the delta is unchanged, clearing the tail as we go */
 		for(stop = start + 1; stop < NRUNES; stop++){
 			if(TO_DELTA(map[stop], stop) != delta){
 				break;
 			}
 			map[stop] = stop;
 		}
 		/* honour force, as mkisrange does, so singletons can be emitted as one-entry ranges */
 		if(force || stop != start + 1){
 			if(!some){
 				printf("static Rune __to%sr[] = {\n", label);
 				some = 1;
 			}
 			map[start] = start;
 			printf("\t0x%.4x, 0x%.4x, %d,\n", start, stop - 1, delta);
 		}
 		start = stop;
 	}
 	if(some)
 		printf("};\n\n");
 	return some;
 }
 /*
 * generate a mapping array for pairs with a skip between,
 * clearing those entries covered.
 */
 static int
 mktopair(const char* label, int* map)
 {
 	int start, stop, delta, some;
 	some = 0;
 	for(start = 0; start + 2 < NRUNES; ) {
 		if(map[start] == start){
 			start++;
 			continue;
 		}
 		delta = TO_DELTA(map[start], start);
 		if(delta != (Rune)delta)
 			fatal("bad map delta %d", delta);
 		for(stop = start + 2; stop < NRUNES; stop += 2){
 			if(TO_DELTA(map[stop], stop) != delta){
 				break;
 			}
 			map[stop] = stop;
 		}
 		if(stop != start + 2){
 			if(!some){
 				printf("static Rune __to%sp[] = {\n", label);
 				some = 1;
 			}
 			map[start] = start;
 			printf("\t0x%.4x, 0x%.4x, %d,\n", start, stop - 2, delta);
 		}
 		start = stop;
 	}
 	if(some)
 		printf("};\n\n");
 	return some;
 }
 /*
 * generate a mapping array for singletons, clearing those entries covered.
 */
 static int
 mktosingle(const char* label, int* map)
 {
 	int start, delta, some;
 	some = 0;
 	for(start = 0; start < NRUNES; start++) {
 		if(map[start] == start){
 			continue;
 		}
 		delta = TO_DELTA(map[start], start);
 		if(delta != (Rune)delta)
 			fatal("bad map delta %d", delta);
 		if(!some){
 			printf("static Rune __to%ss[] = {\n", label);
 			some = 1;
 		}
 		map[start] = start;
 		printf("\t0x%.4x, %d,\n", start, delta);
 	}
 	if(some)
 		printf("};\n\n");
 	return some;
 }
 /*
 * print the tables for map followed by the function to<label>rune
 * that searches them.  the generated code removes the TO_OFFSET bias
 * stored with each delta; lookup code is printed only for tables that
 * were actually generated.
 */
 static void
 mkto(const char* label, int* map, int usepairs)
 {
 	int hasranges, haspairs, hassingles;
 	/*
 	 * each generator resets the entries it prints, so the order
 	 * ranges, pairs, singletons decides which table a code lands in.
 	 */
 	hasranges = mktorange(label, map, 0);
 	haspairs = usepairs ? mktopair(label, map) : 0;
 	hassingles = mktosingle(label, map);
 	printf(
 		"Rune\n"
 		"to%srune(Rune c)\n"
 		"{\n"
 		"	Rune *p;\n"
 		"\n",
 		label);
 	if(hasranges){
 		printf(
 			"	p = rbsearch(c, __to%sr, nelem(__to%sr)/3, 3);\n"
 			"	if(p && c >= p[0] && c <= p[1])\n"
 			"		return c + p[2] - %d;\n",
 			label, label, TO_OFFSET);
 	}
 	if(haspairs){
 		printf(
 			"	p = rbsearch(c, __to%sp, nelem(__to%sp)/3, 3);\n"
 			"	if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))\n"
 			"		return c + p[2] - %d;\n",
 			label, label, TO_OFFSET);
 	}
 	if(hassingles){
 		printf(
 			"	p = rbsearch(c, __to%ss, nelem(__to%ss)/2, 2);\n"
 			"	if(p && c == p[0])\n"
 			"		return c + p[1] - %d;\n",
 			label, label, TO_OFFSET);
 	}
 	printf(
 		"	return c;\n"
 		"}\n"
 		"\n"
 	);
 }
 // Print a single range table for prop (singletons become one-entry
 // ranges) and an is<label>rune function that searches only that table.
 static void
 mkisronly(const char* label, char* prop) {
 	// force: every set code ends up in the range table.
 	(void)mkisrange(label, prop, 1);
 	printf(
 		"int\n"
 		"is%srune(Rune c)\n"
 		"{\n"
 		"	Rune *p;\n"
 		"\n"
 		"	p = rbsearch(c, __is%sr, nelem(__is%sr)/2, 2);\n"
 		"	if(p && c >= p[0] && c <= p[1])\n"
 		"		return 1;\n"
 		"	return 0;\n"
 		"}\n"
 		"\n",
 		label, label, label);
 }
 /*
 * generate the body of runetype on standard output.
 * assumes there is a function Rune* rbsearch(Rune c, Rune *t, int n, int ne);
 */
 static void
 mktables(char *src, int usepairs)
 {
 	static const char *const caselabel[3] = { "upper", "lower", "title" };
 	static char *const caseprop[3] = { myisupper, myislower, myistitle };
 	static int *const casemap[3] = { mytoupper, mytolower, mytotitle };
 	int k;
 	printf("/* generated automatically by mkrunetype.c from %s */\n\n",
 		basename(src));
 	/*
 	 * we special case the space and digit tables, since they are assumed
 	 * to be small with several ranges.
 	 */
 	mkisronly("space", myisspace);
 	mkisronly("digit", myisdigit);
 	/*
 	 * alpha never gets a pair table; the case properties and
 	 * mappings follow the caller's choice.
 	 */
 	mkis("alpha", myisalpha, 0);
 	for(k = 0; k < 3; k++)
 		mkis(caselabel[k], caseprop[k], usepairs);
 	for(k = 0; k < 3; k++)
 		mkto(caselabel[k], casemap[k], usepairs);
 }
 /*
 * report on stderr one difference between the existing runetype
 * result (have) and the value computed from UnicodeData (want).
 * prints nothing when the two agree.
 */
 static void
 reportdiff(const char *what, int code, int have, int want)
 {
 	if(have != want)
 		fprintf(stderr, "%s diff at %x: runetype=%x, unicode=%x\n",
 			what, code, have, want);
 }
 /*
 * find differences between the newly generated tables and current runetypes.
 */
 static void
 check(void)
 {
 	int c;
 	for(c = 0; c < NRUNES; c++){
 		reportdiff("isdigit", c, isdigitrune(c), myisdigit[c]);
 		reportdiff("isspace", c, isspacerune(c), myisspace[c]);
 		reportdiff("isupper", c, isupperrune(c), myisupper[c]);
 		reportdiff("islower", c, islowerrune(c), myislower[c]);
 		reportdiff("isalpha", c, isalpharune(c), myisalpha[c]);
 		reportdiff("toupper", c, toupperrune(c), mytoupper[c]);
 		reportdiff("tolower", c, tolowerrune(c), mytolower[c]);
 		reportdiff("istitle", c, istitlerune(c), myistitle[c]);
 		reportdiff("totitle", c, totitlerune(c), mytotitle[c]);
 	}
 }
 /*
 * split str in place at any character found in delim.
 * each delimiter is overwritten with a NUL and the start of every
 * piece is stored in fields; at most nfields pointers are stored
 * (always at least one), and once the limit is reached the rest of
 * the string stays unsplit in the last field.
 * returns the number of fields stored.
 */
 static int
 mygetfields(char **fields, int nfields, char *str, const char *delim)
 {
 	int count;
 	char *cur;
 	fields[0] = str;
 	count = 1;
 	cur = str;
 	while(count < nfields && *cur != '\0'){
 		if(strchr(delim, *cur) != NULL){
 			*cur = '\0';
 			fields[count++] = cur + 1;
 		}
 		cur++;
 	}
 	return count;
 }
 /*
 * read the next line of the data file into buf (MAX_LINE bytes),
 * strip its newline and split it at ';' into fields, which must have
 * room for NFIELDS + 1 pointers.
 * returns 0 at end of file and 1 otherwise; a line that does not fit
 * in buf or does not have exactly NFIELDS fields is a fatal error.
 */
 static int
 getunicodeline(FILE *in, char **fields, char *buf)
 {
 	char *p;
 	if(fgets(buf, MAX_LINE, in) == NULL)
 		return 0;
 	p = strchr(buf, '\n');
 	if(p != NULL)
 		*p = '\0';
 	else if(strlen(buf) >= MAX_LINE - 1){
 		/* fgets filled the buffer without reaching the end of the line */
 		fatal("line too long");
 	}
 	/*
 	 * otherwise this is a final line with no trailing newline,
 	 * which is complete and is parsed like any other.
 	 */
 	if(mygetfields(fields, NFIELDS + 1, buf, ";") != NFIELDS)
 		fatal("bad number of fields");
 	return 1;
 }
 static int
 getcode(char *s)
 {
 	int i, code;
 	code = 0;
        i = 0;
        /* Parse a hex number */
 	while(s[i]) {
 		code <<= 4;
 		if(s[i] >= '0' && s[i] <= '9')
 			code += s[i] - '0';
 		else if(s[i] >= 'A' && s[i] <= 'F')
 			code += s[i] - 'A' + 10;
 		else
 			fatal("bad code char '%c'", s[i]);
                i++;
 	}
 	return code;
 }
 static void
 fatal(const char *fmt, ...)
 {
 	va_list arg;
 	fprintf(stderr, "%s: fatal error: ", argv0);
 	va_start(arg, fmt);
 	vfprintf(stderr, fmt, arg);
 	va_end(arg);
 	fprintf(stderr, "\n");
 	exit(1);
 }
--- a/src/lib9/utf/rune.c
+++ b/src/lib9/utf/rune.c
@ -1,20 +1,21 @@
 /*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include <stdarg.h>
 #include <string.h>
 #include "plan9.h"
 #include "utf.h"
 #include "utfdef.h"
 enum
 {
@ -23,27 +24,150 @@ enum
 	Bit2	= 5,
 	Bit3	= 4,
 	Bit4	= 3,
 	Bit5	= 2,
 	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
 	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
 	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
 	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
 	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
 	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
 	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
 	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
 	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
 	Rune4	= (1<<(Bit4+3*Bitx))-1,
                                        /* 0001 1111 1111 1111 1111 1111 */
 	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
 	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
-	Bad	= Runeerror
+	Bad	= Runeerror,
 };
 /*
 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
 * This is a slower but "safe" version of the old chartorune
 * that works on strings that are not necessarily null-terminated.
 *
 * If you know for sure that your string is null-terminated,
 * chartorune will be a bit faster.
 *
 * It is guaranteed not to attempt to access "length"
 * past the incoming pointer.  This is to avoid
 * possible access violations.  If the string appears to be
 * well-formed but incomplete (i.e., to get the whole Rune
 * we'd need to read past str+length) then we'll set the Rune
 * to Bad and return 0.
 *
 * Note that if we have decoding problems for other
 * reasons, we return 1 instead of 0.
 */
 int
-chartorune(Rune *rune, char *str)
+charntorune(Rune *rune, const char *str, int length)
 {
-	int c, c1, c2;
+	int c, c1, c2, c3;
 	long l;
 	/* When we're not allowed to read anything */
 	if(length <= 0) {
 		goto badlen;
 	}
 	/*
 	 * one character sequence (7-bit value)
 	 *	00000-0007F => T1
 	 */
 	c = *(uchar*)str;
 	if(c < Tx) {
 		*rune = c;
 		return 1;
 	}
 	// If we can't read more than one character we must stop
 	if(length <= 1) {
 		goto badlen;
 	}
 	/*
 	 * two character sequence (11-bit value)
 	 *	0080-07FF => T2 Tx
 	 */
 	c1 = *(uchar*)(str+1) ^ Tx;
 	if(c1 & Testx)
 		goto bad;
 	if(c < T3) {
 		if(c < T2)
 			goto bad;
 		l = ((c << Bitx) | c1) & Rune2;
 		if(l <= Rune1)
 			goto bad;
 		*rune = l;
 		return 2;
 	}
 	// If we can't read more than two characters we must stop
 	if(length <= 2) {
 		goto badlen;
 	}
 	/*
 	 * three character sequence (16-bit value)
 	 *	0800-FFFF => T3 Tx Tx
 	 */
 	c2 = *(uchar*)(str+2) ^ Tx;
 	if(c2 & Testx)
 		goto bad;
 	if(c < T4) {
 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
 		if(l <= Rune2)
 			goto bad;
 		*rune = l;
 		return 3;
 	}
 	if (length <= 3)
 		goto badlen;
 	/*
 	 * four character sequence (21-bit value)
 	 *	10000-1FFFFF => T4 Tx Tx Tx
 	 */
 	c3 = *(uchar*)(str+3) ^ Tx;
 	if (c3 & Testx)
 		goto bad;
 	if (c < T5) {
 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
 		if (l <= Rune3)
 			goto bad;
 		*rune = l;
 		return 4;
 	}
 	// Support for 5-byte or longer UTF-8 would go here, but
 	// since we don't have that, we'll just fall through to bad.
 	/*
 	 * bad decoding
 	 */
 bad:
 	*rune = Bad;
 	return 1;
 badlen:
 	*rune = Bad;
 	return 0;
 }
 /*
 * This is the older "unsafe" version, which works fine on
 * null-terminated strings.
 */
 int
 chartorune(Rune *rune, const char *str)
 {
 	int c, c1, c2, c3;
 	long l;
 	/*
@ -88,6 +212,26 @@ chartorune(Rune *rune, char *str)
 		return 3;
 	}
 	/*
 	 * four character sequence (21-bit value)
 	 *	10000-1FFFFF => T4 Tx Tx Tx
 	 */
 	c3 = *(uchar*)(str+3) ^ Tx;
 	if (c3 & Testx)
 		goto bad;
 	if (c < T5) {
 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
 		if (l <= Rune3)
 			goto bad;
 		*rune = l;
 		return 4;
 	}
 	/*
 	 * Support for 5-byte or longer UTF-8 would go here, but
 	 * since we don't have that, we'll just fall through to bad.
 	 */
 	/*
 	 * bad decoding
 	 */
@ -97,9 +241,16 @@ bad:
 }
 int
-runetochar(char *str, Rune *rune)
+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
 	/*
 	 * charntorune signals a decoding failure by storing Runeerror and
 	 * returning 0 or 1.  Runeerror together with a count of 3 means the
 	 * input really was the three-byte encoding of Runeerror itself,
 	 * which is a valid sequence.
 	 */
 	*consumed = charntorune(rune, str, length);
 	return *rune != Runeerror || *consumed == 3;
 }
 int
 runetochar(char *str, const Rune *rune)
 {
-	long c;
+	/* Runes are signed, so convert to unsigned for range check. */
 	unsigned long c;
 	/*
 	 * one character sequence
@ -121,57 +272,80 @@ runetochar(char *str, Rune *rune)
 		return 2;
 	}
 	/*
 	 * If the Rune is out of range, convert it to the error rune.
 	 * Do this test here because the error rune encodes to three bytes.
 	 * Doing it earlier would duplicate work, since an out of range
 	 * Rune wouldn't have fit in one or two bytes.
 	 */
 	if (c > Runemax)
 		c = Runeerror;
 	/*
 	 * three character sequence
 	 *	0800-FFFF => T3 Tx Tx
 	 */
-	str[0] = T3 |  (c >> 2*Bitx);
+	if (c <= Rune3) {
-	str[1] = Tx | ((c >> 1*Bitx) & Maskx);
+		str[0] = T3 |  (c >> 2*Bitx);
-	str[2] = Tx |  (c & Maskx);
+		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
-	return 3;
+		str[2] = Tx |  (c & Maskx);
 		return 3;
 	}
 	/*
 	 * four character sequence (21-bit value)
 	 *     10000-1FFFFF => T4 Tx Tx Tx
 	 */
 	str[0] = T4 | (c >> 3*Bitx);
 	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
 	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
 	str[3] = Tx | (c & Maskx);
 	return 4;
 }
 int
-runelen(long c)
+runelen(Rune rune)
 {
 	Rune rune;
 	char str[10];
 	rune = c;
 	return runetochar(str, &rune);
 }
 int
-runenlen(Rune *r, int nrune)
+runenlen(const Rune *r, int nrune)
 {
 	int nb, c;
 	nb = 0;
 	while(nrune--) {
 		c = *r++;
-		if(c <= Rune1)
+		if (c <= Rune1)
 			nb++;
-		else
+		else if (c <= Rune2)
 		if(c <= Rune2)
 			nb += 2;
-		else
+		else if (c <= Rune3)
 			nb += 3;
 		else /* assert(c <= Rune4) */
 			nb += 4;
 	}
 	return nb;
 }
 int
-fullrune(char *str, int n)
+fullrune(const char *str, int n)
 {
-	int c;
+	if (n > 0) {
-
+		int c = *(uchar*)str;
-	if(n > 0) {
+		if (c < Tx)
 		c = *(uchar*)str;
 		if(c < Tx)
 			return 1;
-		if(n > 1)
+		if (n > 1) {
-			if(c < T3 || n > 2)
+			if (c < T3)
 				return 1;
 			if (n > 2) {
 				if (c < T4 || n > 3)
 					return 1;
 			}
 		}
 	}
 	return 0;
 }
--- a/src/lib9/utf/runetype.c
+++ b/src/lib9/utf/runetype.c
--- a/src/lib9/utf/utf.h
+++ b/src/lib9/utf/utf.h
@ -0,0 +1,248 @@
 /*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 1998-2002 by Lucent Technologies.
 *              Portions Copyright (c) 2009 The Go Authors.  All rights reserved.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #ifndef _UTFH_
 #define _UTFH_ 1
 #include <stdint.h>
 typedef signed int Rune;	/* Code-point values in Unicode 4.0 are 21 bits wide.*/
 enum
 {
  UTFmax	= 4,		/* maximum bytes per rune */
  Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
  Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
  Runeerror	= 0xFFFD,	/* decoding error in UTF */
  Runemax	= 0x10FFFF,	/* maximum rune value */
 };
 #ifdef	__cplusplus
 extern "C" {
 #endif
 /*
 * rune routines
 */
 /*
 * These routines were written by Rob Pike and Ken Thompson
 * and first appeared in Plan 9.
 * SEE ALSO
 * utf (7)
 * tcs (1)
 */
 // runetochar copies (encodes) one rune, pointed to by r, to at most
 // UTFmax bytes starting at s and returns the number of bytes generated.
 int runetochar(char* s, const Rune* r);
 // chartorune copies (decodes) at most UTFmax bytes starting at s to
 // one rune, pointed to by r, and returns the number of bytes consumed.
 // If the input is not exactly in UTF format, chartorune will set *r
 // to Runeerror and return 1.
 //
 // Note: There is no special case for a "null-terminated" string. A
 // string whose first byte has the value 0 is the UTF8 encoding of the
 // Unicode value 0 (i.e., ASCII NULL). A byte value of 0 is illegal
 // anywhere else in a UTF sequence.
 int chartorune(Rune* r, const char* s);
 // charntorune is like chartorune, except that it will access at most
 // n bytes of s.  If the UTF sequence is incomplete within n bytes,
 // charntorune will set *r to Runeerror and return 0. If it is complete
 // but not in UTF format, it will set *r to Runeerror and return 1.
 // 
 // Added 2004-09-24 by Wei-Hwa Huang
 int charntorune(Rune* r, const char* s, int n);
 // isvalidcharntorune(str, n, r, consumed)
 // is a convenience function that calls "*consumed = charntorune(r, str, n)"
 // and returns an int (logically boolean) indicating whether the first
 // n bytes of str was a valid and complete UTF sequence.
 int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed);
 // runelen returns the number of bytes required to convert r into UTF.
 int runelen(Rune r);
 // runenlen returns the number of bytes required to convert the n
 // runes pointed to by r into UTF.
 int runenlen(const Rune* r, int n);
 // fullrune returns 1 if the string s of length n is long enough to be
 // decoded by chartorune, and 0 otherwise. This does not guarantee
 // that the string contains a legal UTF encoding. This routine is used
 // by programs that obtain input one byte at a time and need to know
 // when a full rune has arrived.
 int fullrune(const char* s, int n);
 // The following routines are analogous to the corresponding string
 // routines with "utf" substituted for "str", and "rune" substituted
 // for "chr".
 // utflen returns the number of runes that are represented by the UTF
 // string s. (cf. strlen)
 int utflen(const char* s);
 // utfnlen returns the number of complete runes that are represented
 // by the first n bytes of the UTF string s. If the last few bytes of
 // the string contain an incompletely coded rune, utfnlen will not
 // count them; in this way, it differs from utflen, which includes
 // every byte of the string. (cf. strnlen)
 int utfnlen(const char* s, long n);
 // utfrune returns a pointer to the first occurrence of rune r in the
 // UTF string s, or 0 if r does not occur in the string.  The NULL
 // byte terminating a string is considered to be part of the string s.
 // (cf. strchr)
 const char* utfrune(const char* s, Rune r);
 // utfrrune returns a pointer to the last occurrence of rune r in the
 // UTF string s, or 0 if r does not occur in the string.  The NULL
 // byte terminating a string is considered to be part of the string s.
 // (cf. strrchr)
 const char* utfrrune(const char* s, Rune r);
 // utfutf returns a pointer to the first occurrence of the UTF string
 // s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
 // null string, utfutf returns s1. (cf. strstr)
 const char* utfutf(const char* s1, const char* s2);
 // utfecpy copies UTF sequences until a null sequence has been copied,
 // but writes no sequences beyond es1.  If any sequences are copied,
 // s1 is terminated by a null sequence, and a pointer to that sequence
 // is returned.  Otherwise, the original s1 is returned. (cf. strecpy)
 char* utfecpy(char *s1, char *es1, const char *s2);
 // These functions are rune-string analogues of the corresponding
 // functions in strcat (3).
 // 
 // These routines first appeared in Plan 9.
 // SEE ALSO
 // memmove (3)
 // rune (3)
 // strcat (2)
 //
 // BUGS: The outcome of overlapping moves varies among implementations.
 Rune* runestrcat(Rune* s1, const Rune* s2);
 Rune* runestrncat(Rune* s1, const Rune* s2, long n);
 const Rune* runestrchr(const Rune* s, Rune c);
 int runestrcmp(const Rune* s1, const Rune* s2);
 int runestrncmp(const Rune* s1, const Rune* s2, long n);
 Rune* runestrcpy(Rune* s1, const Rune* s2);
 Rune* runestrncpy(Rune* s1, const Rune* s2, long n);
 Rune* runestrecpy(Rune* s1, Rune* es1, const Rune* s2);
 Rune* runestrdup(const Rune* s);
 const Rune* runestrrchr(const Rune* s, Rune c);
 long runestrlen(const Rune* s);
 const Rune* runestrstr(const Rune* s1, const Rune* s2);
 // The following routines test types and modify cases for Unicode
 // characters.  Unicode defines some characters as letters and
 // specifies three cases: upper, lower, and title.  Mappings among the
 // cases are also defined, although they are not exhaustive: some
 // upper case letters have no lower case mapping, and so on.  Unicode
 // also defines several character properties, a subset of which are
 // checked by these routines.  These routines are based on Unicode
 // version 3.0.0.
 //
 // NOTE: The routines are implemented in C, so the boolean functions
 // (e.g., isupperrune) return 0 for false and 1 for true.
 //
 //
 // toupperrune, tolowerrune, and totitlerune are the Unicode case
 // mappings. These routines return the character unchanged if it has
 // no defined mapping.
 Rune toupperrune(Rune r);
 Rune tolowerrune(Rune r);
 Rune totitlerune(Rune r);
 // isupperrune tests for upper case characters, including Unicode
 // upper case letters and targets of the toupper mapping. islowerrune
 // and istitlerune are defined analogously. 
 int isupperrune(Rune r);
 int islowerrune(Rune r);
 int istitlerune(Rune r);
 // isalpharune tests for Unicode letters; this includes ideographs in
 // addition to alphabetic characters.
 int isalpharune(Rune r);
 // isdigitrune tests for digits. Non-digit numbers, such as Roman
 // numerals, are not included.
 int isdigitrune(Rune r);
 // isideographicrune tests for ideographic characters and numbers, as
 // defined by the Unicode standard.
 int isideographicrune(Rune r);
 // isspacerune tests for whitespace characters, including "C" locale
 // whitespace, Unicode defined whitespace, and the "zero-width
 // non-break space" character.
 int isspacerune(Rune r);
 // (The comments in this file were copied from the manpage files rune.3,
 // isalpharune.3, and runestrcat.3. Some formatting changes were also made
 // to conform to Google style. /JRM 11/11/05)
 #ifdef	__cplusplus
 }
 #endif
 #endif
--- a/src/lib9/utf/utfdef.h
+++ b/src/lib9/utf/utfdef.h
@ -12,36 +12,17 @@
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
-/*
+#define uchar _utfuchar
- * compiler directive on Plan 9
+#define ushort _utfushort
- */
+#define uint _utfuint
-#ifndef USED
+#define ulong _utfulong
-#define USED(x) if(x);else
+#define vlong _utfvlong
-#endif
+#define uvlong _utfuvlong
 /*
 * easiest way to make sure these are defined
 */
 #define uchar	_fmtuchar
 #define ushort	_fmtushort
 #define uint	_fmtuint
 #define ulong	_fmtulong
 #define vlong	_fmtvlong
 #define uvlong	_fmtuvlong
 typedef unsigned char		uchar;
 typedef unsigned short		ushort;
 typedef unsigned int		uint;
 typedef unsigned long		ulong;
 typedef unsigned long long	uvlong;
 typedef long long		vlong;
 /*
 * nil cannot be ((void*)0) on ANSI C,
 * because it is used for function pointers
 */
 #undef	nil
 #define	nil	0
 #undef	nelem
 #define	nelem	((void*)0)
 #define nelem(x) (sizeof(x)/sizeof((x)[0]))
 #define nil ((void*)0)
--- a/src/lib9/utf/utfecpy.c
+++ b/src/lib9/utf/utfecpy.c
@ -7,18 +7,17 @@
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #define _BSD_SOURCE 1	/* memccpy */
 #include <stdarg.h>
 #include <string.h>
 #include "plan9.h"
 #include "utf.h"
 #include "utfdef.h"
 char*
-utfecpy(char *to, char *e, char *from)
+utfecpy(char *to, char *e, const char *from)
 {
 	char *end;
--- a/src/lib9/utf/utflen.c
+++ b/src/lib9/utf/utflen.c
@ -7,17 +7,17 @@
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include <stdarg.h>
 #include <string.h>
 #include "plan9.h"
 #include "utf.h"
 #include "utfdef.h"
 int
-utflen(char *s)
+utflen(const char *s)
 {
 	int c;
 	long n;
@ -34,4 +34,5 @@ utflen(char *s)
 			s += chartorune(&rune, s);
 		n++;
 	}
 	return 0;
 }
--- a/src/lib9/utf/utfnlen.c
+++ b/src/lib9/utf/utfnlen.c
@ -7,22 +7,22 @@
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include <stdarg.h>
 #include <string.h>
 #include "plan9.h"
 #include "utf.h"
 #include "utfdef.h"
 int
-utfnlen(char *s, long m)
+utfnlen(const char *s, long m)
 {
 	int c;
 	long n;
 	Rune rune;
-	char *es;
+	const char *es;
 	es = s + m;
 	for(n = 0; s < es; n++) {
--- a/src/lib9/utf/utfrrune.c
+++ b/src/lib9/utf/utfrrune.c
@ -7,21 +7,22 @@
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include <stdarg.h>
 #include <string.h>
 #include "plan9.h"
 #include "utf.h"
 #include "utfdef.h"
 const
 char*
-utfrrune(char *s, long c)
+utfrrune(const char *s, Rune c)
 {
 	long c1;
 	Rune r;
-	char *s1;
+	const char *s1;
 	if(c < Runesync)		/* not part of utf sequence */
 		return strrchr(s, c);
@ -42,4 +43,5 @@ utfrrune(char *s, long c)
 			s1 = s;
 		s += c1;
 	}
 	return 0;
 }
--- a/src/lib9/utf/utfrune.c
+++ b/src/lib9/utf/utfrune.c
@ -7,17 +7,18 @@
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include <stdarg.h>
 #include <string.h>
 #include "plan9.h"
 #include "utf.h"
 #include "utfdef.h"
 const
 char*
-utfrune(char *s, long c)
+utfrune(const char *s, Rune c)
 {
 	long c1;
 	Rune r;
@ -41,4 +42,5 @@ utfrune(char *s, long c)
 			return s;
 		s += n;
 	}
 	return 0;
 }
--- a/src/lib9/utf/utfutf.c
+++ b/src/lib9/utf/utfutf.c
@ -7,24 +7,25 @@
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 #include <stdarg.h>
 #include <string.h>
 #include "plan9.h"
 #include "utf.h"
 #include "utfdef.h"
 /*
 * Return pointer to first occurrence of s2 in s1,
 * 0 if none
 */
 const
 char*
-utfutf(char *s1, char *s2)
+utfutf(const char *s1, const char *s2)
 {
-	char *p;
+	const char *p;
 	long f, n1, n2;
 	Rune r;
@ -34,7 +35,7 @@ utfutf(char *s1, char *s2)
 		return strstr(s1, s2);
 	n2 = strlen(s2);
-	for(p=s1; p=utfrune(p, f); p+=n1)
+	for(p=s1; (p=utfrune(p, f)) != 0; p+=n1)
 		if(strncmp(p, s2, n2) == 0)
 			return p;
 	return 0;
--- a/src/runtime/Makefile
+++ b/src/runtime/Makefile
@ -20,6 +20,7 @@ LIBOFILES=\
 	runtime.$O\
 	map.$O\
 	print.$O\
 	rune.$O\
 	string.$O\
 	sys_file.$O\
--- a/src/runtime/rune.c
+++ b/src/runtime/rune.c
@ -0,0 +1,224 @@
 /*
 * The authors of this software are Rob Pike and Ken Thompson.
 *              Copyright (c) 2002 by Lucent Technologies.
 * Permission to use, copy, modify, and distribute this software for any
 * purpose without fee is hereby granted, provided that this entire notice
 * is included in all copies of any software which is or includes a copy
 * or modification of this software and in all copies of the supporting
 * documentation for such software.
 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
 */
 /*
 * This code is copied, with slight editing due to type differences,
 * from a subset of ../lib9/utf/rune.c
 */
 #include "runtime.h"
 /*
  * Constants describing the UTF-8 byte layout.  Everything is derived
  * from the number of payload bits each kind of byte carries.
  */
 enum
 {
 	/* payload bits per kind of byte */
 	Bit1	= 7,	/* the only byte of a one-byte sequence */
 	Bitx	= 6,	/* a continuation byte */
 	Bit2	= 5,	/* first byte of a two-byte sequence */
 	Bit3	= 4,	/* first byte of a three-byte sequence */
 	Bit4	= 3,	/* first byte of a four-byte sequence */
 	Bit5	= 2,	/* first byte of a five-byte sequence */
 	/* tag patterns: the high bits of a byte that sit above its payload */
 	T1	= (0xFF << (Bit1+1)) & 0xFF,	/* 0000 0000 */
 	Tx	= (0xFF << (Bitx+1)) & 0xFF,	/* 1000 0000 */
 	T2	= (0xFF << (Bit2+1)) & 0xFF,	/* 1100 0000 */
 	T3	= (0xFF << (Bit3+1)) & 0xFF,	/* 1110 0000 */
 	T4	= (0xFF << (Bit4+1)) & 0xFF,	/* 1111 0000 */
 	T5	= (0xFF << (Bit5+1)) & 0xFF,	/* 1111 1000 */
 	/* largest value that fits in one, two, three and four bytes */
 	Rune1	= (1<<Bit1) - 1,			/* 0000 0000 0111 1111 */
 	Rune2	= (1<<(Bit2+Bitx)) - 1,			/* 0000 0111 1111 1111 */
 	Rune3	= (1<<(Bit3+Bitx+Bitx)) - 1,		/* 1111 1111 1111 1111 */
 	Rune4	= (1<<(Bit4+Bitx+Bitx+Bitx)) - 1,	/* 0001 1111 1111 1111 1111 1111 */
 	/* continuation byte: payload mask and the tag bits to test */
 	Maskx	= 0xFF >> (8-Bitx),		/* 0011 1111 */
 	Testx	= 0xFF & ~Maskx,		/* 1100 0000 */
 	Runeerror	= 0xFFFD,
 	Runeself	= 0x80,
 	Runemax	= 0x10FFFF,	/* maximum rune value */
 	Bad	= Runeerror,	/* value stored on a decoding failure */
 };
 /*
 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
 * This is a slower but "safe" version of the old chartorune
 * that works on strings that are not necessarily null-terminated.
 *
 * If you know for sure that your string is null-terminated,
 * chartorune will be a bit faster.
 *
 * charntorune decodes the UTF-8 sequence that starts at str,
 * stores the result in *rune and returns the number of bytes
 * consumed.
 *
 * It is guaranteed not to read at or beyond str+length.  This is
 * to avoid possible access violations.  If the string appears to
 * be well-formed but incomplete (i.e., to get the whole Rune
 * we'd need to read past str+length) then we'll set the Rune
 * to Bad and return 0.
 *
 * Note that if we have decoding problems for other reasons
 * (a byte that is not a valid continuation, an overlong
 * encoding, a value above Runemax, or a first byte announcing
 * five or more bytes) we set the Rune to Bad and return 1
 * instead of 0.  The length checks run before those tests, so a
 * malformed sequence that is also too short still returns 0.
 */
 int32
 charntorune(int32 *rune, byte *str, int32 length)
 {
 	int32 c, c1, c2, c3;
 	int32 l;
 	/* When we're not allowed to read anything */
 	if(length <= 0) {
 		goto badlen;
 	}
 	/*
 	 * one character sequence (7-bit value)
 	 *	00000-0007F => T1
 	 */
 	c = *(byte*)str;  /* cast not necessary, but kept for safety */
 	if(c < Tx) {
 		*rune = c;
 		return 1;
 	}
 	// If we can't read more than one character we must stop
 	if(length <= 1) {
 		goto badlen;
 	}
 	/*
 	 * two character sequence (11-bit value)
 	 *	0080-07FF => T2 Tx
 	 */
 	c1 = *(byte*)(str+1) ^ Tx;	/* strips the tag; any bit left in Testx means "not a continuation" */
 	if(c1 & Testx)
 		goto bad;
 	if(c < T3) {
 		if(c < T2)	/* a continuation byte cannot start a sequence */
 			goto bad;
 		l = ((c << Bitx) | c1) & Rune2;
 		if(l <= Rune1)	/* overlong: would have fit in one byte */
 			goto bad;
 		*rune = l;
 		return 2;
 	}
 	// If we can't read more than two characters we must stop
 	if(length <= 2) {
 		goto badlen;
 	}
 	/*
 	 * three character sequence (16-bit value)
 	 *	0800-FFFF => T3 Tx Tx
 	 */
 	c2 = *(byte*)(str+2) ^ Tx;
 	if(c2 & Testx)
 		goto bad;
 	if(c < T4) {
 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
 		if(l <= Rune2)	/* overlong: would have fit in two bytes */
 			goto bad;
 		*rune = l;
 		return 3;
 	}
 	if (length <= 3)
 		goto badlen;
 	/*
 	 * four character sequence (21-bit value)
 	 *	10000-10FFFF => T4 Tx Tx Tx
 	 */
 	c3 = *(byte*)(str+3) ^ Tx;
 	if (c3 & Testx)
 		goto bad;
 	if (c < T5) {
 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
 		if (l <= Rune3)	/* overlong: would have fit in three bytes */
 			goto bad;
 		/*
 		 * Four bytes can carry up to Rune4, but runetochar will not
 		 * encode anything above Runemax, so do not hand such a value
 		 * back to the caller as if it were a valid rune.
 		 */
 		if (l > Runemax)
 			goto bad;
 		*rune = l;
 		return 4;
 	}
 	// Support for 5-byte or longer UTF-8 would go here, but
 	// since we don't have that, we'll just fall through to bad.
 	/*
 	 * bad decoding
 	 */
 bad:
 	*rune = Bad;
 	return 1;
 badlen:
 	*rune = Bad;
 	return 0;
 }
 /*
 * runetochar stores the UTF-8 encoding of rune at str and returns
 * the number of bytes stored, from 1 to 4.  A rune above Runemax,
 * which after the unsigned conversion includes every negative
 * value, is encoded as Runeerror.
 */
 int32
 runetochar(byte *str, int32 rune)  /* note: in original, arg2 was pointer */
 {
 	/* Runes are signed, so work on an unsigned copy for the range checks. */
 	uint32 c;
 	int32 i, n, lead;
 	c = rune;
 	/*
 	 * one character sequence
 	 *	00000-0007F => 00-7F
 	 */
 	if(c <= Rune1) {
 		str[0] = c;
 		return 1;
 	}
 	/*
 	 * Longer sequences: choose the length and the tag of the
 	 * first byte, then emit the bytes below.
 	 */
 	if(c <= Rune2) {
 		/* two character sequence: 0080-07FF => T2 Tx */
 		n = 2;
 		lead = T2;
 	} else {
 		/*
 		 * The out of range test lives here because the error rune
 		 * needs three bytes; an out of range value can never have
 		 * been small enough for the one or two byte cases above.
 		 */
 		if(c > Runemax)
 			c = Runeerror;
 		if(c <= Rune3) {
 			/* three character sequence: 0800-FFFF => T3 Tx Tx */
 			n = 3;
 			lead = T3;
 		} else {
 			/* four character sequence: 10000-10FFFF => T4 Tx Tx Tx */
 			n = 4;
 			lead = T4;
 		}
 	}
 	/* continuation bytes, last one first, six payload bits apiece */
 	for(i = n-1; i > 0; i--) {
 		str[i] = Tx | (c & Maskx);
 		c >>= Bitx;
 	}
 	/* whatever is left of c is the payload of the first byte */
 	str[0] = lead | c;
 	return n;
 }
--- a/src/runtime/runtime.h
+++ b/src/runtime/runtime.h
@ -85,6 +85,8 @@ enum
 int32 strcmp(byte*, byte*);
 int32 findnull(int8*);
 void	dump(byte*, int32);
 /* UTF-8 encoder; rune.c holds a definition with this signature. */
 int32 runetochar(byte*, int32);
 /*
 * NOTE(review): the decoder visible in rune.c is
 * charntorune(int32*, byte*, int32), which differs from this
 * prototype in both name and parameters -- confirm that a
 * chartorune with this signature is defined somewhere.
 */
 int32 chartorune(uint32*, byte*);
 extern string	emptystring;
 extern int32 debug;
--- a/src/runtime/string.c
+++ b/src/runtime/string.c
@ -151,55 +151,6 @@ sys·indexstring(string s, int32 i, byte b)
 	FLUSH(&b);
 }
 /*
 * this is the plan9 runetochar
 * extended for 36 bits in 7 bytes
 * note that it truncates to 32 bits
 * through the argument passing.
 */
 /*
 * Stores the encoding of c at str and returns the number of bytes
 * stored: one first byte plus however many extension bytes the
 * value needs.
 */
 static int32
 runetochar(byte *str, uint32 c)
 {
 	int32 ext, pos;
 	uint32 limit, lead;
 	/*
 	 * one character in 7 bits
 	 */
 	if(c <= 0x07FUL) {
 		str[0] = c;
 		return 1;
 	}
 	/*
 	 * every new character picks up 5 bits
 	 * one less in the first byte and
 	 * six more in an extension byte
 	 */
 	ext = 1;		/* extension bytes needed so far */
 	limit = 0x7ffUL;	/* largest value that fits with ext extension bytes */
 	lead = 0xC0UL;		/* tag bits of the first byte for that length */
 	while(c > limit) {
 		limit = (limit<<5) | 0x1fUL;
 		lead = (lead>>1) | 0x80UL;
 		ext++;
 	}
 	/*
 	 * lay down the extension bytes backwards,
 	 * six bits of c at a time
 	 */
 	pos = ext;
 	while(pos > 0) {
 		str[pos] = 0x80UL | (c&0x3fUL);
 		c >>= 6;
 		pos--;
 	}
 	/* what remains of c shares the first byte with the tag */
 	str[0] = lead|c;
 	return ext+1;
 }
 void
 sys·intstring(int64 v, string s)
 {
--- a/test/string_lit.go
+++ b/test/string_lit.go
@ -75,5 +75,14 @@ func main() {
 	       `\000\123\x00\312\xFE\u0123\ubabe\U0000babe`,
           "backslashes 2 (backquote)");
 	assert("\\x\\u\\U\\", `\x\u\U\`, "backslash 3 (backquote)");
 	// test large runes. perhaps not the most logical place for this test.
 	var r int32;
 	r = 0x10ffff;	// largest rune value
 	s = string(r);
 	assert(s, "\xf4\x8f\xbf\xbf", "largest rune");
 	r = 0x10ffff + 1;
 	s = string(r);
 	assert(s, "\xef\xbf\xbd", "too-large rune");
 	sys.exit(ecode);
 }