diff options
| author | Urban Müller | 2012-03-26 15:11:39 +0000 | 
|---|---|---|
| committer | Urban Müller | 2012-03-26 15:11:39 +0000 | 
| commit | b7200b739ff651a7647d2d666e3674a7fe3cb6e2 (patch) | |
| tree | 6362be7d1ea725fc9371839a50c506cfebaa1ef3 | |
| parent | 5e55c26d6ae3ab321a765fc66b7359a5a9edae8f (diff) | |
| download | itools-b7200b739ff651a7647d2d666e3674a7fe3cb6e2.tar.gz itools-b7200b739ff651a7647d2d666e3674a7fe3cb6e2.tar.bz2 itools-b7200b739ff651a7647d2d666e3674a7fe3cb6e2.zip | |
fixed it_html::fix_encoding
| -rw-r--r-- | it_html.class | 17 | ||||
| -rwxr-xr-x | tests/it_html.t | 12 | 
2 files changed, 19 insertions, 10 deletions
| diff --git a/it_html.class b/it_html.class index 41d799f..71ac965 100644 --- a/it_html.class +++ b/it_html.class @@ -259,18 +259,15 @@ function _parse_args($args)  # internal -function fix_encoding($string) +function fix_encoding($string, $silent = false)  {  	if (preg_match('/[\x20-\x7f][\x80-\xff][\x20-\x7f]/', $string)) -	{ -		it::error(array('title' => utf8_encode("incorrectly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html")); -		$string = utf8_encode($string); -	} -	else if ($string && preg_match('/[\x80-\xff]/', $string) && htmlspecialchars(utf8_decode($string), ENT_COMPAT, "utf-8") !== "") -	{ -		it::error(array('title' => utf8_encode("doubly utf8-encoded: " . trim($string)), 'skipfiles' => "it_html")); -		$string = utf8_decode($string); -	} +		list($string, $error) = array(utf8_encode($string), utf8_encode("incorrectly utf8-encoded: " . trim($string))); +	else if ($string && preg_match('/[\x80-\xff]/', $string) && utf8_encode(utf8_decode($string)) === $string && htmlspecialchars(utf8_decode($string), ENT_COMPAT, "utf-8") !== "") +		list($string, $error) = array(utf8_decode($string), utf8_encode("doubly utf8-encoded: " . trim($string))); + +	if ($error && !$silent) +		it::error(array('title' => $error, 'skipfiles' => "it_html"));  	return $string;  } diff --git a/tests/it_html.t b/tests/it_html.t index c955359..a576b47 100755 --- a/tests/it_html.t +++ b/tests/it_html.t @@ -168,4 +168,16 @@ is(  is(it_html::entity_decode("’"), "'");  is(it_html::entity_decode("࿿"), " ");  is(it_html::entity_decode("ϧ"),  " "); + +is(it_html::fix_encoding("Meier"), "Meier"); +is(it_html::fix_encoding("Müller"), "Müller"); +is(it_html::fix_encoding("Aslı"), "Aslı"); +is(it_html::fix_encoding("é»"), "é»"); + +is(it_html::fix_encoding(utf8_encode("Müller"), true), "Müller", "double encoded latin1");	# Double encoded latin1 +is(it_html::fix_encoding(utf8_encode("Aslı"), true), "Aslı");		# Double encoded non-latin1 +is(it_html::fix_encoding(utf8_encode("é»"), true), "é»");		# Double encoded special combination + +is(it_html::fix_encoding(utf8_decode("Müller"), true), "Müller");	# Incorrectly decoded latin1 +  ?> |