diff options
| -rw-r--r-- | it_xml.class | 11 | ||||
| -rwxr-xr-x | tests/it_xml.t | 35 | 
2 files changed, 26 insertions, 20 deletions
diff --git a/it_xml.class b/it_xml.class
index 0679c69..f854682 100644
--- a/it_xml.class
+++ b/it_xml.class
@@ -29,7 +29,7 @@ class it_xml
  * @param $p associative array
  * @param $p['forcearray'] xml tags to ALWAYS return as array
  * @param $p['safety'] 2 causes program abort with invalid xml, 1 (default) causes error report, 0 just returns false
- * @param $p['encoding'] Output character encoding (e.g. UTF-8, default: ISO-8859-1)
+ * @param $p['encoding'] Output character encoding (utf-8, iso-8859-1 or us-ascii, default: ini_get('default_charset')
  * @param $p['prefix'] Optional prefix for class names
  * @param $p['lowercase'] Lowercase all tag and attribute names
  * @return XML object tree or null on failure
@@ -49,23 +49,22 @@ function it_xml($xmldata = "", $p = array())
 function create($xmldata, $p = array())
 {
 	$xml = new it_xml;
-
 	return $xml->from_xml($xmldata, array('factory' => true) + $p) ? $xml->_root : null;
 }
 function from_xml($xmldata, $p)
 {
-	$this->_p = $p + array('encoding' => "ISO-8859-1", 'safety' => 1);
+	$this->_p = $p + array('encoding' => ini_get('default_charset'), 'safety' => 1);
 	$this->_arrayforce = array_flip((array)$this->_p['forcearray']);
 	$this->_stack = array();
 	unset($this->error);
-	$parser = xml_parser_create($this->_p['encoding']);
+	$parser = xml_parser_create();
 	xml_set_object($parser, $this);
 	xml_set_element_handler($parser, "start_element", "end_element");
 	xml_set_character_data_handler($parser, "character_data");
 	xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
 	xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $this->_p['encoding']);
-	
+
 	$result = true;
 	if (is_resource($xmldata))
@@ -123,7 +122,7 @@ function _sanitize($xmldata, $isutf8 = null)
 		if (!preg_match('/^(<\?xml|\xEF\xBB\xBF|\xFE\xFF|\xFF\xFE|\x00\x00\xFE\xFF|\x00\x00\xFF\xFE)/', $xmldata))
 			$xmldata = '<?xml version="1.0" encoding="' . $this->_p['encoding'] . '"?>' . $xmldata;
-		$isutf8 = (!preg_match('/^<\?xml[^>]* encoding=/i', $xmldata) || preg_match('/^<\?xml[^>]* encoding=.UTF-8/i', $xmldata));
+		$isutf8 = (!preg_match('/^<\?xml[^>]* encoding=/i', $xmldata) || preg_match('/^<\?xml[^>]* encoding=.utf-8/i', $xmldata));
 	}
 	# Decode illegal entities but protect semantically important ones
diff --git a/tests/it_xml.t b/tests/it_xml.t
index f74c54b..d74fadf 100755
--- a/tests/it_xml.t
+++ b/tests/it_xml.t
@@ -5,13 +5,14 @@
 function match($xmldata, $expected, $name, $prefix = "", $p = array())
 {
-	$classname = $prefix ? ($prefix . "_xml") : "it_xml";
+	$classname = ($prefix ?: "it") . "_xml";
 	$varname  = $prefix . "foo";
 	$xmldata = "<root>$xmldata</root>";
 	$xml = new $classname($xmldata, $p);
+	$mod_utf8 = $p['encoding'] != "iso-8859-1" ? "u" : "";
 	is(
-		preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+		preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
 		$expected,
 		"$name (string)"
 	);
@@ -24,11 +25,10 @@ function match($xmldata, $expected, $name, $prefix = "", $p = array())
 	fclose($tmpfile);
 	is(
-		preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+		preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
 		$expected,
 		"$name (file)"
 	);
-
 }
 match(
@@ -44,8 +44,8 @@ match(
 );
 match(
-	'<foo title="Zürich">Stüssihofstadt</foo>',
-	'foo Object ( [attr] => Array ( [title] => Zürich ) [val] => Stüssihofstadt ) ',
+	'<foo title="Zürich">Stüssihofstadt</foo>',
+	'foo Object ( [attr] => Array ( [title] => Zürich ) [val] => Stüssihofstadt ) ',
 	'simple tag with latin1 content and attribute'
 );
@@ -62,26 +62,33 @@ match(
 );
 match(
-	'<foo>&amp; <a> &amp; <b> &amp; <c> ü</foo>',
-	'foo Object ( [val] => & <a> & <b> & <c> ü ) ',
-	'Predecode illegal entities while keeping properly encoded ones'
+	'<foo>x ü y</foo>',
+	utf8_decode('foo Object ( [val] => x ü y ) '),
+	'Manual encoding override',
+	"",
+	array('encoding' => "iso-8859-1")
 );
 match(
 	'<foo>&amp; <a> &amp; <b> &amp; <c> ü</foo>',
-	utf8_encode('foo Object ( [val] => & <a> & <b> & <c> ü ) '),
-	'Predecode illegal entities while keeping properly encoded ones (UTF-8)',
-	"",
-	array('encoding' => "UTF-8")
+	'foo Object ( [val] => & <a> & <b> & <c> ü ) ',
+	'Predecode illegal entities while keeping properly encoded ones',
 );
+match(
+	'<foo>&amp; <a> &amp; <b> &amp; <c> ü</foo>',
+	utf8_decode('foo Object ( [val] => & <a> & <b> & <c> ü ) '),
+	'Predecode illegal entities while keeping properly encoded ones (iso-8859-1)',
+	"",
+	array('encoding' => "iso-8859-1")
+);
 match(
 	"<foo>a\x05b</foo>",
 	'foo Object ( [val] => a b ) ',
 	'Illegal latin 1 character',
 	"",
-	array('encoding' => "ISO-8859-1")
+	array('encoding' => "iso-8859-1")
 );
 # Test inheritance