summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- it_xml.class 11
-rwxr-xr-x tests/it_xml.t 35
2 files changed, 26 insertions, 20 deletions
diff --git a/it_xml.class b/it_xml.class
index 0679c69..f854682 100644
--- a/it_xml.class
+++ b/it_xml.class
@@ -29,7 +29,7 @@ class it_xml
* @param $p associative array
* @param $p['forcearray'] xml tags to ALWAYS return as array
* @param $p['safety'] 2 causes program abort with invalid xml, 1 (default) causes error report, 0 just returns false
- * @param $p['encoding'] Output character encoding (e.g. UTF-8, default: ISO-8859-1)
+ * @param $p['encoding'] Output character encoding (utf-8, iso-8859-1 or us-ascii, default: ini_get('default_charset'))
* @param $p['prefix'] Optional prefix for class names
* @param $p['lowercase'] Lowercase all tag and attribute names
* @return XML object tree or null on failure
@@ -49,23 +49,22 @@ function it_xml($xmldata = "", $p = array())
function create($xmldata, $p = array())
{
$xml = new it_xml;
-
return $xml->from_xml($xmldata, array('factory' => true) + $p) ? $xml->_root : null;
}
function from_xml($xmldata, $p)
{
- $this->_p = $p + array('encoding' => "ISO-8859-1", 'safety' => 1);
+ $this->_p = $p + array('encoding' => ini_get('default_charset'), 'safety' => 1);
$this->_arrayforce = array_flip((array)$this->_p['forcearray']);
$this->_stack = array();
unset($this->error);
- $parser = xml_parser_create($this->_p['encoding']);
+ $parser = xml_parser_create();
xml_set_object($parser, $this);
xml_set_element_handler($parser, "start_element", "end_element");
xml_set_character_data_handler($parser, "character_data");
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $this->_p['encoding']);
-
+
$result = true;
if (is_resource($xmldata))
@@ -123,7 +122,7 @@ function _sanitize($xmldata, $isutf8 = null)
if (!preg_match('/^(<\?xml|\xEF\xBB\xBF|\xFE\xFF|\xFF\xFE|\x00\x00\xFE\xFF|\x00\x00\xFF\xFE)/', $xmldata))
$xmldata = '<?xml version="1.0" encoding="' . $this->_p['encoding'] . '"?>' . $xmldata;
- $isutf8 = (!preg_match('/^<\?xml[^>]* encoding=/i', $xmldata) || preg_match('/^<\?xml[^>]* encoding=.UTF-8/i', $xmldata));
+ $isutf8 = (!preg_match('/^<\?xml[^>]* encoding=/i', $xmldata) || preg_match('/^<\?xml[^>]* encoding=.utf-8/i', $xmldata));
}
# Decode illegal entities but protect semantically important ones
diff --git a/tests/it_xml.t b/tests/it_xml.t
index f74c54b..d74fadf 100755
--- a/tests/it_xml.t
+++ b/tests/it_xml.t
@@ -5,13 +5,14 @@
function match($xmldata, $expected, $name, $prefix = "", $p = array())
{
- $classname = $prefix ? ($prefix . "_xml") : "it_xml";
+ $classname = ($prefix ?: "it") . "_xml";
$varname = $prefix . "foo";
$xmldata = "<root>$xmldata</root>";
$xml = new $classname($xmldata, $p);
+ $mod_utf8 = $p['encoding'] != "iso-8859-1" ? "u" : "";
is(
- preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+ preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
$expected,
"$name (string)"
);
@@ -24,11 +25,10 @@ function match($xmldata, $expected, $name, $prefix = "", $p = array())
fclose($tmpfile);
is(
- preg_replace('/[#\s]+/', " ", print_r($xml->$varname, true)),
+ preg_replace('/[#\s]+/' . $mod_utf8, " ", print_r($xml->$varname, true)),
$expected,
"$name (file)"
);
-
}
match(
@@ -44,8 +44,8 @@ match(
);
match(
- '<foo title="Zürich">Stüssihofstadt</foo>',
- 'foo Object ( [attr] => Array ( [title] => Zürich ) [val] => Stüssihofstadt ) ',
+ '<foo title="Zürich">Stüssihofstadt</foo>',
+ 'foo Object ( [attr] => Array ( [title] => Zürich ) [val] => Stüssihofstadt ) ',
'simple tag with latin1 content and attribute'
);
@@ -62,26 +62,33 @@ match(
);
match(
- '<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &uuml;</foo>',
- 'foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) ',
- 'Predecode illegal entities while keeping properly encoded ones'
+ '<foo>x &uuml; y</foo>',
+ utf8_decode('foo Object ( [val] => x ü y ) '),
+ 'Manual encoding override',
+ "",
+ array('encoding' => "iso-8859-1")
);
match(
'<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &uuml;</foo>',
- utf8_encode('foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) '),
- 'Predecode illegal entities while keeping properly encoded ones (UTF-8)',
- "",
- array('encoding' => "UTF-8")
+ 'foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) ',
+ 'Predecode illegal entities while keeping properly encoded ones',
);
+match(
+ '<foo>&amp;amp; &lt;a&gt; &#38;amp; &#60;b&#62; &#x26;amp; &#x3C;c&#x3E; &#xFC;</foo>',
+ utf8_decode('foo Object ( [val] => &amp; <a> &amp; <b> &amp; <c> ü ) '),
+ 'Predecode illegal entities while keeping properly encoded ones (iso-8859-1)',
+ "",
+ array('encoding' => "iso-8859-1")
+);
match(
"<foo>a\x05b</foo>",
'foo Object ( [val] => a b ) ',
'Illegal latin 1 character',
"",
- array('encoding' => "ISO-8859-1")
+ array('encoding' => "iso-8859-1")
);
# Test inheritance