summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christian Schneider 2007-07-05 14:10:43 +0000
committer Christian Schneider 2007-07-05 14:10:43 +0000
commit 3279ee8a5057f915d12cf4a0693ecfa0449d3f93 (patch)
tree b91dd033f5fa0b79c798c6cb25d2eb850ceb26fc
parent 43a2a17959a4ed185c89b2d9c1a990a83d02d54e (diff)
download itools-3279ee8a5057f915d12cf4a0693ecfa0449d3f93.tar.gz
itools-3279ee8a5057f915d12cf4a0693ecfa0449d3f93.tar.bz2
itools-3279ee8a5057f915d12cf4a0693ecfa0449d3f93.zip
Handle XML files with BOM marker (Byte Order Mark)
-rw-r--r-- xml.class 5
1 file changed, 4 insertions, 1 deletion
diff --git a/xml.class b/xml.class
index 24a9f23..8cd386a 100644
--- a/xml.class
+++ b/xml.class
@@ -56,6 +56,7 @@ function from_xml($xmldata, $p)
xml_set_element_handler($parser, "start_element", "end_element");
xml_set_character_data_handler($parser, "character_data");
xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
+ xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $this->_p['encoding']);
$result = true;
@@ -74,7 +75,9 @@ function from_xml($xmldata, $p)
}
else
{
- if (!preg_match('/^<\?xml/', $xmldata)) # Prepend XML header for charset detection in PHP5
+ # Add header for charset detection (PHP5) if no header/BOM
+ # See http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
+ if (!preg_match('/^(<\?xml|\xEF\xBB\xBF|\xFE\xFF|\xFF\xFE|\x00\x00\xFE\xFF|\x00\x00\xFF\xFE)/', $xmldata))
$xmldata = '<?xml version="1.0" encoding="' . $this->_p['encoding'] . '"?>' . $xmldata;
$result = xml_parse($parser, $xmldata);