From 3279ee8a5057f915d12cf4a0693ecfa0449d3f93 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Thu, 5 Jul 2007 14:10:43 +0000 Subject: Handle XML files with BOM marker (Byte Order Mark) --- xml.class | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xml.class b/xml.class index 24a9f23..8cd386a 100644 --- a/xml.class +++ b/xml.class @@ -56,6 +56,7 @@ function from_xml($xmldata, $p) xml_set_element_handler($parser, "start_element", "end_element"); xml_set_character_data_handler($parser, "character_data"); xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0); + xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $this->_p['encoding']); $result = true; @@ -74,7 +75,9 @@ function from_xml($xmldata, $p) } else { - if (!preg_match('/^<\?xml/', $xmldata)) # Prepend XML header for charset detection in PHP5 + # Add header for charset detection (PHP5) if no header/BOM + # See http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing + if (!preg_match('/^(<\?xml|\xEF\xBB\xBF|\xFE\xFF|\xFF\xFE|\x00\x00\xFE\xFF|\x00\x00\xFF\xFE)/', $xmldata)) $xmldata = '<?xml version="1.0" encoding="' . $this->_p['encoding'] . '"?>' . $xmldata; $result = xml_parse($parser, $xmldata); -- cgit v1.2.3