diff options
Diffstat (limited to 'devel-utf8/it_xml.class')
-rw-r--r-- | devel-utf8/it_xml.class | 381 |
1 files changed, 0 insertions, 381 deletions
/*
** $Id$
**
** Copyright (C) 1995-2007 by the ITools Authors.
** This file is part of ITools - the Internet Tools Library
**
** ITools is free software; you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation; either version 3 of the License, or
** (at your option) any later version.
**
** ITools is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
**
** it_xml.class - XML parser / object factory
*/

/**
 * XML parser / object factory.
 *
 * Every parsed element becomes an object of a class named after the tag
 * (optionally prefixed) that extends it_xml. Child elements are stored as
 * properties named after the child tag (a single node, or an array of nodes
 * for repeated / force-array tags), attributes in the array property 'attr'
 * and character data in the string property 'val'.
 *
 * Properties are deliberately NOT declared: flatten() uses get_object_vars()
 * to detect empty nodes, so a declared (null) property would change results.
 * The attribute on the next line keeps dynamic properties legal on PHP >= 8.2
 * and is an ordinary '#' comment for older PHP versions.
 */
#[AllowDynamicProperties]
class it_xml
{

/**
 * Create a node and optionally parse XML into it.
 * @param $xmldata XML string or filehandle (result from fopen) to parse; nothing is parsed if empty
 * @param $p associative array
 * @param $p['forcearray'] xml tags to ALWAYS return as array
 * @param $p['safety'] 2 or more calls it::fatal() on invalid xml, 1 (default) calls it::error(), 0 just makes from_xml() return false
 * @param $p['encoding'] Output character encoding (e.g. UTF-8, default: ISO-8859-1)
 * @param $p['prefix'] Optional prefix for class names
 * @param $p['lowercase'] Lowercase all tag and attribute names
 */
public function __construct($xmldata = "", $p = array())
{
	if ($xmldata)
		$this->from_xml($xmldata, $p);
}

/**
 * PHP4-style constructor name, kept as a plain method so that existing code
 * calling parent::it_xml() or $obj->it_xml() keeps working. Params like __construct
 */
public function it_xml($xmldata = "", $p = array())
{
	if ($xmldata)
		$this->from_xml($xmldata, $p);
}

/**
 * Factory method to return XML object tree from XML string. Params like constructor
 * Example: $root = it_xml::create("<a><b id='2'/></a>", array('forcearray' => array("b")));
 * @return Root node of the parsed tree or null on failure (error text is in $GLOBALS['IT_XML_ERROR'])
 */
public static function create($xmldata, $p = array())
{
	$xml = new it_xml;

	return $xml->from_xml($xmldata, array('factory' => true) + $p) && isset($xml->_root) ? $xml->_root : null;
}

/**
 * Parse XML into $this (or, in factory mode, into $this->_root).
 * @param $xmldata XML string or filehandle (result from fopen) to parse
 * @param $p Options, see constructor
 * @return true if no error was recorded, false otherwise ($this->error holds the message)
 */
public function from_xml($xmldata, $p)
{
	$this->_p = $p + array('encoding' => "ISO-8859-1", 'safety' => 1);
	$this->_arrayforce = array_flip(isset($this->_p['forcearray']) ? (array)$this->_p['forcearray'] : array());
	$this->_stack = array();
	unset($this->error);

	$parser = xml_parser_create($this->_p['encoding']);
	# Array callables instead of xml_set_object() + method name strings (deprecated as of PHP 8.4)
	xml_set_element_handler($parser, array($this, "start_element"), array($this, "end_element"));
	xml_set_character_data_handler($parser, array($this, "character_data"));
	xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
	xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $this->_p['encoding']);

	$result = true;
	$xmlorig = "";	# Stays empty if a stream has nothing to read
	$isutf8 = null;	# Charset decision is taken by _sanitize() on the first chunk

	if (is_resource($xmldata))
	{
		while ($result && !feof($xmldata))
		{
			$data = fread($xmldata, 1024 * 1024);
			if ($data === false)	# Read failure: stop instead of spinning on a stream that never reaches EOF
				break;

			while (!feof($xmldata) && preg_match('/[\x80-\xff]$/', $data))	# Make sure end of chunk is not in the middle of a UTF8 character
				$data .= fread($xmldata, 1);

			$xmlorig = $data;	# Do not append to keep memory footprint down!
			list($data, $isutf8) = $this->_sanitize($data, $isutf8);
			$result = xml_parse($parser, $data);
		}

		if ($result)
			$result = xml_parse($parser, "", true);
	}
	else
	{
		$xmlorig = $xmldata;
		list($xmldata, $isutf8) = $this->_sanitize($xmldata);
		$result = xml_parse($parser, $xmldata, true);	# Final chunk: truncated documents must be reported as errors
	}

	if (!$result)
		$this->_add_error(sprintf("it_xml error: %s at line %d", xml_error_string(xml_get_error_code($parser)), xml_get_current_line_number($parser)));

	if (!empty($this->error))
	{
		if ($this->_p['safety'] >= 2)
			it::fatal(array('title' => $this->error, 'body' => $xmlorig));
		else if ($this->_p['safety'] >= 1)
			it::error(array('title' => $this->error, 'body' => $xmlorig));

		if (!empty($this->_p['factory']))
			$GLOBALS['IT_XML_ERROR'] = $this->error;
	}

	unset($this->_arrayforce, $this->_p['safety'], $this->_p['factory'], $this->_stack);

	if (PHP_VERSION_ID < 80000)	# Parsers are objects as of PHP 8 and are released when they go out of scope
		xml_parser_free($parser);

	return empty($this->error);
}

/**
 * Use various heuristics to fix real-world XML files
 * @param $xmldata XML string (whole document or one chunk of it)
 * @param $isutf8 Charset decision of a previous chunk, null to decide now from header/BOM
 * @return array(sanitized xml string, isutf8 flag)
 */
public function _sanitize($xmldata, $isutf8 = null)
{
	if (!isset($isutf8))	# Check if we already decided on charset yet
	{
		$xmldata = ltrim($xmldata);

		# Add header for charset detection (PHP5) if no header/BOM
		# See http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
		if (!preg_match('/^(<\?xml|\xEF\xBB\xBF|\xFE\xFF|\xFF\xFE|\x00\x00\xFE\xFF|\x00\x00\xFF\xFE)/', $xmldata))
			$xmldata = '<?xml version="1.0" encoding="' . $this->_p['encoding'] . '"?>' . $xmldata;

		$isutf8 = (!preg_match('/^<\?xml[^>]* encoding=/i', $xmldata) || preg_match('/^<\?xml[^>]* encoding=.UTF-8/i', $xmldata));
	}

	# Decode illegal entities but protect semantically important ones:
	# escaping their ampersand first makes html_entity_decode() restore them unchanged
	$protected = preg_replace('/&(amp|lt|gt|#38|#60|#62|#x26|#x3C|#x3E);/i', '&amp;$1;', $xmldata);
	$xmldata = html_entity_decode($protected, ENT_NOQUOTES, $this->_p['encoding']);

	# If should be utf-8 and can't be decoded as such, fix it, even if mixed between both
	if ($isutf8 && preg_match('/[^\x80-\xff][\x80-\xff][^\x80-\xff]/', $xmldata))
		$xmldata = preg_replace_callback('/[\x80-\xff]{1,4}/', function($m) { return it_xml::_utf8_fix($m[0]); }, $xmldata);

	# If not utf-8, remove characters illegal for latin-1
	if (!$isutf8 && preg_match('/[\x00-\x08\x0b-\x0c\x0e-\x1f\x80-\x9f]/', $xmldata))
		$xmldata = it_html::latinize($xmldata);

	return array($xmldata, $isutf8);
}

/**
 * Encode non-utf8 characters in a string, leave utf8 alone
 * @param $str Byte sequence to check
 * @return $str unchanged if it is exactly one well-formed 2 to 4 byte UTF-8 sequence,
 *         otherwise every byte >= 0x80 re-encoded as the two-byte UTF-8 form of that Latin-1 code point
 */
public static function _utf8_fix($str)
{
	if (preg_match('/^([\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]|[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf])$/', $str))
		return $str;

	# Same conversion as utf8_encode(), which is deprecated as of PHP 8.2
	$result = "";
	for ($i = 0, $len = strlen($str); $i < $len; $i++)
	{
		$code = ord($str[$i]);

		if ($code < 0x80)
			$result .= $str[$i];
		else
			$result .= chr(0xC0 | ($code >> 6)) . chr(0x80 | ($code & 0x3F));
	}

	return $result;
}

/**
 * Hook called by end_element() for every closed node with the parser options as argument.
 * @return true to keep the node from being attached to its parent; this base implementation returns false
 */
public function consume(/* $p */)
{
	return false;
}

/**
 * Utility function which recursively flattens container into array
 * @param with_attr if true, include attributes in result array
 * @return Array with key/values-pairs for vals and optionally attrs of sub-objects
 */
public function flatten($with_attr = false)
{
	$result = ($with_attr && isset($this->attr)) ? (array)$this->attr : array();

	foreach ($this->elements() as $key => $values)
	{
		if (is_array($values))
		{
			$result[$key] = array();

			foreach ($values as $value)
				$result[$key][] = $this->_flatten_node($value, $with_attr);
		}
		else
			$result[$key] = $this->_flatten_node($values, $with_attr);
	}

	return $result;
}

/**
 * Flatten one child node: don't take recursion if val is set or object is empty
 * @return val of the node, null for an empty node, otherwise the flattened node
 */
public function _flatten_node($node, $with_attr)
{
	if (isset($node->val))
		return $node->val;

	return get_object_vars($node) ? $node->flatten($with_attr) : null;
}

/**
 * Strip namespace and convert extra characters to _
 * @param $name Tag or attribute name as found in the XML
 * @return Identifier, lowercased if option 'lowercase' is set
 */
public function _make_identifier($name)
{
	$id = preg_replace(array('/^.*:/', '/\W/'), array('', '_'), $name);
	return !empty($this->_p['lowercase']) ? strtolower($id) : $id;
}

/**
 * Append text to $this->error without reading the property while it is unset
 */
public function _add_error($message)
{
	$this->error = (isset($this->error) ? $this->error : "") . $message;
}

/**
 * Determine the class to instantiate for a node, declaring it on the fly if needed.
 * Falls back to the base class if the name cannot be declared (not a valid
 * identifier, reserved word, name taken by an interface/trait) or if a class
 * of that name exists but does not extend it_xml.
 * @param $classname Wanted class name (prefix + identifier)
 * @param $report If true, a clash with a class not extending it_xml is added to $this->error
 * @return Name of a class that is it_xml or extends it
 */
public function _node_class($classname, $report = true)
{
	if (!class_exists($classname))
	{
		$taken = interface_exists($classname, false) || (function_exists('trait_exists') && trait_exists($classname, false));

		if ($taken || !preg_match('/^[A-Za-z_][A-Za-z0-9_]*\z/', $classname))
			return "it_xml";

		# NOTE(review): eval() of a name derived from document content. The pattern check above
		# restricts it to plain identifier characters; do not loosen it without replacing the eval.
		try
		{
			eval("class $classname extends it_xml {}");	# Extending the base class caused problems with tel_xmlentry
		}
		catch (ParseError $e)	# Reserved words like list or class cannot be class names
		{
			return "it_xml";
		}

		if (!class_exists($classname, false))	# PHP 5 reports eval parse errors without throwing
			return "it_xml";
	}

	if (!is_a($classname, "it_xml", true))
	{
		if ($report)
			$this->_add_error("Class $classname used in it_xml for tag $classname does not extend it_xml: Name clash?\n");

		return "it_xml";
	}

	return $classname;
}

/**
 * XML parser callback for an opening tag: pushes the node for this element onto the stack
 * @param $dummy_parser Parser handle (unused)
 * @param $name Tag name
 * @param $attrs Associative array of attributes
 */
public function start_element($dummy_parser, $name, $attrs)
{
	$name = (isset($this->_p['prefix']) ? $this->_p['prefix'] : "") . $this->_make_identifier($name);

	# Outside factory mode the root element is represented by $this itself
	$is_self = !$this->_stack && empty($this->_p['factory']);
	$classname = $this->_node_class($name, !$is_self);

	array_unshift($this->_stack, $is_self ? $this : new $classname);

	foreach ((array)$attrs as $key => $val)
		$this->_stack[0]->attr[$this->_make_identifier($key)] = $val;
}

/**
 * XML parser callback for a closing tag: attaches the finished node to its parent
 * (unless consume() claims it) and pops it off the stack
 * @param $dummy_parser Parser handle (unused)
 * @param $name Tag name
 */
public function end_element($dummy_parser, $name)
{
	$name = $this->_make_identifier($name);
	$node = $this->_stack[0];

	# The root element has no parent on the stack and is therefore not attached anywhere
	if (!$node->consume($this->_p) && isset($this->_stack[1]))
	{
		$parent = $this->_stack[1];

		if (isset($parent->$name) && is_array($parent->$name))
			$parent->{$name}[] = $node;
		else if (isset($parent->$name))
		{
			if ($parent->$name instanceof it_xml)
				$parent->$name = array($parent->$name, $node);	# Second occurrence of tag: promote to array
			else
				$this->_add_error("Variable $name used in it_xml for tag {$name} is already used in object: Name clash?\n");
		}
		else if (isset($this->_arrayforce[$name]))
			$parent->$name = array($node);
		else
			$parent->$name = $node;
	}

	array_shift($this->_stack);

	if (!empty($this->_p['factory']))
		$this->_root = $node;	# The last element to be closed is the root
}

/**
 * XML parser callback for character data: appends to val of the current node.
 * Whitespace-only data is dropped as long as the node has no val yet
 * @param $dummy_parser Parser handle (unused)
 * @param $cdata Character data
 */
public function character_data($dummy_parser, $cdata)
{
	if (empty($this->_stack))
		return;

	$node = $this->_stack[0];

	if (isset($node->val))
		$node->val .= $cdata;
	else if (trim($cdata) !== "")
		$node->val = $cdata;
}

/**
 * Convert XML object tree back to string representation
 * @param $p Associative array
 *   tag            Name of root XML tag (otherwise determined by get_class)
 *   indent         String to indent with (default: " ")
 *   lineseparator  String to start new line with (default: "\n")
 *   prefix         String to prefix class names with (default: "")
 *   sort           value true causes to_xml() to output elements sorted
 *   stripempty     value true removes xml tags without content
 * @return XML string representation of this object
 */
public function to_xml($p = array())
{
	$p += array('prefix' => "");	# Needed below to compute the default tag
	$p += array(
		'tag' => substr(get_class($this), strlen((string)$p['prefix'])),
		'indent' => " ",
		'lineseparator' => "\n",
		'sort' => false,
		'stripempty' => false,	# remove xml tags without content
		'nextindentation' => "",	# Internal: line start of this tag, empty for the outermost call
	);

	$currenttag = $p['tag'];
	$tag = $p['nextindentation'] . "<$currenttag";

	if (empty($p['nextindentation']))
		$p['nextindentation'] = $p['lineseparator'];

	$currentindentation = $p['nextindentation'];
	$p['nextindentation'] .= $p['indent'];
	$charset = ini_get('default_charset');

	foreach ((isset($this->attr) ? (array)$this->attr : array()) as $key => $value)
		$tag .= " $key=\"" . htmlspecialchars((string)$value, ENT_COMPAT, $charset) . '"';

	$vars = get_object_vars($this);

	if ($p['sort'])
		ksort($vars);

	$body = null;	# Stays null if there is neither a val nor a child node
	$indentation = "";	# Closing tag goes on its own line only if there are child nodes

	foreach ($vars as $key => $values)
	{
		$p['tag'] = $key;	# Manually set as PHP4 lowercases get_class()

		if (preg_match('/^(_|attr$)/', $key))	# Skip implementation detail fields
			continue;

		if (is_array($values) || is_object($values))
		{
			foreach ((is_array($values) ? $values : array($values)) as $value)
			{
				if ($value instanceof it_xml)
				{
					$body .= $value->to_xml($p);
					$indentation = $currentindentation;
				}
			}
		}
		else if ($key === 'val')
			$body .= htmlspecialchars((string)$values, ENT_COMPAT, $charset);
	}

	return isset($body) ? "$tag>$body$indentation</$currenttag>" : ($p['stripempty'] ? "" : "$tag/>");
}

/**
 * Get array containing child elements (key value pairs)
 * @return Array containing child elements; the entries are references to the properties of this node
 */
public function elements()
{
	$result = array();

	foreach (get_object_vars($this) as $key => $value)
	{
		if ($value instanceof it_xml || (is_array($value) && isset($value[0]) && $value[0] instanceof it_xml))
			$result[$key] =& $this->$key;
	}

	return $result;
}

/**
 * Get error message of last parse
 * @return Error of this object, or $GLOBALS['IT_XML_ERROR'] (set by a failed create()) if there is no object; null if none
 * NOTE(review): calling this statically only works on PHP versions that still allow
 * static calls of non-static methods; on PHP 8 read $GLOBALS['IT_XML_ERROR'] instead
 */
public function error()
{
	if (isset($this) && $this instanceof it_xml)
		return isset($this->error) ? $this->error : null;

	return isset($GLOBALS['IT_XML_ERROR']) ? $GLOBALS['IT_XML_ERROR'] : null;
}

/**
 * Set nodes as subnode of current node
 * @param vals assoc array of key => value pairs. May contain associative or numeric subarrays.
 *   A non-array value is stored as val of this node
 * @param parentprefix Class name prefix handed down from the parent node (default: option 'prefix' of this node)
 */
public function set($vals, $parentprefix = null)
{
	if (!is_array($vals))
	{
		$this->val = $vals;
		return;
	}

	$prefix = $parentprefix ? $parentprefix : (isset($this->_p['prefix']) ? $this->_p['prefix'] : "");

	foreach ($vals as $key => $val)
	{
		$classname = $this->_node_class($prefix . $this->_make_identifier($key), false);

		if (is_array($val) && $val === array_values($val))	# Numeric subarray: list of nodes with the same tag
		{
			$existing = isset($this->$key) ? $this->$key : array();
			# A single existing node becomes the first list entry (an array cast would yield its property map)
			$nodes = $existing instanceof it_xml ? array($existing) : (array)$existing;

			foreach ($val as $i => $v)
			{
				if (!isset($nodes[$i]) || !($nodes[$i] instanceof it_xml))
					$nodes[$i] = new $classname;

				$nodes[$i]->set($v, $prefix);
			}

			$this->$key = $nodes;
		}
		else
		{
			if (!isset($this->$key) || !($this->$key instanceof it_xml))
				$this->$key = new $classname;

			$this->$key->set($val, $prefix);
		}
	}
}

/**
 * @return val of this node as string, empty string if there is none
 */
public function __toString()
{
	return isset($this->val) ? (string)$this->val : "";
}

}