<?php
/*
**	Copyright (C) 1995-2007 by the ITools Authors.
**	This file is part of ITools - the Internet Tools Library
**
**	ITools is free software; you can redistribute it and/or modify
**	it under the terms of the GNU General Public License as published by
**	the Free Software Foundation; either version 3 of the License, or
**	(at your option) any later version.
**
**	ITools is distributed in the hope that it will be useful,
**	but WITHOUT ANY WARRANTY; without even the implied warranty of
**	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**	GNU General Public License for more details.
**
**	You should have received a copy of the GNU General Public License
**	along with this program.  If not, see <http://www.gnu.org/licenses/>.
**
**	it_xml.class - XML parser / object factory
*/

#[AllowDynamicProperties]
class it_xml
{
/*
 * XML parser / object factory.
 *
 * Every XML element becomes an object of a class named after the tag (created on
 * the fly as a subclass of it_xml if it does not exist yet). Child elements are
 * stored as dynamic properties named after the child tag, attributes in ->attr and
 * character data in ->val. Properties are deliberately NOT declared: flatten()
 * relies on get_object_vars() being empty for empty elements. The attribute line
 * above the class is a plain comment for older PHP versions and silences the
 * dynamic property deprecation in newer ones.
 */

/**
 * Add xml snippet as attribute tree to $this
 * @param $xmldata XML string or filehandle (result from fopen) to parse
 * @param $p associative array
 * @param $p['forcearray'] xml tags to ALWAYS return as array
 * @param $p['safety'] 2 causes program abort with invalid xml, 1 (default) causes error report, 0 just returns false
 * @param $p['encoding'] Output character encoding (utf-8, iso-8859-1 or us-ascii, default: ini_get('default_charset'))
 * @param $p['prefix'] Optional prefix for class names
 * @param $p['lowercase'] Lowercase all tag and attribute names
 */
function __construct($xmldata = "", $p = array())
{
	if ($xmldata)
		$this->from_xml($xmldata, $p);
}

/**
 * Legacy PHP4-style constructor name, kept as a regular method so that existing
 * code calling $obj->it_xml(...) or parent::it_xml(...) keeps working.
 * Must stay declared AFTER __construct so that __construct is the constructor.
 * Params like constructor.
 */
function it_xml($xmldata = "", $p = array())
{
	if ($xmldata)
		$this->from_xml($xmldata, $p);
}

/**
 * Factory method to return XML object tree from XML string. Params like constructor
 * Example: $root = it_xml::create("<a><b id='2'/></a>", array('forcearray' => array("b")));
 * @return Root object of XML tree or null on failure (error message is then in $GLOBALS['IT_XML_ERROR'])
 */
static function create($xmldata, $p = array())
{
	$xml = new it_xml;

	if (!$xml->from_xml($xmldata, array('factory' => true) + $p))
		return null;

	return isset($xml->_root) ? $xml->_root : null;
}

/**
 * Parse XML and attach resulting tree to $this (or to $this->_root in factory mode)
 * @param $xmldata XML string or filehandle (result from fopen) to parse
 * @param $p Options, see constructor
 * @return true if parsing succeeded, false otherwise (message in $this->error)
 */
function from_xml($xmldata, $p)
{
	$this->_p = $p + array('encoding' => ini_get('default_charset'), 'safety' => 1);
	$this->_arrayforce = array_flip(isset($this->_p['forcearray']) ? (array)$this->_p['forcearray'] : array());
	$this->_stack = array();
	unset($this->error);

	# Handlers are passed as callables; this replaces xml_set_object() + method name strings
	$parser = xml_parser_create();
	xml_set_element_handler($parser, array($this, "start_element"), array($this, "end_element"));
	xml_set_character_data_handler($parser, array($this, "character_data"));
	xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
	xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $this->_p['encoding']);

	$result = true;
	$xmlorig = "";	# Last piece of input seen, used as body of error report
	$isutf8 = null;	# Decided by _sanitize() on first chunk, then reused for all following chunks

	if (is_resource($xmldata))
	{
		while ($result && !feof($xmldata))
		{
			$data = fread($xmldata, 1024 * 1024);
			while (!feof($xmldata) && preg_match('/[\x80-\xff]$/', $data))	# Make sure end of chunk is not in the middle of a UTF8 character
				$data .= fread($xmldata, 1);

			$xmlorig = $data;	# Do not append to keep memory footprint down!
			list($data, $isutf8) = $this->_sanitize($data, $isutf8);
			$result = xml_parse($parser, $data);
		}

		if ($result)
			$result = xml_parse($parser, "", true);
	}
	else
	{
		$xmlorig = $xmldata;
		list($xmldata, $isutf8) = $this->_sanitize($xmldata);
		$result = xml_parse($parser, $xmldata, true);
	}

	if (!$result)
		$this->_add_error(sprintf("it_xml error: %s at line %d", xml_error_string(xml_get_error_code($parser)), xml_get_current_line_number($parser)));

	if (!empty($this->error))
	{
		if ($this->_p['safety'] >= 2)
			it::fatal(array('title' => $this->error, 'body' => $xmlorig));
		else if ($this->_p['safety'] >= 1)
			it::error(array('title' => $this->error, 'body' => $xmlorig));

		if (!empty($this->_p['factory']))
			$GLOBALS['IT_XML_ERROR'] = $this->error;
	}

	unset($this->_arrayforce, $this->_p['safety'], $this->_p['factory'], $this->_stack);

	if (is_resource($parser))	# Parsers are only resources in old PHP versions; objects are freed automatically
		xml_parser_free($parser);

	return empty($this->error);
}

# Append message to $this->error without reading an undefined property
function _add_error($message)
{
	$this->error = (isset($this->error) ? $this->error : "") . $message;
}

# Use various heuristics to fix real-world XML files
# Returns array($xmldata, $isutf8); pass $isutf8 back in for following chunks of the same document
function _sanitize($xmldata, $isutf8 = null)
{
	if (!isset($isutf8))	# Check if we already decided on charset yet
	{
		$xmldata = ltrim($xmldata);

		# Add header for charset detection (PHP5) if no header/BOM
		# See http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
		if (!preg_match('/^(<\?xml|\xEF\xBB\xBF|\xFE\xFF|\xFF\xFE|\x00\x00\xFE\xFF|\x00\x00\xFF\xFE)/', $xmldata))
			$xmldata = '<?xml version="1.0" encoding="' . $this->_p['encoding'] . '"?>' . $xmldata;

		$isutf8 = (!preg_match('/^<\?xml[^>]* encoding=/i', $xmldata) || preg_match('/^<\?xml[^>]* encoding=.utf-?8/i', $xmldata));
	}

	# Decode illegal entities but protect semantically important ones
	# (0* also protects zero-padded forms like &#038; which would otherwise decode to a raw & and break the XML)
	$xmldata = html_entity_decode(preg_replace('/&(amp|lt|gt|#0*38|#0*60|#0*62|#x0*26|#x0*3C|#x0*3E);/i', '&amp;$1;', $xmldata), ENT_NOQUOTES, $this->_p['encoding']);

	# If should be utf-8 and can't be decoded as such, fix it, even if mixed between both.
	# Match one well-formed sequence OR one stray byte at a time so that runs of
	# adjacent valid characters are left alone instead of being encoded twice.
	if ($isutf8 && preg_match('/[^\x80-\xff][\x80-\xff][^\x80-\xff]/', $xmldata))
		$xmldata = preg_replace_callback('/[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3}|[\x80-\xff]/', function($m) { return it_xml::_utf8_fix($m[0]); }, $xmldata);

	# If not utf-8, remove characters illegal for latin-1
	if (!$isutf8 && preg_match('/[\x00-\x08\x0b-\x0c\x0e-\x1f\x80-\x9f]/', $xmldata))
		$xmldata = it_html::_cleanup($xmldata, "iso-8859-1");

	return array($xmldata, $isutf8);
}

# Encode non-utf8 characters in a string (treated as latin-1), leave a single well-formed utf8 character alone
static function _utf8_fix($str)
{
	if (preg_match('/^([\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]|[\xf0-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf])$/', $str))
		return $str;

	# utf8_encode() is deprecated in recent PHP versions, prefer mbstring if present
	return function_exists('mb_convert_encoding') ? mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1') : utf8_encode($str);
}

/**
 * Hook called on every node when its closing tag is seen. Meant to be overridden.
 * @param $p Parser options as given to from_xml() (passed by end_element)
 * @return true if node has been handled and must NOT be attached to its parent
 */
function consume(/* $p */)
{
	return false;
}

/**
 * Utility function which recursively flattens container into array
 * @param with_attr if true, include attributes in result array
 * @return Array with key/values-pairs for vals and optionally attrs of sub-objects
 */
function flatten($with_attr = false)
{
	$result = ($with_attr && isset($this->attr)) ? (array)$this->attr : array();

	foreach ($this->elements() as $key => $values)
	{
		if (is_array($values))
		{
			$result[$key] = array();

			foreach ($values as $value)
				$result[$key][] = $this->_flatten_node($value, $with_attr);
		}
		else
			$result[$key] = $this->_flatten_node($values, $with_attr);
	}

	return $result;
}

# Value of a single node for flatten(): its val if set, null if object is empty, otherwise recurse
function _flatten_node($node, $with_attr)
{
	if (isset($node->val))
		return $node->val;

	return get_object_vars($node) ? $node->flatten($with_attr) : null;
}

# Strip namespace and convert extra characters to _
function _make_identifier($name)
{
	$id = preg_replace(array('/^.*:/', '/\W/'), array('', '_'), $name);
	return !empty($this->_p['lowercase']) ? strtolower($id) : $id;
}

# Parser callback for opening tags: push node object for this tag on the stack
function start_element($dummy_parser, $name, $attrs)
{
	$name = (isset($this->_p['prefix']) ? $this->_p['prefix'] : "") . $this->_make_identifier($name);

	# NOTE(review): eval() on a name derived from the document. _make_identifier() reduces it
	# to word characters, but tags named like PHP reserved words (e.g. list, string) still
	# make this fail unless $p['prefix'] is used.
	if (!class_exists($name))
		eval("class $name extends it_xml {}");	# Extending the base class caused problems with tel_xmlentry

	if (!$this->_stack && empty($this->_p['factory']))
		$node = $this;	# Without factory the root element is this object itself
	else if (is_subclass_of($name, "it_xml") || !strcasecmp($name, "it_xml"))
		$node = new $name;
	else
	{
		# Never instantiate unrelated classes; report and continue with a generic node
		$this->_add_error("Class $name used in it_xml for tag $name does not extend it_xml: Name clash?\n");
		$node = new it_xml;
	}

	array_unshift($this->_stack, $node);

	foreach ((array)$attrs as $key => $val)
		$node->attr[$this->_make_identifier($key)] = $val;
}

# Parser callback for closing tags: attach finished node to its parent and pop it from the stack
function end_element($dummy_parser, $name)
{
	$name = $this->_make_identifier($name);
	$node = $this->_stack[0];
	$parent = isset($this->_stack[1]) ? $this->_stack[1] : null;	# Root element has no parent

	if (!$node->consume($this->_p) && ($parent !== null))
	{
		if (isset($parent->$name))
		{
			if (is_array($parent->$name))
				array_push($parent->$name, $node);
			else if (is_a($parent->$name, "it_xml"))
				$parent->$name = array($parent->$name, $node);	# Second occurrence of tag: convert to array
			else
				$this->_add_error("Variable $name used in it_xml for tag {$name} is already used in object: Name clash?\n");
		}
		else if (isset($this->_arrayforce[$name]))
			$parent->$name = array($node);
		else
			$parent->$name = $node;
	}

	array_shift($this->_stack);

	if (!empty($this->_p['factory']))
		$this->_root = $node;	# Last node closed is the root
}

# Parser callback for character data: collect text in ->val, ignoring whitespace before the first real text
function character_data($dummy_parser, $cdata)
{
	if (!$this->_stack)
		return;

	$node = $this->_stack[0];

	if (isset($node->val))
		$node->val .= $cdata;
	else if (trim($cdata) !== "")
		$node->val = $cdata;
}

/**
 * Convert XML object tree back to string representation
 * @param $p['tag'] Name of root XML tag (otherwise determined by get_class)
 * @param $p['indent'] String to indent with (default: "  ")
 * @param $p['lineseparator'] String to start new line with (default: "\n")
 * @param $p['prefix'] String to prefix class names with (default: "")
 * @param $p['sort'] value true causes to_xml() to output elements sorted
 * @param $p['stripempty'] value true causes tags without content to be left out
 * @return XML string representation of this object (only attributes with is_a == it_xml and no leading _)
 */
function to_xml($p = array())
{
	$p += array(
		'tag' => substr(get_class($this), isset($p['prefix']) ? strlen($p['prefix']) : 0),
		'indent' => "  ",
		'lineseparator' => "\n",
		'prefix' => "",
		'sort' => false,
		'stripempty' => false, # remove xml tags without content
		'nextindentation' => "", # internal: indentation of the tag being written, empty for outermost tag
	);

	$charset = ini_get('default_charset');
	$currenttag = $p['tag'];
	$tag = $p['nextindentation'] . "<$currenttag";

	if (empty($p['nextindentation']))
		$p['nextindentation'] = $p['lineseparator'];

	$currentindentation = $p['nextindentation'];
	$p['nextindentation'] .= $p['indent'];

	if (isset($this->attr))
	{
		foreach ((array)$this->attr as $key => $value)
			$tag .= " $key=\"" . htmlspecialchars((string)$value, ENT_COMPAT, $charset) . '"';
	}

	$vars = get_object_vars($this);

	if ($p['sort'])
		ksort($vars);

	$body = null;	# Stays null if there is neither val nor any child element
	$indentation = "";	# Closing tag only goes on its own line if there are child elements

	foreach ($vars as $key => $values)
	{
		$p['tag'] = $key;	# Manually set as PHP4 lowercases get_class()

		if (!preg_match('/^(_|attr$)/', $key))	# Skip implementation detail fields
		{
			if (is_array($values))
			{
				foreach ($values as $value)
				{
					if (is_a($value, "it_xml"))
					{
						$body .= $value->to_xml($p);
						$indentation = $currentindentation;
					}
				}
			}
			else if (is_object($values))
			{
				if (is_a($values, "it_xml"))
				{
					$body .= $values->to_xml($p);
					$indentation = $currentindentation;
				}
			}
			else if ($key === 'val')
				$body .= htmlspecialchars((string)$values, ENT_COMPAT, $charset);
		}
	}

	return isset($body) ? "$tag>$body$indentation</$currenttag>" : ($p['stripempty'] ? "" : "$tag/>");
}

/**
 * Get array containing child elements (key value pairs)
 * @return Array containing child elements (as references to the properties of this object)
 */
function elements()
{
	$result = array();

	foreach (get_object_vars($this) as $key => $value)
	{
		# Arrays only count if their first entry is a node; this skips e.g. the attr array
		if (is_a($value, "it_xml") || (is_array($value) && isset($value[0]) && is_a($value[0], "it_xml")))
			$result[$key] =& $this->$key;
	}

	return $result;
}

/**
 * Get error message of last from_xml() on this object
 * @return Error message, or null if there was none. After a failed create() the message is in $GLOBALS['IT_XML_ERROR']
 */
function error()
{
	if (isset($this) && is_a($this, "it_xml"))
		return isset($this->error) ? $this->error : null;

	return isset($GLOBALS['IT_XML_ERROR']) ? $GLOBALS['IT_XML_ERROR'] : null;
}

/**
 * Set nodes as subnode of current node
 * @param vals assoc array of key => value pairs. May contain associative or numeric subarrays. A non-array is stored as val of this node
 * @param parentprefix class name prefix handed down from parent node (internal)
 */
function set($vals, $parentprefix = null)
{
	$prefix = $parentprefix ? $parentprefix : (isset($this->_p['prefix']) ? $this->_p['prefix'] : "");

	if (!is_array($vals))
	{
		$this->val = $vals;
		return;
	}

	foreach ($vals as $key => $val)
	{
		# NOTE(review): eval() on a caller-supplied key, see start_element()
		$classname = $prefix . $this->_make_identifier($key);
		if (!class_exists($classname))
			eval("class $classname extends it_xml {}");

		$existing = isset($this->$key) ? $this->$key : null;

		if (is_array($val) && $val === array_values($val))	# Numeric array: list of nodes with same tag
		{
			if (is_array($existing))
				$arr = $existing;
			else
				$arr = is_a($existing, "it_xml") ? array($existing) : array();	# Keep existing single node as first entry

			foreach ($val as $i => $v)
			{
				if (!isset($arr[$i]) || !is_a($arr[$i], "it_xml"))
					$arr[$i] = new $classname;
				$arr[$i]->set($v, $prefix);
			}

			$this->$key = $arr;
		}
		else
		{
			if (!is_a($existing, "it_xml"))
				$this->$key = new $classname;
			$this->$key->set($val, $prefix);
		}
	}
}

# String representation of a node is its character data
function __toString()
{
	return isset($this->val) ? (string)$this->val : "";
}

}

?>