244 lines
6.3 KiB
PHP
244 lines
6.3 KiB
PHP
|
|
<?php
|
||
|
|
/**
|
||
|
|
* SAXY Lite is a non-validating, but lightweight and fast SAX parser for PHP, modelled on the Expat parser
|
||
|
|
* @package saxy-xmlparser
|
||
|
|
* @subpackage saxy-xmlparser-lite
|
||
|
|
* @version 1.0
|
||
|
|
* @copyright (C) 2004 John Heinstein. All rights reserved
|
||
|
|
* @license http://www.gnu.org/copyleft/lesser.html LGPL License
|
||
|
|
* @author John Heinstein <johnkarl@nbnet.nb.ca>
|
||
|
|
* @link http://www.engageinteractive.com/saxy/ SAXY Home Page
|
||
|
|
* SAXY is Free Software
|
||
|
|
**/
|
||
|
|
|
||
|
|
if (!defined('SAXY_INCLUDE_PATH')) {
|
||
|
|
define('SAXY_INCLUDE_PATH', (dirname(__FILE__) . "/"));
|
||
|
|
}
|
||
|
|
|
||
|
|
/** current version of SAXY Lite */
|
||
|
|
define ('SAXY_LITE_VERSION', '1.0');
|
||
|
|
|
||
|
|
/** initial saxy lite parse state, before anything is encountered */
|
||
|
|
define('SAXY_STATE_NONE', 0);
|
||
|
|
/** saxy lite parse state, processing main document */
|
||
|
|
define('SAXY_STATE_PARSING', 1);
|
||
|
|
|
||
|
|
require_once(SAXY_INCLUDE_PATH . 'xml_saxy_shared.php');
|
||
|
|
|
||
|
|
/**
|
||
|
|
* The SAX Parser class
|
||
|
|
*
|
||
|
|
* @package saxy-xmlparser
|
||
|
|
* @subpackage saxy-xmlparser-lite
|
||
|
|
* @author John Heinstein <johnkarl@nbnet.nb.ca>
|
||
|
|
*/
|
||
|
|
class SAXY_Lite_Parser extends SAXY_Parser_Base {
|
||
|
|
/**
|
||
|
|
* Constructor for SAX parser
|
||
|
|
*/
|
||
|
|
function SAXY_Lite_Parser() {
|
||
|
|
$this->SAXY_Parser_Base();
|
||
|
|
$this->state = SAXY_STATE_NONE;
|
||
|
|
} //SAXY_Lite_Parser
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Returns the current version of SAXY Lite
|
||
|
|
* @return Object The current version of SAXY Lite
|
||
|
|
*/
|
||
|
|
function getVersion() {
|
||
|
|
return SAXY_LITE_VERSION;
|
||
|
|
} //getVersion
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Processes the xml prolog, doctype, and any other nodes that exist outside of the main xml document
|
||
|
|
* @param string The xml text to be processed
|
||
|
|
* @return string The preprocessed xml text
|
||
|
|
*/
|
||
|
|
function preprocessXML($xmlText) {
|
||
|
|
//strip prolog
|
||
|
|
$xmlText = trim($xmlText);
|
||
|
|
$total = strlen($xmlText);
|
||
|
|
|
||
|
|
for ($i = 0; $i < $total; $i++) {
|
||
|
|
// if ($xmlText{$i} == '<') {
|
||
|
|
if (substr($xmlText, $i, 1) == '<') {
|
||
|
|
switch ($xmlText{($i + 1)}) {
|
||
|
|
case '?':
|
||
|
|
case '!':
|
||
|
|
break;
|
||
|
|
default:
|
||
|
|
$this->state = SAXY_STATE_PARSING;
|
||
|
|
return (substr($xmlText, $i));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
} //preprocessXML
|
||
|
|
|
||
|
|
/**
|
||
|
|
* The controlling method for the parsing process
|
||
|
|
* @param string The xml text to be processed
|
||
|
|
* @return boolean True if parsing is successful
|
||
|
|
*/
|
||
|
|
function parse ($xmlText) {
|
||
|
|
$xmlText = $this->preprocessXML($xmlText);
|
||
|
|
$total = strlen($xmlText);
|
||
|
|
|
||
|
|
for ($i = 0; $i < $total; $i++) {
|
||
|
|
// $currentChar = $xmlText{$i};
|
||
|
|
$currentChar = substr($xmlText, $i, 1);
|
||
|
|
|
||
|
|
switch ($this->state) {
|
||
|
|
case SAXY_STATE_PARSING:
|
||
|
|
|
||
|
|
switch ($currentChar) {
|
||
|
|
case '<':
|
||
|
|
if (substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) {
|
||
|
|
$this->charContainer .= $currentChar;
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
$this->parseBetweenTags($this->charContainer);
|
||
|
|
$this->charContainer = '';
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
case '>':
|
||
|
|
if ((substr($this->charContainer, 0, SAXY_CDATA_LEN) == SAXY_SEARCH_CDATA) &&
|
||
|
|
!(($this->getCharFromEnd($this->charContainer, 0) == ']') &&
|
||
|
|
($this->getCharFromEnd($this->charContainer, 1) == ']'))) {
|
||
|
|
$this->charContainer .= $currentChar;
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
$this->parseTag($this->charContainer);
|
||
|
|
$this->charContainer = '';
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
default:
|
||
|
|
$this->charContainer .= $currentChar;
|
||
|
|
}
|
||
|
|
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return true;
|
||
|
|
} //parse
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Parses an element tag
|
||
|
|
* @param string The interior text of the element tag
|
||
|
|
*/
|
||
|
|
function parseTag($tagText) {
|
||
|
|
$tagText = trim($tagText);
|
||
|
|
$firstChar = $tagText{0};
|
||
|
|
$myAttributes = array();
|
||
|
|
|
||
|
|
switch ($firstChar) {
|
||
|
|
case '/':
|
||
|
|
$tagName = substr($tagText, 1);
|
||
|
|
$this->fireEndElementEvent($tagName);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case '!':
|
||
|
|
$upperCaseTagText = strtoupper($tagText);
|
||
|
|
|
||
|
|
if (strpos($upperCaseTagText, SAXY_SEARCH_CDATA) !== false) { //CDATA Section
|
||
|
|
$total = strlen($tagText);
|
||
|
|
$openBraceCount = 0;
|
||
|
|
$textNodeText = '';
|
||
|
|
|
||
|
|
for ($i = 0; $i < $total; $i++) {
|
||
|
|
// $currentChar = $tagText{$i};
|
||
|
|
$currentChar = substr($tagText, $i, 1);
|
||
|
|
|
||
|
|
if (($currentChar == ']') && ($tagText{($i + 1)} == ']')) {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
else if ($openBraceCount > 1) {
|
||
|
|
$textNodeText .= $currentChar;
|
||
|
|
}
|
||
|
|
else if ($currentChar == '[') { //this won't be reached after the first open brace is found
|
||
|
|
$openBraceCount ++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if ($this->cDataSectionHandler == null) {
|
||
|
|
$this->fireCharacterDataEvent($textNodeText);
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
$this->fireCDataSectionEvent($textNodeText);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else if (strpos($upperCaseTagText, SAXY_SEARCH_NOTATION) !== false) { //NOTATION node, discard
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
else if (substr($tagText, 0, 2) == '!-') { //comment node, discard
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
break;
|
||
|
|
|
||
|
|
case '?':
|
||
|
|
//Processing Instruction node, discard
|
||
|
|
return;
|
||
|
|
|
||
|
|
default:
|
||
|
|
if ((strpos($tagText, '"') !== false) || (strpos($tagText, "'") !== false)) {
|
||
|
|
$total = strlen($tagText);
|
||
|
|
$tagName = '';
|
||
|
|
|
||
|
|
for ($i = 0; $i < $total; $i++) {
|
||
|
|
// $currentChar = $tagText{$i};
|
||
|
|
$currentChar = substr($tagText, $i, 1);
|
||
|
|
|
||
|
|
if (($currentChar == ' ') || ($currentChar == "\t") ||
|
||
|
|
($currentChar == "\n") || ($currentChar == "\r") ||
|
||
|
|
($currentChar == "\x0B")) {
|
||
|
|
$myAttributes = $this->parseAttributes(substr($tagText, $i));
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
$tagName .= $currentChar;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (strrpos($tagText, '/') == (strlen($tagText) - 1)) { //check $tagText, but send $tagName
|
||
|
|
$this->fireStartElementEvent($tagName, $myAttributes);
|
||
|
|
$this->fireEndElementEvent($tagName);
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
$this->fireStartElementEvent($tagName, $myAttributes);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
if (strpos($tagText, '/') !== false) {
|
||
|
|
$tagText = trim(substr($tagText, 0, (strrchr($tagText, '/') - 1)));
|
||
|
|
$this->fireStartElementEvent($tagText, $myAttributes);
|
||
|
|
$this->fireEndElementEvent($tagText);
|
||
|
|
}
|
||
|
|
else {
|
||
|
|
$this->fireStartElementEvent($tagText, $myAttributes);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
} //parseTag
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Returns the current error code (non-functional for SAXY Lite)
|
||
|
|
* @return int The current error code
|
||
|
|
*/
|
||
|
|
function xml_get_error_code() {
|
||
|
|
return -1;
|
||
|
|
} //xml_get_error_code
|
||
|
|
|
||
|
|
/**
|
||
|
|
* Returns a textual description of the error code (non-functional for SAXY Lite)
|
||
|
|
* @param int The error code
|
||
|
|
* @return string The error message
|
||
|
|
*/
|
||
|
|
function xml_error_string($code) {
|
||
|
|
return "";
|
||
|
|
} //xml_error_string
|
||
|
|
} //SAXY_Lite_Parser
|
||
|
|
?>
|