<?php
# Tag parser framework: TagParser drives a TagInterpreter over the text of a
# ParserTask and replaces, encodes or removes the tags it finds.
#
# NOTE(review): the original header read only "PHP4.2.0"; presumably a
# minimum-version marker (is_a(), used in TagParser::Parse, exists since 4.2.0).
# The TAGPARSER_RET_* constants used below are not defined in this file.
#
# Every class keeps its PHP 4-style constructor (method named like the class)
# as a thin wrapper around __construct(), because PHP 8 no longer treats the
# legacy name as a constructor.

// Accumulated time between successive microtime_float() calls, and the
// timestamp of the previous call (0 = no previous call yet).
$GLOBALS['microtime_total'] = 0;
$GLOBALS['microtime_prev'] = 0;

function microtime_float() {
    # Profiling helper: adds the time elapsed since the previous call to
    # $GLOBALS['microtime_total'] and remembers the current timestamp.
    # Despite its name it returns nothing.
    list($usec, $sec) = explode(" ", microtime());
    $newtime = ((float)$usec + (float)$sec);
    if ($GLOBALS['microtime_prev']) {
        $GLOBALS['microtime_total'] += ($newtime - $GLOBALS['microtime_prev']);
    }
    $GLOBALS['microtime_prev'] = $newtime;
}

class TagParser {
    # Main parser class

    function __construct() {
        # Constructor - nothing to initialise
    }

    function TagParser() {
        # Legacy (PHP 4) constructor name
        $this->__construct();
    }

    function Parse(&$task) {
        # Parses $task->text in place, transforming tags through the callbacks
        # of $task->interpreter.
        # IN/OUT: $task   ParserTask; text, pos_act, pos_encode_last, st and
        #                 errarr are modified
        # Config read from the task:
        #   $task->dry          no replacements are written to the text
        #   $task->drop_errtag  erroneous tags are removed instead of encoded
        # RET:
        #   TAGPARSER_RET_ERR: at least one error was pushed to $task->errarr
        #   TAGPARSER_RET_OK:  otherwise
        microtime_float();

        // fast access; references, so interpreter callbacks see the same state
        $interpreter =& $task->interpreter;
        $skip = $task->dry;
        $remove = $task->drop_errtag;

        // output text is input text
        $text =& $task->text;

        // internal state lives on the task
        $pos_act =& $task->pos_act;
        $pos_act = 0;
        // text before this position has already been encoded
        $pos_encode_last =& $task->pos_encode_last;
        $pos_encode_last = 0;
        // stack of open start tags; $sti is the index one above the top
        $st =& $task->st;
        $st = array();
        $sti = 0;

        // $pos is never advanced: for non-empty text the loop only ends when
        // ParseNext reports something other than TAGPARSER_RET_OK
        $textlen = strlen($text);
        $pos = 0;
        while ($pos < $textlen) {
            microtime_float();

            // next tag candidate
            if ($interpreter->ParseNext($task) !== TAGPARSER_RET_OK) {
                break;
            }
            // tag start detected
            $tag_start = $pos_act;

            // escaped candidate: interpreter unescaped it, it is no tag;
            // linear encoding of that text happens later
            if ($interpreter->UnEscape($task) == TAGPARSER_RET_REPLACED) {
                continue;
            }

            // parse unescaped start or end tag
            $tag = NULL;
            if ($interpreter->ParseTag($tag, $task) !== TAGPARSER_RET_OK) {
                // Only remove/encode when the interpreter delivered an event
                // whose tag_end marks the extent; with $tag still NULL the
                // extent is unknown and the text is left to linear encoding.
                // NOTE(review): assumes ParseTag advanced $task->pos_act.
                if (is_object($tag)) {
                    $offset = 0;
                    $this->RemoveOrEncode($offset, $task, $tag_start, $tag->tag_end, 'parsetag');
                    $pos_act += $offset;
                }
                continue;
            }
            $tag_end = $tag->tag_end;

            // syntactically correct tag: check semantics, else remove/encode
            // it right away (encoding it again later would double encode)
            if ($interpreter->CheckTag($task, $tag) !== TAGPARSER_RET_OK) {
                $offset = 0;
                $this->RemoveOrEncode($offset, $task, $tag_start, $tag_end, 'checktag');
                $pos_act += $offset;
                continue;
            }

            // real tag: linear encode the plain text in front of it
            $encode_len = $tag_start - $pos_encode_last;
            $textnew = '';
            if (!$skip
                && ($interpreter->Encode($textnew, $task, substr($text, $pos_encode_last, $encode_len), 'text')
                    !== TAGPARSER_RET_NOTHING)) {
                // replaced: shift everything behind the encoded span
                $encode_diff = strlen($textnew) - $encode_len;
                $text = substr($text, 0, $pos_encode_last) . $textnew . substr($text, $tag_start);
                $tag->Offset($encode_diff);
                $tag_start += $encode_diff;
                $tag_end += $encode_diff;
                $pos_act += $encode_diff;
            }
            $pos_encode_last = $tag_start;

            if (is_a($tag, 'ParserEventTagEnd')) {
                // END TAG: seek the matching start tag on the stack, top down
                $i = $sti - 1;
                while ($i >= 0) {
                    $temp = $st[$i];
                    if ($temp->name == $tag->name) {
                        // $i is the index of the matching start tag
                        break;
                    }
                    $i--;
                }
                unset($temp);

                if ($i == -1) {
                    // end tag without start tag: always an error, no event
                    $err = new ParserErrorContext('parser.err.tag.nostart');
                    $err->GrabContext($task, $tag);
                    $task->ErrorPush($err);
                    if ($remove && !$skip) {
                        // remove tag, continue at the former tag start
                        // (a dry run must not touch the text)
                        $text = substr($text, 0, $tag_start) . substr($text, $tag_end + 1);
                        $pos_act = $tag_start;
                    } else {
                        // tag stays and is linearly encoded later; continue after it
                        $pos_act = $tag_end + 1;
                    }
                    continue;
                }

                // reduce the stack down to the matching start tag: everything
                // above it was opened but never closed
                while ($sti > ($i + 1)) {
                    --$sti;
                    $starttag =& $st[$sti];
                    $starttag_len = $starttag->tag_end - $starttag->tag_start + 1;

                    // late event
                    $tag_new = NULL;
                    if ($interpreter->TagSingleLate($tag_new, $task, $starttag) !== TAGPARSER_RET_NOTHING) {
                        if ($skip) {
                            continue;
                        }
                        // tag replacement; all marks are behind this tag
                        $templen = strlen($tag_new) - $starttag_len;
                        $text = substr($text, 0, $starttag->tag_start)
                            . $tag_new . substr($text, $starttag->tag_end + 1);
                        $tag->Offset($templen);
                        $tag_start += $templen;
                        $tag_end += $templen;
                        $pos_act += $templen;
                    } else {
                        // open-only tag nobody handles
                        $err = new ParserErrorContext('parser.err.tag.remain');
                        $err->GrabContext($task, $starttag);
                        $task->ErrorPush($err);
                        $offset = 0;
                        $this->RemoveOrEncode($offset, $task,
                            $starttag->tag_start, $starttag->tag_end, 'unsupported');
                        // tag came from the stack, so all indices are behind it
                        $tag->Offset($offset);
                        $tag_start += $offset;
                        $tag_end += $offset;
                        $pos_encode_last += $offset;
                        $pos_act += $offset;
                    }
                }
                unset($i);

                // pop the matching start tag
                unset($st[$sti]);
                $sti--;
                $starttag =& $st[$sti];

                // try standard, then extended replacement, else unsupported
                $tag_new = $tag_new_start = $tag_new_end = NULL;
                if ($interpreter->TagStandard($tag_new_start, $tag_new_end, $task, $starttag)
                    !== TAGPARSER_RET_NOTHING) {
                    if ($skip) {
                        continue;
                    }
                    // start and end tag are replaced, text in between is kept
                    $midlen = $tag_start - $starttag->tag_end - 1;
                    $text = substr($text, 0, $starttag->tag_start) . $tag_new_start
                        . substr($text, $starttag->tag_end + 1, $midlen)
                        . $tag_new_end . substr($text, $tag_end + 1);
                    $totallen = strlen($tag_new_start) + $midlen + strlen($tag_new_end);
                    // parsing and linear encoding continue behind the end tag
                    $pos_act = $starttag->tag_start + $totallen;
                    $pos_encode_last = $pos_act;
                } else if ($interpreter->TagExtended($tag_new, $task, $starttag,
                        substr($text, $starttag->tag_end + 1, $tag_start - $starttag->tag_end - 1))
                    !== TAGPARSER_RET_NOTHING) {
                    if ($skip) {
                        continue;
                    }
                    // start tag, content and end tag are replaced as a whole
                    $text = substr($text, 0, $starttag->tag_start)
                        . $tag_new . substr($text, $tag_end + 1);
                    $pos_encode_last = $pos_act = $starttag->tag_start + strlen($tag_new);
                } else {
                    // unsupported tag: remove/encode end tag first (it is
                    // behind the start tag), then the start tag
                    $err = new ParserErrorContext('parser.err.tag.unsupported');
                    $err->GrabContext($task, $starttag);
                    $task->ErrorPush($err);
                    $offset_start = $offset_end = 0;
                    $this->RemoveOrEncode($offset_end, $task, $tag_start, $tag_end, 'unsupported');
                    $this->RemoveOrEncode($offset_start, $task,
                        $starttag->tag_start, $starttag->tag_end, 'unsupported');
                    $tag->Offset($offset_end + $offset_start);
                    $pos_act += $offset_end + $offset_start;
                    $pos_encode_last = $pos_act;
                }
                unset($starttag);
            } else {
                // START TAG
                $tag_new = NULL;
                $kind = $interpreter->TagSingle($tag_new, $task, $tag);
                if ($kind !== TAGPARSER_RET_NOTHING) {
                    if ($skip) {
                        continue;
                    }
                    $text = substr($text, 0, $tag_start) . $tag_new . substr($text, $tag_end + 1);
                    if ($kind == TAGPARSER_RET_RECURSIVE) {
                        // recursive: parse the replacement itself
                        $pos_act = $tag_start;
                    } else {
                        // non-recursive: continue behind the replacement
                        $pos_act = $tag_start + strlen($tag_new);
                    }
                    $pos_encode_last = $pos_act;
                } else {
                    // not handled as single tag: push and wait for the end tag
                    $st[$sti] = $tag;
                    unset($tag);
                    $sti++;
                    $pos_act = $tag_end + 1;
                    $pos_encode_last = $pos_act;
                }
            }
        }

        // encode the last linear part
        $textnew = '';
        if (!$skip
            && ($interpreter->Encode($textnew, $task, substr($text, $pos_encode_last), 'text')
                !== TAGPARSER_RET_NOTHING)) {
            $text = substr($text, 0, $pos_encode_last) . $textnew;
        }
        unset($textnew);

        // empty the stack: tags that were never closed. Popping top down means
        // a replacement never moves a tag that is still on the stack.
        while ($sti > 0) {
            --$sti;
            $starttag =& $st[$sti];

            // late event
            $tag_new = NULL;
            if ($interpreter->TagSingleLate($tag_new, $task, $starttag) !== TAGPARSER_RET_NOTHING) {
                if ($skip) {
                    continue;
                }
                $text = substr($text, 0, $starttag->tag_start)
                    . $tag_new . substr($text, $starttag->tag_end + 1);
            } else {
                // open tag nobody handles
                $err = new ParserErrorContext('parser.err.tag.remain');
                $err->GrabContext($task, $starttag);
                $task->ErrorPush($err);
                $offset = 0;
                $this->RemoveOrEncode($offset, $task,
                    $starttag->tag_start, $starttag->tag_end, 'unsupported');
            }
            unset($starttag);
        }

        microtime_float();
        $interpreter->PostProcessing($task);

        if (count($task->errarr)) {
            return TAGPARSER_RET_ERR;
        }
        return TAGPARSER_RET_OK;
    }

    function RemoveOrEncode(&$offset, &$task, $tag_start, $tag_end, $context) {
        # Removes or encodes the range $tag_start..$tag_end (inclusive) of
        # $task->text.
        # OUT: $offset   change of text length caused by the operation;
        #                left untouched on a dry run
        # IN:  $context  appended to 'tagremove.' as the Encode context
        # RET: TAGPARSER_RET_OK always

        // dry run: do nothing
        if ($task->dry) {
            return TAGPARSER_RET_OK;
        }

        $offset = 0;
        $text =& $task->text;
        $tag_len = $tag_end - $tag_start + 1;

        if ($task->drop_errtag) {
            // remove
            $text = substr($text, 0, $tag_start) . substr($text, $tag_end + 1);
            $offset = -$tag_len;
        } else {
            // encode tag with context
            $textnew = '';
            if ($task->interpreter->Encode($textnew, $task,
                    substr($text, $tag_start, $tag_len), 'tagremove.' . $context)
                !== TAGPARSER_RET_NOTHING) {
                $text = substr($text, 0, $tag_start) . $textnew . substr($text, $tag_end + 1);
                $offset = strlen($textnew) - $tag_len;
            }
        }
        return TAGPARSER_RET_OK;
    }
}

class TagInterpreter {
    # Defines the callback interface used by TagParser::Parse and describes
    # the event types. The defaults here find no tags and replace nothing.

    var $parser = NULL;

    function __construct(&$parser) {
        # Interpreter constructor; keeps a reference to the parser
        $this->parser =& $parser;
    }

    function TagInterpreter(&$parser) {
        # Legacy (PHP 4) constructor name
        $this->__construct($parser);
    }

    function &NewTask() {
        # Builds a new task bound to this interpreter
        # RET: the ParserTask object
        $task = new ParserTask($this);
        return $task;
    }

    // All Parse functions need no parent call

    function ParseNext(&$task) {
        # Find the next tag candidate; it may still be cancelled later on.
        # The parser reads the candidate position from $task->pos_act.
        # RET:
        #   TAGPARSER_RET_OK:  found candidate
        #   anything else:     end of parsing (default: TAGPARSER_RET_ERR)
        return TAGPARSER_RET_ERR;
    }

    function UnEscape(&$task) {
        # Check whether the current candidate is escaped and unescape it.
        # Linear encoding is done later on; escaping only prevents the parser
        # from treating the candidate as a tag.
        # RET:
        #   TAGPARSER_RET_NOTHING:  not escaped, treat as tag (default)
        #   TAGPARSER_RET_REPLACED: unescaped, candidate is cancelled
        return TAGPARSER_RET_NOTHING;
    }

    function ParseTag(&$tag, &$task) {
        # Parse the tag and create the event object in $tag.
        # The parser reads $tag->tag_end afterwards.
        # RET:
        #   TAGPARSER_RET_OK:  continue with $tag
        #   anything else:     tag parse error, tag is skipped
        #                      (default: TAGPARSER_RET_ERR)
        return TAGPARSER_RET_ERR;
    }

    function CheckTag(&$task, $tag) {
        # Check tag content for conformity
        # RET:
        #   TAGPARSER_RET_OK:  tag is valid
        #   anything else:     tag is removed or encoded
        #                      (default: TAGPARSER_RET_ERR)
        return TAGPARSER_RET_ERR;
    }

    // All String functions need consideration for parent calls!

    function Encode(&$text_new, &$task, $text_old, $context) {
        # Encode strings for output
        # IN:  $text_old, $context   simple strings
        # OUT: $text_new             full replacement for $text_old
        # Contexts used by the parser: 'text', 'tagremove.parsetag',
        # 'tagremove.checktag', 'tagremove.unsupported'
        # RET:
        #   TAGPARSER_RET_NOTHING:  no encoding done (default)
        #   anything else:          $text_new replaces $text_old
        return TAGPARSER_RET_NOTHING;
    }

    // All Tag functions need no parent calls!

    function TagSingle(&$tag_new, &$task, $tag) {
        # Replaces a start tag on its own
        # IN:  $tag       event object
        # OUT: $tag_new   full replacement
        # RET:
        #   TAGPARSER_RET_NOTHING:   none done; tag is pushed on the stack and
        #                            handled on its end tag (TagStandard /
        #                            TagExtended) or by TagSingleLate
        #   TAGPARSER_RET_RECURSIVE: replaced, parsing restarts at the start
        #                            of the replacement
        #   anything else:           replaced, parsing continues behind the
        #                            replacement (e.g. TAGPARSER_RET_REPLACED)
        return TAGPARSER_RET_NOTHING;
    }

    function TagStandard(&$tag_new_start, &$tag_new_end, &$task, $tag) {
        # Replaces a start/end tag pair, keeping the text in between
        # IN:  $tag                            start tag event object
        # OUT: $tag_new_start, $tag_new_end    replacements for both tags
        # RET:
        #   TAGPARSER_RET_NOTHING:  none done, TagExtended is tried next
        #   anything else:          replaced
        return TAGPARSER_RET_NOTHING;
    }

    function TagExtended(&$tag_new, &$task, $tag, $between) {
        # Replaces a start/end tag pair including the text in between
        # IN:  $tag, $between   start tag event object, text between the tags
        # OUT: $tag_new         full replacement
        # RET:
        #   TAGPARSER_RET_NOTHING:  none done, tag is finally unsupported
        #   anything else:          replaced
        return TAGPARSER_RET_NOTHING;
    }

    function TagSingleLate(&$tag_new, &$task, $tag) {
        # Replaces a start tag that was left open on the stack
        # IN:  $tag       event object
        # OUT: $tag_new   full replacement
        # RET:
        #   TAGPARSER_RET_NOTHING:  none done, tag is unsupported
        #   anything else:          replaced
        return TAGPARSER_RET_NOTHING;
    }

    function PostProcessing(&$task) {
        // Override if post processing is required...
    }
}

class ParserTask {
    # A specific task to get parsed; carries the parser state

    // object assoc
    var $interpreter = NULL;

    // persistent process data
    var $errarr = array();
    var $text = NULL;

    // config
    // dry run, disables replacements
    var $dry = FALSE;
    // drop errortag drops each tag producing errors
    var $drop_errtag = FALSE;

    // run specific
    // stack
    var $st = array();
    // scan states
    var $pos_act = 0;
    // encode as soon as a matching tag is executed
    var $pos_encode_last = 0;

    function __construct($interpreter) {
        # DO NOT CALL DIRECTLY:
        # use $interpreter->NewTask()
        $this->interpreter =& $interpreter;
    }

    function ParserTask($interpreter) {
        # Legacy (PHP 4) constructor name
        $this->__construct($interpreter);
    }

    function setText($text) {
        # Define new text to parse
        $this->text = $text;
        return TAGPARSER_RET_OK;
    }

    function Reset() {
        # Reset this task's error state
        $this->errarr = array();
        return TAGPARSER_RET_OK;
    }

    function Parse($text=NULL) {
        # Parse this task, optionally setting new text first
        # RET: result of TagParser::Parse
        if ($text !== NULL) {
            $this->text = $text;
        }
        // call parser framework
        return $this->interpreter->parser->Parse($this);
    }

    function ErrorPush($err) {
        # Push $err onto this task's error array
        $this->errarr[] = $err;
    }

    function ErrorShow() {
        # Show all errors of this task, each followed by two line breaks
        // foreach instead of each(): each() was removed in PHP 8
        foreach ($this->errarr as $err) {
            // Show() of the errors in this file echoes itself; echoing its
            // return value as well keeps implementations that return text working
            echo $err->Show();
            // NOTE(review): this separator is a literal line break in the
            // source as received; it may originally have been a '<br>' tag
            echo "\n";
            echo "\n";
        }
    }
}

class ParserRun {
    # UNUSED, should be inside Parse() instead of $task
    # A single run of the parser function, encapsulating the run information
    # instead of the resulting persistent object

    var $task = null;

    // stack
    var $st = array();
    // scan states
    var $pos_act = 0;
    // encode as soon as a matching tag is executed
    var $pos_encode_last = 0;

    function __construct($task) {
        $this->task = $task;
    }

    function ParserRun($task) {
        # Legacy (PHP 4) constructor name
        $this->__construct($task);
    }
}

class ParserEvent {
    # A ParserEvent happens on TagInterpreter::ParseTag.
    # tag_start and tag_end are inclusive positions in the task text.

    var $tag_start = NULL;
    var $tag_end = NULL;
    var $name = '';

    function __construct($tag_start, $tag_end, $name) {
        # Constructor
        $this->tag_start = $tag_start;
        $this->tag_end = $tag_end;
        $this->name = $name;
    }

    function ParserEvent($tag_start, $tag_end, $name) {
        # Legacy (PHP 4) constructor name
        $this->__construct($tag_start, $tag_end, $name);
    }

    function Offset($offset) {
        # Move the tag by $offset (used when text before it changed length)
        $this->tag_start += $offset;
        $this->tag_end += $offset;
    }
}

class ParserEventTag extends ParserEvent {
    # Represents a start tag with options
    # Derive from this to put tags with special infos on the stack

    var $options = array();

    function setOptions($opt) {
        // arrays are assigned by value, so this stores a copy
        $this->options = $opt;
    }
}

class ParserEventTagEnd extends ParserEvent {
    # Represents an end tag
}

class ParserErrorContext {
    # A dump of a TagParser::Parse error state with context

    var $error;
    var $pos = NULL;
    var $text = NULL;
    var $tag = NULL;

    function __construct($error) {
        # Constructor
        $this->error = $error;
    }

    function ParserErrorContext($error) {
        # Legacy (PHP 4) constructor name
        $this->__construct($error);
    }

    function GrabContext($task, $tag=NULL) {
        # Grab the context of this error as a COPY (dump).
        # Without $tag: pos is $task->pos_act and text is the window from
        # 10 bytes before to 20 bytes after that position.
        # With $tag: pos is the tag start and text is the tag itself.
        # Keep in mind that positions refer to the text after encoding, not
        # to the input.
        $this->pos = $task->pos_act;
        // clamp the window start: a negative start would make substr() count
        // from the end of the text; the third argument is a length, not an
        // end position
        $ctx_start = max(0, $task->pos_act - 10);
        $this->text = substr($task->text, $ctx_start, $task->pos_act + 20 - $ctx_start);

        if ($tag !== NULL) {
            $this->tag = $tag;
            $this->pos = $tag->tag_start;
            $tag_len = $tag->tag_end - $tag->tag_start + 1;
            $this->text = substr($task->text, $tag->tag_start, $tag_len);
        }
    }

    function Show() {
        # Show this error (echoes, returns nothing)
        echo 'ERROR:'.$this->error.' @'.$this->pos.' near "'.$this->text.'"';
    }
}
?>