forked from phcode-dev/staging.phcode.dev
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathHTMLTokenizer.js
More file actions
1 lines (1 loc) · 11.8 KB
/
HTMLTokenizer.js
File metadata and controls
1 lines (1 loc) · 11.8 KB
1
define(function(require,exports,module){var i=0,TEXT=i++,BEFORE_TAG_NAME=i++,IN_TAG_NAME=i++,BEFORE_CLOSING_TAG_NAME=i++,IN_CLOSING_TAG_NAME=i++,AFTER_CLOSING_TAG_NAME=i++,AFTER_SELFCLOSE_SLASH=i++,BEFORE_ATTRIBUTE_NAME=i++,AFTER_QUOTED_ATTRIBUTE_VALUE=i++,IN_ATTRIBUTE_NAME=i++,AFTER_ATTRIBUTE_NAME=i++,BEFORE_ATTRIBUTE_VALUE=i++,IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES=i++,IN_ATTRIBUTE_VALUE_SINGLE_QUOTES=i++,IN_ATTRIBUTE_VALUE_NO_QUOTES=i++,BEFORE_DECLARATION=i++,IN_DECLARATION=i++,IN_PROCESSING_INSTRUCTION=i++,BEFORE_COMMENT=i++,IN_COMMENT=i++,AFTER_COMMENT_1=i++,AFTER_COMMENT_2=i++,BEFORE_CDATA_1=i++,BEFORE_CDATA_2=i++,BEFORE_CDATA_3=i++,BEFORE_CDATA_4=i++,BEFORE_CDATA_5=i++,BEFORE_CDATA_6=i++,IN_CDATA=i++,AFTER_CDATA_1=i++,AFTER_CDATA_2=i++,BEFORE_SPECIAL=i++,BEFORE_SPECIAL_END=i++,BEFORE_SCRIPT_1=i++,BEFORE_SCRIPT_2=i++,BEFORE_SCRIPT_3=i++,BEFORE_SCRIPT_4=i++,BEFORE_SCRIPT_5=i++,AFTER_SCRIPT_1=i++,AFTER_SCRIPT_2=i++,AFTER_SCRIPT_3=i++,AFTER_SCRIPT_4=i++,AFTER_SCRIPT_5=i++,BEFORE_STYLE_1=i++,BEFORE_STYLE_2=i++,BEFORE_STYLE_3=i++,BEFORE_STYLE_4=i++,AFTER_STYLE_1=i++,AFTER_STYLE_2=i++,AFTER_STYLE_3=i++,AFTER_STYLE_4=i++;function isWhitespace(c){return" "===c||"\t"===c||"\r"===c||"\n"===c}function isLegalInTagName(c){return/[A-Za-z0-9\-]/.test(c)}function isLegalInAttributeName(c){return'"'!==c&&"'"!==c&&"<"!==c&&"="!==c}function isLegalInUnquotedAttributeValue(c){return"<"!==c&&"="!==c}function _clonePos(pos,offset){return pos?{line:pos.line,ch:pos.ch+(offset||0)}:null}function Tokenizer(text){this._state=TEXT,this._buffer=text,this._sectionStart=0,this._sectionStartPos={line:0,ch:0},this._index=0,this._indexPos={line:0,ch:0},this._special=0,this._token=null,this._nextToken=null}Tokenizer.prototype.nextToken=function(){if(this._token=null,this._nextToken){var result=this._nextToken;return this._nextToken=null,result}for(;this._index<this._buffer.length&&!this._token;){var c=this._buffer.charAt(this._index);if(this._state===TEXT)"<"===c&&(this._emitTokenIfNonempty("text"),this._state=BEFORE_TAG_NAME,this._startSection());else if(this._state===BEFORE_TAG_NAME)if("/"===c)this._state=BEFORE_CLOSING_TAG_NAME;else if(">"===c||this._special>0)this._state=TEXT;else if("!"===c)this._state=BEFORE_DECLARATION,this._startSection(1);else if("?"===c)this._state=IN_PROCESSING_INSTRUCTION,this._startSection(1);else if("s"===c||"S"===c)this._state=BEFORE_SPECIAL,this._startSection();else{if(!isLegalInTagName(c)){this._emitSpecialToken("error");break}isWhitespace(c)||(this._state=IN_TAG_NAME,this._startSection())}else if(this._state===IN_TAG_NAME){if("/"===c)this._emitToken("opentagname"),this._emitSpecialToken("selfclosingtag",this._index+2,_clonePos(this._indexPos,2)),this._state=AFTER_SELFCLOSE_SLASH;else if(">"===c)this._emitToken("opentagname"),this._emitSpecialToken("opentagend",this._index+1,_clonePos(this._indexPos,1)),this._state=TEXT,this._startSection(1);else if(isWhitespace(c))this._emitToken("opentagname"),this._state=BEFORE_ATTRIBUTE_NAME;else if(!isLegalInTagName(c)){this._emitSpecialToken("error");break}}else if(this._state===BEFORE_CLOSING_TAG_NAME)if(">"===c)this._state=TEXT;else if(this._special>0){if("s"!==c&&"S"!==c){this._state=TEXT;continue}this._state=BEFORE_SPECIAL_END}else{if(!isLegalInTagName(c)){this._emitSpecialToken("error");break}isWhitespace(c)||(this._state=IN_CLOSING_TAG_NAME,this._startSection())}else if(this._state===IN_CLOSING_TAG_NAME){if(">"===c)this._emitToken("closetag"),this._state=TEXT,this._startSection(1),this._special=0;else if(isWhitespace(c))this._emitToken("closetag"),this._state=AFTER_CLOSING_TAG_NAME,this._special=0;else if(!isLegalInTagName(c)){this._emitSpecialToken("error");break}}else if(this._state===AFTER_CLOSING_TAG_NAME){if(">"===c)this._state=TEXT,this._startSection(1);else if(!isWhitespace(c)){this._emitSpecialToken("error");break}}else if(this._state===AFTER_SELFCLOSE_SLASH){if(">"!==c){this._emitSpecialToken("error");break}this._state=TEXT,this._startSection(1)}else if(this._state===BEFORE_ATTRIBUTE_NAME)if(">"===c)this._state=TEXT,this._emitSpecialToken("opentagend",this._index+1,_clonePos(this._indexPos,1)),this._startSection(1);else if("/"===c)this._emitSpecialToken("selfclosingtag",this._index+2,_clonePos(this._indexPos,2)),this._state=AFTER_SELFCLOSE_SLASH;else{if(!isLegalInAttributeName(c)){this._emitSpecialToken("error");break}isWhitespace(c)||(this._state=IN_ATTRIBUTE_NAME,this._startSection())}else if(this._state===IN_ATTRIBUTE_NAME)if("="===c)this._emitTokenIfNonempty("attribname"),this._state=BEFORE_ATTRIBUTE_VALUE;else if(isWhitespace(c))this._emitTokenIfNonempty("attribname"),this._state=AFTER_ATTRIBUTE_NAME;else{if("/"===c||">"===c){this._emitTokenIfNonempty("attribname"),this._state=BEFORE_ATTRIBUTE_NAME;continue}if(!isLegalInAttributeName(c)){this._emitSpecialToken("error");break}}else if(this._state===AFTER_ATTRIBUTE_NAME)if("="===c)this._state=BEFORE_ATTRIBUTE_VALUE;else{if("/"===c||">"===c){this._state=BEFORE_ATTRIBUTE_NAME;continue}if(!isLegalInAttributeName(c)){this._emitSpecialToken("error");break}isWhitespace(c)||(this._state=IN_ATTRIBUTE_NAME,this._startSection())}else if(this._state===BEFORE_ATTRIBUTE_VALUE)if('"'===c)this._state=IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES,this._startSection(1);else if("'"===c)this._state=IN_ATTRIBUTE_VALUE_SINGLE_QUOTES,this._startSection(1);else{if(!isLegalInUnquotedAttributeValue(c)){this._emitSpecialToken("error");break}isWhitespace(c)||(this._state=IN_ATTRIBUTE_VALUE_NO_QUOTES,this._startSection())}else if(this._state===IN_ATTRIBUTE_VALUE_DOUBLE_QUOTES)'"'===c&&(this._emitToken("attribvalue"),this._state=AFTER_QUOTED_ATTRIBUTE_VALUE);else if(this._state===IN_ATTRIBUTE_VALUE_SINGLE_QUOTES)"'"===c&&(this._state=AFTER_QUOTED_ATTRIBUTE_VALUE,this._emitToken("attribvalue"));else if(this._state===IN_ATTRIBUTE_VALUE_NO_QUOTES){if(">"===c)this._emitToken("attribvalue"),this._emitSpecialToken("opentagend",this._index+1,_clonePos(this._indexPos,1)),this._state=TEXT,this._startSection(1);else if(isWhitespace(c))this._emitToken("attribvalue"),this._state=BEFORE_ATTRIBUTE_NAME;else if(!isLegalInUnquotedAttributeValue(c)){this._emitSpecialToken("error");break}}else if(this._state===AFTER_QUOTED_ATTRIBUTE_VALUE)if(">"===c)this._state=TEXT,this._emitSpecialToken("opentagend",this._index+1,_clonePos(this._indexPos,1)),this._startSection(1);else if("/"===c)this._emitSpecialToken("selfclosingtag",this._index+2,_clonePos(this._indexPos,2)),this._state=AFTER_SELFCLOSE_SLASH;else{if(!isWhitespace(c)){this._emitSpecialToken("error");break}this._state=BEFORE_ATTRIBUTE_NAME}else if(this._state===BEFORE_DECLARATION)this._state="["===c?BEFORE_CDATA_1:"-"===c?BEFORE_COMMENT:IN_DECLARATION;else if(this._state===IN_DECLARATION)">"===c&&(this._emitToken("declaration"),this._state=TEXT,this._startSection(1));else if(this._state===IN_PROCESSING_INSTRUCTION)">"===c&&(this._emitToken("processinginstruction"),this._state=TEXT,this._startSection(1));else if(this._state===BEFORE_COMMENT)"-"===c?(this._state=IN_COMMENT,this._startSection(1)):this._state=IN_DECLARATION;else if(this._state===IN_COMMENT)"-"===c&&(this._state=AFTER_COMMENT_1);else if(this._state===AFTER_COMMENT_1)this._state="-"===c?AFTER_COMMENT_2:IN_COMMENT;else if(this._state===AFTER_COMMENT_2)">"===c?(this._emitToken("comment",this._index-2,_clonePos(this._indexPos,-2)),this._state=TEXT,this._startSection(1)):"-"!==c&&(this._state=IN_COMMENT);else if(this._state===BEFORE_CDATA_1)this._state="C"===c?BEFORE_CDATA_2:IN_DECLARATION;else if(this._state===BEFORE_CDATA_2)this._state="D"===c?BEFORE_CDATA_3:IN_DECLARATION;else if(this._state===BEFORE_CDATA_3)this._state="A"===c?BEFORE_CDATA_4:IN_DECLARATION;else if(this._state===BEFORE_CDATA_4)this._state="T"===c?BEFORE_CDATA_5:IN_DECLARATION;else if(this._state===BEFORE_CDATA_5)this._state="A"===c?BEFORE_CDATA_6:IN_DECLARATION;else if(this._state===BEFORE_CDATA_6)"["===c?(this._state=IN_CDATA,this._startSection(1)):this._state=IN_DECLARATION;else if(this._state===IN_CDATA)"]"===c&&(this._state=AFTER_CDATA_1);else if(this._state===AFTER_CDATA_1)this._state="]"===c?AFTER_CDATA_2:IN_CDATA;else if(this._state===AFTER_CDATA_2)">"===c?(this._emitToken("cdata",this._index-2,_clonePos(this._indexPos,-2)),this._state=TEXT,this._startSection(1)):"]"!==c&&(this._state=IN_CDATA);else if(this._state===BEFORE_SPECIAL)if("c"===c||"C"===c)this._state=BEFORE_SCRIPT_1;else{if("t"!==c&&"T"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_STYLE_1}else if(this._state===BEFORE_SPECIAL_END)1!==this._special||"c"!==c&&"C"!==c?2!==this._special||"t"!==c&&"T"!==c?this._state=TEXT:this._state=AFTER_STYLE_1:this._state=AFTER_SCRIPT_1;else if(this._state===BEFORE_SCRIPT_1){if("r"!==c&&"R"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_SCRIPT_2}else if(this._state===BEFORE_SCRIPT_2){if("i"!==c&&"I"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_SCRIPT_3}else if(this._state===BEFORE_SCRIPT_3){if("p"!==c&&"P"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_SCRIPT_4}else if(this._state===BEFORE_SCRIPT_4){if("t"!==c&&"T"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_SCRIPT_5}else{if(this._state===BEFORE_SCRIPT_5){("/"===c||">"===c||isWhitespace(c))&&(this._special=1),this._state=IN_TAG_NAME;continue}if(this._state===AFTER_SCRIPT_1)this._state="r"===c||"R"===c?AFTER_SCRIPT_2:TEXT;else if(this._state===AFTER_SCRIPT_2)this._state="i"===c||"I"===c?AFTER_SCRIPT_3:TEXT;else if(this._state===AFTER_SCRIPT_3)this._state="p"===c||"P"===c?AFTER_SCRIPT_4:TEXT;else if(this._state===AFTER_SCRIPT_4)this._state="t"===c||"T"===c?AFTER_SCRIPT_5:TEXT;else if(this._state===AFTER_SCRIPT_5){if(">"===c||isWhitespace(c)){this._state=IN_CLOSING_TAG_NAME,this._startSection(-6);continue}this._state=TEXT}else if(this._state===BEFORE_STYLE_1){if("y"!==c&&"Y"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_STYLE_2}else if(this._state===BEFORE_STYLE_2){if("l"!==c&&"L"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_STYLE_3}else if(this._state===BEFORE_STYLE_3){if("e"!==c&&"E"!==c){this._state=IN_TAG_NAME;continue}this._state=BEFORE_STYLE_4}else{if(this._state===BEFORE_STYLE_4){("/"===c||">"===c||isWhitespace(c))&&(this._special=2),this._state=IN_TAG_NAME;continue}if(this._state===AFTER_STYLE_1)this._state="y"===c||"Y"===c?AFTER_STYLE_2:TEXT;else if(this._state===AFTER_STYLE_2)this._state="l"===c||"L"===c?AFTER_STYLE_3:TEXT;else if(this._state===AFTER_STYLE_3)this._state="e"===c||"E"===c?AFTER_STYLE_4:TEXT;else{if(this._state!==AFTER_STYLE_4){console.error("HTMLTokenizer: Encountered unknown state"),this._emitSpecialToken("error");break}if(">"===c||isWhitespace(c)){this._state=IN_CLOSING_TAG_NAME,this._startSection(-5);continue}this._state=TEXT}}}"\n"===c?(this._indexPos.line++,this._indexPos.ch=0):this._indexPos.ch++,this._index++}return this._token||(this._state!==TEXT?this._emitSpecialToken("error"):(this._emitTokenIfNonempty("text"),this._startSection())),this._token},Tokenizer.prototype._startSection=function(offset){offset=offset||0,this._sectionStart=this._index+offset,this._sectionStartPos=_clonePos(this._indexPos,offset)},Tokenizer.prototype._setToken=function(type,index,indexPos){void 0===index&&(index=this._index),void 0===indexPos&&(indexPos=this._indexPos);var token={type:type,contents:-1===this._sectionStart?"":this._buffer.substring(this._sectionStart,index),start:this._sectionStart,end:index,startPos:_clonePos(this._sectionStartPos),endPos:_clonePos(indexPos)};this._token?(this._nextToken&&console.error("HTMLTokenizer: Tried to emit more than two tokens in a single call"),this._nextToken=token):this._token=token},Tokenizer.prototype._emitToken=function(type,index,indexPos){this._setToken(type,index,indexPos),this._sectionStart=-1,this._sectionStartPos=null},Tokenizer.prototype._emitSpecialToken=function(type,index,indexPos){this._sectionStart=-1,this._sectionStartPos=null,this._emitToken(type,index,indexPos)},Tokenizer.prototype._emitTokenIfNonempty=function(type){this._index>this._sectionStart&&this._setToken(type),this._sectionStart=-1,this._sectionStartPos=null},exports.Tokenizer=Tokenizer});