TreeBuilder.php 169 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844
  1. <?php
  2. /*
  3. Copyright 2007 Jeroen van der Meer <http://jero.net/>
  4. Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
  5. Permission is hereby granted, free of charge, to any person obtaining a
  6. copy of this software and associated documentation files (the
  7. "Software"), to deal in the Software without restriction, including
  8. without limitation the rights to use, copy, modify, merge, publish,
  9. distribute, sublicense, and/or sell copies of the Software, and to
  10. permit persons to whom the Software is furnished to do so, subject to
  11. the following conditions:
  12. The above copyright notice and this permission notice shall be included
  13. in all copies or substantial portions of the Software.
  14. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  15. OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  16. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  17. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  18. CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  19. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  20. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. */
  22. // Tags for FIX ME!!!: (in order of priority)
  23. // XXX - should be fixed NAO!
  24. // XERROR - with regards to parse errors
  25. // XSCRIPT - with regards to scripting mode
  26. // XENCODING - with regards to encoding (for reparsing tests)
  27. // XDOM - DOM specific code (tagName is explicitly not marked).
  28. // this is not (yet) in helper functions.
  29. class HTML5_TreeBuilder {
  // Stack of open elements; element nodes are appended as they are created.
  public $stack = array();
  public $content_model;

  // Current insertion mode (one of the tree construction mode constants).
  private $mode;
  private $original_mode;
  private $secondary_mode;

  // The DOMDocument being built.
  private $dom;

  // Whether or not normal insertion of nodes should actually foster
  // parent (used in one case in spec)
  private $foster_parent = false;

  // May contain self::MARKER entries.
  private $a_formatting = array();
  private $head_pointer = null;
  private $form_pointer = null;
  private $flag_frameset_ok = true;
  private $flag_force_quirks = false;

  // Reset to false at the start of every emitToken() call and set to true
  // by the branches that ignore the token.
  private $ignored = false;

  // null until a quirks mode has been decided, afterwards one of
  // NO_QUIRKS, QUIRKS_MODE or LIMITED_QUIRKS_MODE.
  private $quirks_mode = null;

  // this gets to 2 when we want to ignore the next lf character, and
  // is decremented at the beginning of each processed token (this way,
  // code can check for (bool)$ignore_lf_token, but it phases out
  // appropriately)
  private $ignore_lf_token = 0;
  private $fragment = false;
  private $root;

  // Tag-name lists; presumably these back the SCOPING, FORMATTING and
  // SPECIAL element categories -- confirm against the code that reads them.
  private $scoping = array(
      'applet','button','caption','html','marquee','object','table','td','th',
      'svg:foreignObject'
  );
  private $formatting = array(
      'a','b','big','code','em','font','i','nobr','s','small','strike',
      'strong','tt','u'
  );
  // dl and ds are speculative
  private $special = array(
      'address','area','article','aside','base','basefont','bgsound',
      'blockquote','body','br','center','col','colgroup','command','dc','dd',
      'details','dir','div','dl','ds',
      'dt','embed','fieldset','figure','footer','form','frame','frameset',
      'h1','h2','h3','h4','h5',
      'h6','head','header','hgroup','hr','iframe','img','input','isindex',
      'li','link',
      'listing','menu','meta','nav','noembed','noframes','noscript','ol',
      'p','param','plaintext','pre','script','select','spacer','style',
      'tbody','textarea','tfoot','thead','title','tr','ul','wbr'
  );

  private $pendingTableCharacters;
  private $pendingTableCharactersDirty;
  // Tree construction (insertion) modes. $this->mode holds one of these
  // values and emitToken() switches on it.
  const INITIAL              = 0;
  const BEFORE_HTML          = 1;
  const BEFORE_HEAD          = 2;
  const IN_HEAD              = 3;
  const IN_HEAD_NOSCRIPT     = 4;
  const AFTER_HEAD           = 5;
  const IN_BODY              = 6;
  const IN_CDATA_RCDATA      = 7;
  const IN_TABLE             = 8;
  const IN_TABLE_TEXT        = 9;
  const IN_CAPTION           = 10;
  const IN_COLUMN_GROUP      = 11;
  const IN_TABLE_BODY        = 12;
  const IN_ROW               = 13;
  const IN_CELL              = 14;
  const IN_SELECT            = 15;
  const IN_SELECT_IN_TABLE   = 16;
  const IN_FOREIGN_CONTENT   = 17;
  const AFTER_BODY           = 18;
  const IN_FRAMESET          = 19;
  const AFTER_FRAMESET       = 20;
  const AFTER_AFTER_BODY     = 21;
  const AFTER_AFTER_FRAMESET = 22;
  /**
   * Converts a magic number to a readable constant name. Use for debugging.
   *
   * Several integer constants of this class share a value (INITIAL and
   * SCOPE are both 0, BEFORE_HTML and SCOPE_LISTITEM are both 1, ...).
   * Only the first name encountered for a value is kept, so that the
   * insertion-mode names are not overwritten by the scope constants that
   * are declared after them. This relies on getConstants() listing the
   * constants in declaration order.
   *
   * @param int $number Value of one of this class's integer constants.
   * @return string|null The constant's name, or null when no integer
   *                     constant of this class has that value.
   */
  private function strConst($number) {
      // Built once and reused by every later call.
      static $lookup;
      if ($lookup === null) {
          $lookup = array();
          $r = new ReflectionClass(__CLASS__);
          foreach ($r->getConstants() as $const => $num) {
              // Skip the non-integer constants (the namespace strings and
              // NS_HTML, which is null).
              if (!is_int($num)) continue;
              // First declaration wins; see the note in the docblock.
              if (isset($lookup[$num])) continue;
              $lookup[$num] = $const;
          }
      }
      // Guarded lookup: an unknown value yields null without raising an
      // undefined-index notice.
      return isset($lookup[$number]) ? $lookup[$number] : null;
  }
  // The different types of elements.
  const SPECIAL    = 100;
  const SCOPING    = 101;
  const FORMATTING = 102;
  const PHRASING   = 103;

  // Quirks modes in $quirks_mode
  const NO_QUIRKS           = 200;
  const QUIRKS_MODE         = 201;
  const LIMITED_QUIRKS_MODE = 202;

  // Marker to be placed in $a_formatting
  const MARKER = 300;

  // Namespaces for foreign content
  const NS_HTML   = null; // to prevent DOM from requiring NS on everything
  const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
  const NS_SVG    = 'http://www.w3.org/2000/svg';
  const NS_XLINK  = 'http://www.w3.org/1999/xlink';
  const NS_XML    = 'http://www.w3.org/XML/1998/namespace';
  const NS_XMLNS  = 'http://www.w3.org/2000/xmlns/';

  // Different types of scopes to test for elements.
  // Note: these reuse the values 0-2, which are also the values of the
  // INITIAL, BEFORE_HTML and BEFORE_HEAD insertion modes.
  const SCOPE          = 0;
  const SCOPE_LISTITEM = 1;
  const SCOPE_TABLE    = 2;
  /**
   * Puts the builder in the INITIAL insertion mode and creates the
   * DOMDocument that the parsed nodes are appended to.
   */
  public function __construct() {
      $document = new DOMDocument();
      $document->encoding = 'UTF-8';
      $document->preserveWhiteSpace = true;
      $document->substituteEntities = true;
      // Strict error checking is switched off on the document.
      $document->strictErrorChecking = false;

      $this->dom = $document;
      $this->mode = self::INITIAL;
  }
  /**
   * Returns the quirks mode currently recorded for the document.
   *
   * @return int|null One of NO_QUIRKS, QUIRKS_MODE or LIMITED_QUIRKS_MODE;
   *                  null while no mode has been assigned yet.
   */
  public function getQuirksMode() {
      $quirksMode = $this->quirks_mode;
      return $quirksMode;
  }
  138. // Process tag tokens
  139. public function emitToken($token, $mode = null) {
  140. // XXX: ignore parse errors... why are we emitting them, again?
  141. if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
  142. if ($mode === null) $mode = $this->mode;
  143. /*
  144. $backtrace = debug_backtrace();
  145. if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
  146. echo $this->strConst($mode);
  147. if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
  148. echo "\n ";
  149. token_dump($token);
  150. $this->printStack();
  151. $this->printActiveFormattingElements();
  152. if ($this->foster_parent) echo " -> this is a foster parent mode\n";
  153. if ($this->flag_frameset_ok) echo " -> frameset ok\n";
  154. */
  155. if ($this->ignore_lf_token) $this->ignore_lf_token--;
  156. $this->ignored = false;
  157. // indenting is a little wonky, this can be changed later on
  158. switch ($mode) {
  159. case self::INITIAL:
  160. /* A character token that is one of U+0009 CHARACTER TABULATION,
  161. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
  162. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  163. /* Ignore the token. */
  164. $this->ignored = true;
  165. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  166. if (
  167. $token['name'] !== 'html' || !empty($token['public']) ||
  168. !empty($token['system']) || $token !== 'about:legacy-compat'
  169. ) {
  170. /* If the DOCTYPE token's name is not a case-sensitive match
  171. * for the string "html", or if the token's public identifier
  172. * is not missing, or if the token's system identifier is
  173. * neither missing nor a case-sensitive match for the string
  174. * "about:legacy-compat", then there is a parse error (this
  175. * is the DOCTYPE parse error). */
  176. // DOCTYPE parse error
  177. }
  178. /* Append a DocumentType node to the Document node, with the name
  179. * attribute set to the name given in the DOCTYPE token, or the
  180. * empty string if the name was missing; the publicId attribute
  181. * set to the public identifier given in the DOCTYPE token, or
  182. * the empty string if the public identifier was missing; the
  183. * systemId attribute set to the system identifier given in the
  184. * DOCTYPE token, or the empty string if the system identifier
  185. * was missing; and the other attributes specific to
  186. * DocumentType objects set to null and empty lists as
  187. * appropriate. Associate the DocumentType node with the
  188. * Document object so that it is returned as the value of the
  189. * doctype attribute of the Document object. */
  190. if (!isset($token['public'])) $token['public'] = null;
  191. if (!isset($token['system'])) $token['system'] = null;
  192. // XDOM
  193. // Yes this is hacky. I'm kind of annoyed that I can't appendChild
  194. // a doctype to DOMDocument. Maybe I haven't chanted the right
  195. // syllables.
  196. $impl = new DOMImplementation();
  197. // This call can fail for particularly pathological cases (namely,
  198. // the qualifiedName parameter ($token['name']) could be missing).
  199. if ($token['name']) {
  200. $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
  201. $this->dom->appendChild($doctype);
  202. } else {
  203. // It looks like libxml's not actually *able* to express this case.
  204. // So... don't.
  205. $this->dom->emptyDoctype = true;
  206. }
  207. $public = is_null($token['public']) ? false : strtolower($token['public']);
  208. $system = is_null($token['system']) ? false : strtolower($token['system']);
  209. $publicStartsWithForQuirks = array(
  210. "+//silmaril//dtd html pro v0r11 19970101//",
  211. "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
  212. "-//as//dtd html 3.0 aswedit + extensions//",
  213. "-//ietf//dtd html 2.0 level 1//",
  214. "-//ietf//dtd html 2.0 level 2//",
  215. "-//ietf//dtd html 2.0 strict level 1//",
  216. "-//ietf//dtd html 2.0 strict level 2//",
  217. "-//ietf//dtd html 2.0 strict//",
  218. "-//ietf//dtd html 2.0//",
  219. "-//ietf//dtd html 2.1e//",
  220. "-//ietf//dtd html 3.0//",
  221. "-//ietf//dtd html 3.2 final//",
  222. "-//ietf//dtd html 3.2//",
  223. "-//ietf//dtd html 3//",
  224. "-//ietf//dtd html level 0//",
  225. "-//ietf//dtd html level 1//",
  226. "-//ietf//dtd html level 2//",
  227. "-//ietf//dtd html level 3//",
  228. "-//ietf//dtd html strict level 0//",
  229. "-//ietf//dtd html strict level 1//",
  230. "-//ietf//dtd html strict level 2//",
  231. "-//ietf//dtd html strict level 3//",
  232. "-//ietf//dtd html strict//",
  233. "-//ietf//dtd html//",
  234. "-//metrius//dtd metrius presentational//",
  235. "-//microsoft//dtd internet explorer 2.0 html strict//",
  236. "-//microsoft//dtd internet explorer 2.0 html//",
  237. "-//microsoft//dtd internet explorer 2.0 tables//",
  238. "-//microsoft//dtd internet explorer 3.0 html strict//",
  239. "-//microsoft//dtd internet explorer 3.0 html//",
  240. "-//microsoft//dtd internet explorer 3.0 tables//",
  241. "-//netscape comm. corp.//dtd html//",
  242. "-//netscape comm. corp.//dtd strict html//",
  243. "-//o'reilly and associates//dtd html 2.0//",
  244. "-//o'reilly and associates//dtd html extended 1.0//",
  245. "-//o'reilly and associates//dtd html extended relaxed 1.0//",
  246. "-//spyglass//dtd html 2.0 extended//",
  247. "-//sq//dtd html 2.0 hotmetal + extensions//",
  248. "-//sun microsystems corp.//dtd hotjava html//",
  249. "-//sun microsystems corp.//dtd hotjava strict html//",
  250. "-//w3c//dtd html 3 1995-03-24//",
  251. "-//w3c//dtd html 3.2 draft//",
  252. "-//w3c//dtd html 3.2 final//",
  253. "-//w3c//dtd html 3.2//",
  254. "-//w3c//dtd html 3.2s draft//",
  255. "-//w3c//dtd html 4.0 frameset//",
  256. "-//w3c//dtd html 4.0 transitional//",
  257. "-//w3c//dtd html experimental 19960712//",
  258. "-//w3c//dtd html experimental 970421//",
  259. "-//w3c//dtd w3 html//",
  260. "-//w3o//dtd w3 html 3.0//",
  261. "-//webtechs//dtd mozilla html 2.0//",
  262. "-//webtechs//dtd mozilla html//",
  263. );
  264. $publicSetToForQuirks = array(
  265. "-//w3o//dtd w3 html strict 3.0//",
  266. "-/w3c/dtd html 4.0 transitional/en",
  267. "html",
  268. );
  269. $publicStartsWithAndSystemForQuirks = array(
  270. "-//w3c//dtd html 4.01 frameset//",
  271. "-//w3c//dtd html 4.01 transitional//",
  272. );
  273. $publicStartsWithForLimitedQuirks = array(
  274. "-//w3c//dtd xhtml 1.0 frameset//",
  275. "-//w3c//dtd xhtml 1.0 transitional//",
  276. );
  277. $publicStartsWithAndSystemForLimitedQuirks = array(
  278. "-//w3c//dtd html 4.01 frameset//",
  279. "-//w3c//dtd html 4.01 transitional//",
  280. );
  281. // first, do easy checks
  282. if (
  283. !empty($token['force-quirks']) ||
  284. strtolower($token['name']) !== 'html'
  285. ) {
  286. $this->quirks_mode = self::QUIRKS_MODE;
  287. } else {
  288. do {
  289. if ($system) {
  290. foreach ($publicStartsWithAndSystemForQuirks as $x) {
  291. if (strncmp($public, $x, strlen($x)) === 0) {
  292. $this->quirks_mode = self::QUIRKS_MODE;
  293. break;
  294. }
  295. }
  296. if (!is_null($this->quirks_mode)) break;
  297. foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
  298. if (strncmp($public, $x, strlen($x)) === 0) {
  299. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  300. break;
  301. }
  302. }
  303. if (!is_null($this->quirks_mode)) break;
  304. }
  305. foreach ($publicSetToForQuirks as $x) {
  306. if ($public === $x) {
  307. $this->quirks_mode = self::QUIRKS_MODE;
  308. break;
  309. }
  310. }
  311. if (!is_null($this->quirks_mode)) break;
  312. foreach ($publicStartsWithForLimitedQuirks as $x) {
  313. if (strncmp($public, $x, strlen($x)) === 0) {
  314. $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
  315. }
  316. }
  317. if (!is_null($this->quirks_mode)) break;
  318. if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
  319. $this->quirks_mode = self::QUIRKS_MODE;
  320. break;
  321. }
  322. foreach ($publicStartsWithForQuirks as $x) {
  323. if (strncmp($public, $x, strlen($x)) === 0) {
  324. $this->quirks_mode = self::QUIRKS_MODE;
  325. break;
  326. }
  327. }
  328. if (is_null($this->quirks_mode)) {
  329. $this->quirks_mode = self::NO_QUIRKS;
  330. }
  331. } while (false);
  332. }
  333. $this->mode = self::BEFORE_HTML;
  334. } else {
  335. // parse error
  336. /* Switch the insertion mode to "before html", then reprocess the
  337. * current token. */
  338. $this->mode = self::BEFORE_HTML;
  339. $this->quirks_mode = self::QUIRKS_MODE;
  340. $this->emitToken($token);
  341. }
  342. break;
  343. case self::BEFORE_HTML:
  344. /* A DOCTYPE token */
  345. if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  346. // Parse error. Ignore the token.
  347. $this->ignored = true;
  348. /* A comment token */
  349. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  350. /* Append a Comment node to the Document object with the data
  351. attribute set to the data given in the comment token. */
  352. // XDOM
  353. $comment = $this->dom->createComment($token['data']);
  354. $this->dom->appendChild($comment);
  355. /* A character token that is one of U+0009 CHARACTER TABULATION,
  356. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  357. or U+0020 SPACE */
  358. } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  359. /* Ignore the token. */
  360. $this->ignored = true;
  361. /* A start tag whose tag name is "html" */
  362. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
  363. /* Create an element for the token in the HTML namespace. Append it
  364. * to the Document object. Put this element in the stack of open
  365. * elements. */
  366. // XDOM
  367. $html = $this->insertElement($token, false);
  368. $this->dom->appendChild($html);
  369. $this->stack[] = $html;
  370. $this->mode = self::BEFORE_HEAD;
  371. } else {
  372. /* Create an html element. Append it to the Document object. Put
  373. * this element in the stack of open elements. */
  374. // XDOM
  375. $html = $this->dom->createElementNS(self::NS_HTML, 'html');
  376. $this->dom->appendChild($html);
  377. $this->stack[] = $html;
  378. /* Switch the insertion mode to "before head", then reprocess the
  379. * current token. */
  380. $this->mode = self::BEFORE_HEAD;
  381. $this->emitToken($token);
  382. }
  383. break;
  384. case self::BEFORE_HEAD:
  385. /* A character token that is one of U+0009 CHARACTER TABULATION,
  386. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  387. or U+0020 SPACE */
  388. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  389. /* Ignore the token. */
  390. $this->ignored = true;
  391. /* A comment token */
  392. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  393. /* Append a Comment node to the current node with the data attribute
  394. set to the data given in the comment token. */
  395. $this->insertComment($token['data']);
  396. /* A DOCTYPE token */
  397. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  398. /* Parse error. Ignore the token */
  399. $this->ignored = true;
  400. // parse error
  401. /* A start tag token with the tag name "html" */
  402. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  403. /* Process the token using the rules for the "in body"
  404. * insertion mode. */
  405. $this->processWithRulesFor($token, self::IN_BODY);
  406. /* A start tag token with the tag name "head" */
  407. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
  408. /* Insert an HTML element for the token. */
  409. $element = $this->insertElement($token);
  410. /* Set the head element pointer to this new element node. */
  411. $this->head_pointer = $element;
  412. /* Change the insertion mode to "in head". */
  413. $this->mode = self::IN_HEAD;
  414. /* An end tag whose tag name is one of: "head", "body", "html", "br" */
  415. } elseif(
  416. $token['type'] === HTML5_Tokenizer::ENDTAG && (
  417. $token['name'] === 'head' || $token['name'] === 'body' ||
  418. $token['name'] === 'html' || $token['name'] === 'br'
  419. )) {
  420. /* Act as if a start tag token with the tag name "head" and no
  421. * attributes had been seen, then reprocess the current token. */
  422. $this->emitToken(array(
  423. 'name' => 'head',
  424. 'type' => HTML5_Tokenizer::STARTTAG,
  425. 'attr' => array()
  426. ));
  427. $this->emitToken($token);
  428. /* Any other end tag */
  429. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
  430. /* Parse error. Ignore the token. */
  431. $this->ignored = true;
  432. } else {
  433. /* Act as if a start tag token with the tag name "head" and no
  434. * attributes had been seen, then reprocess the current token.
  435. * Note: This will result in an empty head element being
  436. * generated, with the current token being reprocessed in the
  437. * "after head" insertion mode. */
  438. $this->emitToken(array(
  439. 'name' => 'head',
  440. 'type' => HTML5_Tokenizer::STARTTAG,
  441. 'attr' => array()
  442. ));
  443. $this->emitToken($token);
  444. }
  445. break;
  446. case self::IN_HEAD:
  447. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  448. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  449. or U+0020 SPACE. */
  450. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  451. /* Insert the character into the current node. */
  452. $this->insertText($token['data']);
  453. /* A comment token */
  454. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  455. /* Append a Comment node to the current node with the data attribute
  456. set to the data given in the comment token. */
  457. $this->insertComment($token['data']);
  458. /* A DOCTYPE token */
  459. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  460. /* Parse error. Ignore the token. */
  461. $this->ignored = true;
  462. // parse error
  463. /* A start tag whose tag name is "html" */
  464. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  465. $token['name'] === 'html') {
  466. $this->processWithRulesFor($token, self::IN_BODY);
  467. /* A start tag whose tag name is one of: "base", "command", "link" */
  468. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  469. ($token['name'] === 'base' || $token['name'] === 'command' ||
  470. $token['name'] === 'link')) {
  471. /* Insert an HTML element for the token. Immediately pop the
  472. * current node off the stack of open elements. */
  473. $this->insertElement($token);
  474. array_pop($this->stack);
  475. // YYY: Acknowledge the token's self-closing flag, if it is set.
  476. /* A start tag whose tag name is "meta" */
  477. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
  478. /* Insert an HTML element for the token. Immediately pop the
  479. * current node off the stack of open elements. */
  480. $this->insertElement($token);
  481. array_pop($this->stack);
  482. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  483. // XENCODING: If the element has a charset attribute, and its value is a
  484. // supported encoding, and the confidence is currently tentative,
  485. // then change the encoding to the encoding given by the value of
  486. // the charset attribute.
  487. //
  488. // Otherwise, if the element has a content attribute, and applying
  489. // the algorithm for extracting an encoding from a Content-Type to
  490. // its value returns a supported encoding encoding, and the
  491. // confidence is currently tentative, then change the encoding to
  492. // the encoding encoding.
  493. /* A start tag with the tag name "title" */
  494. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
  495. $this->insertRCDATAElement($token);
  496. /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
  497. * A start tag whose tag name is one of: "noframes", "style" */
  498. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  499. ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
  500. // XSCRIPT: Scripting flag not respected
  501. $this->insertCDATAElement($token);
  502. // XSCRIPT: Scripting flag disable not implemented
  503. /* A start tag with the tag name "script" */
  504. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
  505. /* 1. Create an element for the token in the HTML namespace. */
  506. $node = $this->insertElement($token, false);
  507. /* 2. Mark the element as being "parser-inserted" */
  508. // Uhhh... XSCRIPT
  509. /* 3. If the parser was originally created for the HTML
  510. * fragment parsing algorithm, then mark the script element as
  511. * "already executed". (fragment case) */
  512. // ditto... XSCRIPT
  513. /* 4. Append the new element to the current node and push it onto
  514. * the stack of open elements. */
  515. end($this->stack)->appendChild($node);
  516. $this->stack[] = $node;
  517. // I guess we could squash these together
  518. /* 6. Let the original insertion mode be the current insertion mode. */
  519. $this->original_mode = $this->mode;
  520. /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
  521. $this->mode = self::IN_CDATA_RCDATA;
  522. /* 5. Switch the tokeniser's content model flag to the CDATA state. */
  523. $this->content_model = HTML5_Tokenizer::CDATA;
  524. /* An end tag with the tag name "head" */
  525. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
  526. /* Pop the current node (which will be the head element) off the stack of open elements. */
  527. array_pop($this->stack);
  528. /* Change the insertion mode to "after head". */
  529. $this->mode = self::AFTER_HEAD;
  530. // Slight logic inversion here to minimize duplication
  531. /* A start tag with the tag name "head". */
  532. /* An end tag whose tag name is not one of: "body", "html", "br" */
  533. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  534. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
  535. $token['name'] !== 'body' && $token['name'] !== 'br')) {
  536. // Parse error. Ignore the token.
  537. $this->ignored = true;
  538. /* Anything else */
  539. } else {
  540. /* Act as if an end tag token with the tag name "head" had been
  541. * seen, and reprocess the current token. */
  542. $this->emitToken(array(
  543. 'name' => 'head',
  544. 'type' => HTML5_Tokenizer::ENDTAG
  545. ));
  546. /* Then, reprocess the current token. */
  547. $this->emitToken($token);
  548. }
  549. break;
  550. case self::IN_HEAD_NOSCRIPT:
  551. if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  552. // parse error
  553. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  554. $this->processWithRulesFor($token, self::IN_BODY);
  555. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
  556. /* Pop the current node (which will be a noscript element) from the
  557. * stack of open elements; the new current node will be a head
  558. * element. */
  559. array_pop($this->stack);
  560. $this->mode = self::IN_HEAD;
  561. } elseif (
  562. ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
  563. ($token['type'] === HTML5_Tokenizer::COMMENT) ||
  564. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  565. $token['name'] === 'link' || $token['name'] === 'meta' ||
  566. $token['name'] === 'noframes' || $token['name'] === 'style'))) {
  567. $this->processWithRulesFor($token, self::IN_HEAD);
  568. // inverted logic
  569. } elseif (
  570. ($token['type'] === HTML5_Tokenizer::STARTTAG && (
  571. $token['name'] === 'head' || $token['name'] === 'noscript')) ||
  572. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  573. $token['name'] !== 'br')) {
  574. // parse error
  575. } else {
  576. // parse error
  577. $this->emitToken(array(
  578. 'type' => HTML5_Tokenizer::ENDTAG,
  579. 'name' => 'noscript',
  580. ));
  581. $this->emitToken($token);
  582. }
  583. break;
  584. case self::AFTER_HEAD:
  585. /* Handle the token as follows: */
  586. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  587. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  588. or U+0020 SPACE */
  589. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  590. /* Append the character to the current node. */
  591. $this->insertText($token['data']);
  592. /* A comment token */
  593. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  594. /* Append a Comment node to the current node with the data attribute
  595. set to the data given in the comment token. */
  596. $this->insertComment($token['data']);
  597. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  598. // parse error
  599. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  600. $this->processWithRulesFor($token, self::IN_BODY);
  601. /* A start tag token with the tag name "body" */
  602. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
  603. $this->insertElement($token);
  604. /* Set the frameset-ok flag to "not ok". */
  605. $this->flag_frameset_ok = false;
  606. /* Change the insertion mode to "in body". */
  607. $this->mode = self::IN_BODY;
  608. /* A start tag token with the tag name "frameset" */
  609. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
  610. /* Insert a frameset element for the token. */
  611. $this->insertElement($token);
  612. /* Change the insertion mode to "in frameset". */
  613. $this->mode = self::IN_FRAMESET;
  614. /* A start tag token whose tag name is one of: "base", "link", "meta",
  615. "script", "style", "title" */
  616. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  617. array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
  618. // parse error
  619. /* Push the node pointed to by the head element pointer onto the
  620. * stack of open elements. */
  621. $this->stack[] = $this->head_pointer;
  622. $this->processWithRulesFor($token, self::IN_HEAD);
  623. array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
  624. // inversion of specification
  625. } elseif(
  626. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
  627. ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  628. $token['name'] !== 'body' && $token['name'] !== 'html' &&
  629. $token['name'] !== 'br')) {
  630. // parse error
  631. /* Anything else */
  632. } else {
  633. $this->emitToken(array(
  634. 'name' => 'body',
  635. 'type' => HTML5_Tokenizer::STARTTAG,
  636. 'attr' => array()
  637. ));
  638. $this->flag_frameset_ok = true;
  639. $this->emitToken($token);
  640. }
  641. break;
  642. case self::IN_BODY:
  643. /* Handle the token as follows: */
  644. switch($token['type']) {
  645. /* A character token */
  646. case HTML5_Tokenizer::CHARACTER:
  647. case HTML5_Tokenizer::SPACECHARACTER:
  648. /* Reconstruct the active formatting elements, if any. */
  649. $this->reconstructActiveFormattingElements();
  650. /* Append the token's character to the current node. */
  651. $this->insertText($token['data']);
  652. /* If the token is not one of U+0009 CHARACTER TABULATION,
  653. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
  654. * SPACE, then set the frameset-ok flag to "not ok". */
  655. // i.e., if any of the characters is not whitespace
  656. if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
  657. $this->flag_frameset_ok = false;
  658. }
  659. break;
  660. /* A comment token */
  661. case HTML5_Tokenizer::COMMENT:
  662. /* Append a Comment node to the current node with the data
  663. attribute set to the data given in the comment token. */
  664. $this->insertComment($token['data']);
  665. break;
  666. case HTML5_Tokenizer::DOCTYPE:
  667. // parse error
  668. break;
  669. case HTML5_Tokenizer::EOF:
  670. // parse error
  671. break;
  672. case HTML5_Tokenizer::STARTTAG:
  673. switch($token['name']) {
  674. case 'html':
  675. // parse error
  676. /* For each attribute on the token, check to see if the
  677. * attribute is already present on the top element of the
  678. * stack of open elements. If it is not, add the attribute
  679. * and its corresponding value to that element. */
  680. foreach($token['attr'] as $attr) {
  681. if(!$this->stack[0]->hasAttribute($attr['name'])) {
  682. $this->stack[0]->setAttribute($attr['name'], $attr['value']);
  683. }
  684. }
  685. break;
  686. case 'base': case 'command': case 'link': case 'meta': case 'noframes':
  687. case 'script': case 'style': case 'title':
  688. /* Process the token as if the insertion mode had been "in
  689. head". */
  690. $this->processWithRulesFor($token, self::IN_HEAD);
  691. break;
  692. /* A start tag token with the tag name "body" */
  693. case 'body':
  694. /* Parse error. If the second element on the stack of open
  695. elements is not a body element, or, if the stack of open
  696. elements has only one node on it, then ignore the token.
  697. (fragment case) */
  698. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  699. $this->ignored = true;
  700. // Ignore
  701. /* Otherwise, for each attribute on the token, check to see
  702. if the attribute is already present on the body element (the
  703. second element) on the stack of open elements. If it is not,
  704. add the attribute and its corresponding value to that
  705. element. */
  706. } else {
  707. foreach($token['attr'] as $attr) {
  708. if(!$this->stack[1]->hasAttribute($attr['name'])) {
  709. $this->stack[1]->setAttribute($attr['name'], $attr['value']);
  710. }
  711. }
  712. }
  713. break;
  714. case 'frameset':
  715. // parse error
  716. /* If the second element on the stack of open elements is
  717. * not a body element, or, if the stack of open elements
  718. * has only one node on it, then ignore the token.
  719. * (fragment case) */
  720. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
  721. $this->ignored = true;
  722. // Ignore
  723. } elseif (!$this->flag_frameset_ok) {
  724. $this->ignored = true;
  725. // Ignore
  726. } else {
  727. /* 1. Remove the second element on the stack of open
  728. * elements from its parent node, if it has one. */
  729. if($this->stack[1]->parentNode) {
  730. $this->stack[1]->parentNode->removeChild($this->stack[1]);
  731. }
  732. /* 2. Pop all the nodes from the bottom of the stack of
  733. * open elements, from the current node up to the root
  734. * html element. */
  735. array_splice($this->stack, 1);
  736. $this->insertElement($token);
  737. $this->mode = self::IN_FRAMESET;
  738. }
  739. break;
  740. // in spec, there is a diversion here
  741. case 'address': case 'article': case 'aside': case 'blockquote':
  742. case 'center': case 'datagrid': case 'details': case 'dir':
  743. case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
  744. case 'header': case 'hgroup': case 'menu': case 'nav':
  745. case 'ol': case 'p': case 'section': case 'ul':
  746. /* If the stack of open elements has a p element in scope,
  747. then act as if an end tag with the tag name p had been
  748. seen. */
  749. if($this->elementInScope('p')) {
  750. $this->emitToken(array(
  751. 'name' => 'p',
  752. 'type' => HTML5_Tokenizer::ENDTAG
  753. ));
  754. }
  755. /* Insert an HTML element for the token. */
  756. $this->insertElement($token);
  757. break;
  758. /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
  759. "h5", "h6" */
  760. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  761. /* If the stack of open elements has a p element in scope,
  762. then act as if an end tag with the tag name p had been seen. */
  763. if($this->elementInScope('p')) {
  764. $this->emitToken(array(
  765. 'name' => 'p',
  766. 'type' => HTML5_Tokenizer::ENDTAG
  767. ));
  768. }
  769. /* If the current node is an element whose tag name is one
  770. * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
  771. * parse error; pop the current node off the stack of open
  772. * elements. */
  773. $peek = array_pop($this->stack);
  774. if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
  775. // parse error
  776. } else {
  777. $this->stack[] = $peek;
  778. }
  779. /* Insert an HTML element for the token. */
  780. $this->insertElement($token);
  781. break;
  782. case 'pre': case 'listing':
  783. /* If the stack of open elements has a p element in scope,
  784. then act as if an end tag with the tag name p had been seen. */
  785. if($this->elementInScope('p')) {
  786. $this->emitToken(array(
  787. 'name' => 'p',
  788. 'type' => HTML5_Tokenizer::ENDTAG
  789. ));
  790. }
  791. $this->insertElement($token);
  792. /* If the next token is a U+000A LINE FEED (LF) character
  793. * token, then ignore that token and move on to the next
  794. * one. (Newlines at the start of pre blocks are ignored as
  795. * an authoring convenience.) */
  796. $this->ignore_lf_token = 2;
  797. $this->flag_frameset_ok = false;
  798. break;
  799. /* A start tag whose tag name is "form" */
  800. case 'form':
  801. /* If the form element pointer is not null, ignore the
  802. token with a parse error. */
  803. if($this->form_pointer !== null) {
  804. $this->ignored = true;
  805. // Ignore.
  806. /* Otherwise: */
  807. } else {
  808. /* If the stack of open elements has a p element in
  809. scope, then act as if an end tag with the tag name p
  810. had been seen. */
  811. if($this->elementInScope('p')) {
  812. $this->emitToken(array(
  813. 'name' => 'p',
  814. 'type' => HTML5_Tokenizer::ENDTAG
  815. ));
  816. }
  817. /* Insert an HTML element for the token, and set the
  818. form element pointer to point to the element created. */
  819. $element = $this->insertElement($token);
  820. $this->form_pointer = $element;
  821. }
  822. break;
  823. // condensed specification
  824. case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
  825. /* 1. Set the frameset-ok flag to "not ok". */
  826. $this->flag_frameset_ok = false;
  827. $stack_length = count($this->stack) - 1;
  828. for($n = $stack_length; 0 <= $n; $n--) {
  829. /* 2. Initialise node to be the current node (the
  830. bottommost node of the stack). */
  831. $stop = false;
  832. $node = $this->stack[$n];
  833. $cat = $this->getElementCategory($node);
  834. // for case 'li':
  835. /* 3. If node is an li element, then act as if an end
  836. * tag with the tag name "li" had been seen, then jump
  837. * to the last step. */
  838. // for case 'dc': case 'dd': case 'ds': case 'dt':
  839. /* If node is a dc, dd, ds or dt element, then act as if an end
  840. * tag with the same tag name as node had been seen, then
  841. * jump to the last step. */
  842. if(($token['name'] === 'li' && $node->tagName === 'li') ||
  843. ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
  844. $this->emitToken(array(
  845. 'type' => HTML5_Tokenizer::ENDTAG,
  846. 'name' => $node->tagName,
  847. ));
  848. break;
  849. }
  850. /* 4. If node is not in the formatting category, and is
  851. not in the phrasing category, and is not an address,
  852. div or p element, then stop this algorithm. */
  853. if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
  854. $node->tagName !== 'address' && $node->tagName !== 'div' &&
  855. $node->tagName !== 'p') {
  856. break;
  857. }
  858. /* 5. Otherwise, set node to the previous entry in the
  859. * stack of open elements and return to step 2. */
  860. }
  861. /* 6. This is the last step. */
  862. /* If the stack of open elements has a p element in scope,
  863. then act as if an end tag with the tag name p had been
  864. seen. */
  865. if($this->elementInScope('p')) {
  866. $this->emitToken(array(
  867. 'name' => 'p',
  868. 'type' => HTML5_Tokenizer::ENDTAG
  869. ));
  870. }
  871. /* Finally, insert an HTML element with the same tag
  872. name as the token's. */
  873. $this->insertElement($token);
  874. break;
  875. /* A start tag token whose tag name is "plaintext" */
  876. case 'plaintext':
  877. /* If the stack of open elements has a p element in scope,
  878. then act as if an end tag with the tag name p had been
  879. seen. */
  880. if($this->elementInScope('p')) {
  881. $this->emitToken(array(
  882. 'name' => 'p',
  883. 'type' => HTML5_Tokenizer::ENDTAG
  884. ));
  885. }
  886. /* Insert an HTML element for the token. */
  887. $this->insertElement($token);
  888. $this->content_model = HTML5_Tokenizer::PLAINTEXT;
  889. break;
  890. // more diversions
  891. /* A start tag whose tag name is "a" */
  892. case 'a':
  893. /* If the list of active formatting elements contains
  894. an element whose tag name is "a" between the end of the
  895. list and the last marker on the list (or the start of
  896. the list if there is no marker on the list), then this
  897. is a parse error; act as if an end tag with the tag name
  898. "a" had been seen, then remove that element from the list
  899. of active formatting elements and the stack of open
  900. elements if the end tag didn't already remove it (it
  901. might not have if the element is not in table scope). */
  902. $leng = count($this->a_formatting);
  903. for($n = $leng - 1; $n >= 0; $n--) {
  904. if($this->a_formatting[$n] === self::MARKER) {
  905. break;
  906. } elseif($this->a_formatting[$n]->tagName === 'a') {
  907. $a = $this->a_formatting[$n];
  908. $this->emitToken(array(
  909. 'name' => 'a',
  910. 'type' => HTML5_Tokenizer::ENDTAG
  911. ));
  912. if (in_array($a, $this->a_formatting)) {
  913. $a_i = array_search($a, $this->a_formatting, true);
  914. if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
  915. }
  916. if (in_array($a, $this->stack)) {
  917. $a_i = array_search($a, $this->stack, true);
  918. if ($a_i !== false) array_splice($this->stack, $a_i, 1);
  919. }
  920. break;
  921. }
  922. }
  923. /* Reconstruct the active formatting elements, if any. */
  924. $this->reconstructActiveFormattingElements();
  925. /* Insert an HTML element for the token. */
  926. $el = $this->insertElement($token);
  927. /* Add that element to the list of active formatting
  928. elements. */
  929. $this->a_formatting[] = $el;
  930. break;
  931. case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
  932. case 's': case 'small': case 'strike':
  933. case 'strong': case 'tt': case 'u':
  934. /* Reconstruct the active formatting elements, if any. */
  935. $this->reconstructActiveFormattingElements();
  936. /* Insert an HTML element for the token. */
  937. $el = $this->insertElement($token);
  938. /* Add that element to the list of active formatting
  939. elements. */
  940. $this->a_formatting[] = $el;
  941. break;
  942. case 'nobr':
  943. /* Reconstruct the active formatting elements, if any. */
  944. $this->reconstructActiveFormattingElements();
  945. /* If the stack of open elements has a nobr element in
  946. * scope, then this is a parse error; act as if an end tag
  947. * with the tag name "nobr" had been seen, then once again
  948. * reconstruct the active formatting elements, if any. */
  949. if ($this->elementInScope('nobr')) {
  950. $this->emitToken(array(
  951. 'name' => 'nobr',
  952. 'type' => HTML5_Tokenizer::ENDTAG,
  953. ));
  954. $this->reconstructActiveFormattingElements();
  955. }
  956. /* Insert an HTML element for the token. */
  957. $el = $this->insertElement($token);
  958. /* Add that element to the list of active formatting
  959. elements. */
  960. $this->a_formatting[] = $el;
  961. break;
  962. // another diversion
  963. /* A start tag token whose tag name is "button" */
  964. case 'button':
  965. /* If the stack of open elements has a button element in scope,
  966. then this is a parse error; act as if an end tag with the tag
  967. name "button" had been seen, then reprocess the token. (We don't
  968. do that. Unnecessary.) (I hope you're right! -- ezyang) */
  969. if($this->elementInScope('button')) {
  970. $this->emitToken(array(
  971. 'name' => 'button',
  972. 'type' => HTML5_Tokenizer::ENDTAG
  973. ));
  974. }
  975. /* Reconstruct the active formatting elements, if any. */
  976. $this->reconstructActiveFormattingElements();
  977. /* Insert an HTML element for the token. */
  978. $this->insertElement($token);
  979. /* Insert a marker at the end of the list of active
  980. formatting elements. */
  981. $this->a_formatting[] = self::MARKER;
  982. $this->flag_frameset_ok = false;
  983. break;
  984. case 'applet': case 'marquee': case 'object':
  985. /* Reconstruct the active formatting elements, if any. */
  986. $this->reconstructActiveFormattingElements();
  987. /* Insert an HTML element for the token. */
  988. $this->insertElement($token);
  989. /* Insert a marker at the end of the list of active
  990. formatting elements. */
  991. $this->a_formatting[] = self::MARKER;
  992. $this->flag_frameset_ok = false;
  993. break;
  994. // spec diversion
  995. /* A start tag whose tag name is "table" */
  996. case 'table':
  997. /* If the Document is not set to quirks mode, and the
  998. * stack of open elements has a p element in scope, then
  999. * act as if an end tag with the tag name "p" had been
  1000. * seen. */
  1001. if($this->quirks_mode !== self::QUIRKS_MODE &&
  1002. $this->elementInScope('p')) {
  1003. $this->emitToken(array(
  1004. 'name' => 'p',
  1005. 'type' => HTML5_Tokenizer::ENDTAG
  1006. ));
  1007. }
  1008. /* Insert an HTML element for the token. */
  1009. $this->insertElement($token);
  1010. $this->flag_frameset_ok = false;
  1011. /* Change the insertion mode to "in table". */
  1012. $this->mode = self::IN_TABLE;
  1013. break;
  1014. /* A start tag whose tag name is one of: "area", "basefont",
  1015. "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */
  1016. case 'area': case 'basefont': case 'bgsound': case 'br':
  1017. case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
  1018. case 'wbr':
  1019. /* Reconstruct the active formatting elements, if any. */
  1020. $this->reconstructActiveFormattingElements();
  1021. /* Insert an HTML element for the token. */
  1022. $this->insertElement($token);
  1023. /* Immediately pop the current node off the stack of open elements. */
  1024. array_pop($this->stack);
  1025. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1026. $this->flag_frameset_ok = false;
  1027. break;
  1028. case 'param': case 'source':
  1029. /* Insert an HTML element for the token. */
  1030. $this->insertElement($token);
  1031. /* Immediately pop the current node off the stack of open elements. */
  1032. array_pop($this->stack);
  1033. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1034. break;
  1035. /* A start tag whose tag name is "hr" */
  1036. case 'hr':
  1037. /* If the stack of open elements has a p element in scope,
  1038. then act as if an end tag with the tag name p had been seen. */
  1039. if($this->elementInScope('p')) {
  1040. $this->emitToken(array(
  1041. 'name' => 'p',
  1042. 'type' => HTML5_Tokenizer::ENDTAG
  1043. ));
  1044. }
  1045. /* Insert an HTML element for the token. */
  1046. $this->insertElement($token);
  1047. /* Immediately pop the current node off the stack of open elements. */
  1048. array_pop($this->stack);
  1049. // YYY: Acknowledge the token's self-closing flag, if it is set.
  1050. $this->flag_frameset_ok = false;
  1051. break;
  1052. /* A start tag whose tag name is "image" */
  1053. case 'image':
  1054. /* Parse error. Change the token's tag name to "img" and
  1055. reprocess it. (Don't ask.) */
  1056. $token['name'] = 'img';
  1057. $this->emitToken($token);
  1058. break;
  1059. /* A start tag whose tag name is "isindex" */
  1060. case 'isindex':
  1061. /* Parse error. */
  1062. /* If the form element pointer is not null,
  1063. then ignore the token. */
  1064. if($this->form_pointer === null) {
  1065. /* Act as if a start tag token with the tag name "form" had
  1066. been seen. */
  1067. /* If the token has an attribute called "action", set
  1068. * the action attribute on the resulting form
  1069. * element to the value of the "action" attribute of
  1070. * the token. */
  1071. $attr = array();
  1072. $action = $this->getAttr($token, 'action');
  1073. if ($action !== false) {
  1074. $attr[] = array('name' => 'action', 'value' => $action);
  1075. }
  1076. $this->emitToken(array(
  1077. 'name' => 'form',
  1078. 'type' => HTML5_Tokenizer::STARTTAG,
  1079. 'attr' => $attr
  1080. ));
  1081. /* Act as if a start tag token with the tag name "hr" had
  1082. been seen. */
  1083. $this->emitToken(array(
  1084. 'name' => 'hr',
  1085. 'type' => HTML5_Tokenizer::STARTTAG,
  1086. 'attr' => array()
  1087. ));
  1088. /* Act as if a start tag token with the tag name "label"
  1089. had been seen. */
  1090. $this->emitToken(array(
  1091. 'name' => 'label',
  1092. 'type' => HTML5_Tokenizer::STARTTAG,
  1093. 'attr' => array()
  1094. ));
  1095. /* Act as if a stream of character tokens had been seen. */
  1096. $prompt = $this->getAttr($token, 'prompt');
  1097. if ($prompt === false) {
  1098. $prompt = 'This is a searchable index. '.
  1099. 'Insert your search keywords here: ';
  1100. }
  1101. $this->emitToken(array(
  1102. 'data' => $prompt,
  1103. 'type' => HTML5_Tokenizer::CHARACTER,
  1104. ));
  1105. /* Act as if a start tag token with the tag name "input"
  1106. had been seen, with all the attributes from the "isindex"
  1107. token, except with the "name" attribute set to the value
  1108. "isindex" (ignoring any explicit "name" attribute). */
  1109. $attr = array();
  1110. foreach ($token['attr'] as $keypair) {
  1111. if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
  1112. $keypair['name'] === 'prompt') continue;
  1113. $attr[] = $keypair;
  1114. }
  1115. $attr[] = array('name' => 'name', 'value' => 'isindex');
  1116. $this->emitToken(array(
  1117. 'name' => 'input',
  1118. 'type' => HTML5_Tokenizer::STARTTAG,
  1119. 'attr' => $attr
  1120. ));
  1121. /* Act as if an end tag token with the tag name "label"
  1122. had been seen. */
  1123. $this->emitToken(array(
  1124. 'name' => 'label',
  1125. 'type' => HTML5_Tokenizer::ENDTAG
  1126. ));
  1127. /* Act as if a start tag token with the tag name "hr" had
  1128. been seen. */
  1129. $this->emitToken(array(
  1130. 'name' => 'hr',
  1131. 'type' => HTML5_Tokenizer::STARTTAG
  1132. ));
  1133. /* Act as if an end tag token with the tag name "form" had
  1134. been seen. */
  1135. $this->emitToken(array(
  1136. 'name' => 'form',
  1137. 'type' => HTML5_Tokenizer::ENDTAG
  1138. ));
  1139. } else {
  1140. $this->ignored = true;
  1141. }
  1142. break;
  1143. /* A start tag whose tag name is "textarea" */
  1144. case 'textarea':
  1145. $this->insertElement($token);
  1146. /* If the next token is a U+000A LINE FEED (LF)
  1147. * character token, then ignore that token and move on to
  1148. * the next one. (Newlines at the start of textarea
  1149. * elements are ignored as an authoring convenience.)
  1150. * need flag, see also <pre> */
  1151. $this->ignore_lf_token = 2;
  1152. $this->original_mode = $this->mode;
  1153. $this->flag_frameset_ok = false;
  1154. $this->mode = self::IN_CDATA_RCDATA;
  1155. /* Switch the tokeniser's content model flag to the
  1156. RCDATA state. */
  1157. $this->content_model = HTML5_Tokenizer::RCDATA;
  1158. break;
  1159. /* A start tag token whose tag name is "xmp" */
  1160. case 'xmp':
  1161. /* If the stack of open elements has a p element in
  1162. scope, then act as if an end tag with the tag name
  1163. "p" has been seen. */
  1164. if ($this->elementInScope('p')) {
  1165. $this->emitToken(array(
  1166. 'name' => 'p',
  1167. 'type' => HTML5_Tokenizer::ENDTAG
  1168. ));
  1169. }
  1170. /* Reconstruct the active formatting elements, if any. */
  1171. $this->reconstructActiveFormattingElements();
  1172. $this->flag_frameset_ok = false;
  1173. $this->insertCDATAElement($token);
  1174. break;
  1175. case 'iframe':
  1176. $this->flag_frameset_ok = false;
  1177. $this->insertCDATAElement($token);
  1178. break;
  1179. case 'noembed': case 'noscript':
  1180. // XSCRIPT: should check scripting flag
  1181. $this->insertCDATAElement($token);
  1182. break;
  1183. /* A start tag whose tag name is "select" */
  1184. case 'select':
  1185. /* Reconstruct the active formatting elements, if any. */
  1186. $this->reconstructActiveFormattingElements();
  1187. /* Insert an HTML element for the token. */
  1188. $this->insertElement($token);
  1189. $this->flag_frameset_ok = false;
  1190. /* If the insertion mode is one of "in table", "in caption",
  1191. * "in column group", "in table body", "in row", or "in
  1192. * cell", then switch the insertion mode to "in select in
  1193. * table". Otherwise, switch the insertion mode to "in
  1194. * select". */
  1195. if (
  1196. $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
  1197. $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
  1198. $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
  1199. ) {
  1200. $this->mode = self::IN_SELECT_IN_TABLE;
  1201. } else {
  1202. $this->mode = self::IN_SELECT;
  1203. }
  1204. break;
  1205. case 'option': case 'optgroup':
  1206. if ($this->elementInScope('option')) {
  1207. $this->emitToken(array(
  1208. 'name' => 'option',
  1209. 'type' => HTML5_Tokenizer::ENDTAG,
  1210. ));
  1211. }
  1212. $this->reconstructActiveFormattingElements();
  1213. $this->insertElement($token);
  1214. break;
  1215. case 'rp': case 'rt':
  1216. /* If the stack of open elements has a ruby element in scope, then generate
  1217. * implied end tags. If the current node is not then a ruby element, this is
  1218. * a parse error; pop all the nodes from the current node up to the node
  1219. * immediately before the bottommost ruby element on the stack of open elements.
  1220. */
  1221. if ($this->elementInScope('ruby')) {
  1222. $this->generateImpliedEndTags();
  1223. }
  1224. $peek = false;
  1225. do {
  1226. if ($peek) {
  1227. // parse error
  1228. }
  1229. $peek = array_pop($this->stack);
  1230. } while ($peek->tagName !== 'ruby');
  1231. $this->stack[] = $peek; // we popped one too many
  1232. $this->insertElement($token);
  1233. break;
  1234. // spec diversion
  1235. case 'math':
  1236. $this->reconstructActiveFormattingElements();
  1237. $token = $this->adjustMathMLAttributes($token);
  1238. $token = $this->adjustForeignAttributes($token);
  1239. $this->insertForeignElement($token, self::NS_MATHML);
  1240. if (isset($token['self-closing'])) {
  1241. // XERROR: acknowledge the token's self-closing flag
  1242. array_pop($this->stack);
  1243. }
  1244. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1245. $this->secondary_mode = $this->mode;
  1246. $this->mode = self::IN_FOREIGN_CONTENT;
  1247. }
  1248. break;
  1249. case 'svg':
  1250. $this->reconstructActiveFormattingElements();
  1251. $token = $this->adjustSVGAttributes($token);
  1252. $token = $this->adjustForeignAttributes($token);
  1253. $this->insertForeignElement($token, self::NS_SVG);
  1254. if (isset($token['self-closing'])) {
  1255. // XERROR: acknowledge the token's self-closing flag
  1256. array_pop($this->stack);
  1257. }
  1258. if ($this->mode !== self::IN_FOREIGN_CONTENT) {
  1259. $this->secondary_mode = $this->mode;
  1260. $this->mode = self::IN_FOREIGN_CONTENT;
  1261. }
  1262. break;
  1263. case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
  1264. case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
  1265. // parse error
  1266. break;
  1267. /* A start tag token not covered by the previous entries */
  1268. default:
  1269. /* Reconstruct the active formatting elements, if any. */
  1270. $this->reconstructActiveFormattingElements();
  1271. $this->insertElement($token);
  1272. /* This element will be a phrasing element. */
  1273. break;
  1274. }
  1275. break;
  1276. case HTML5_Tokenizer::ENDTAG:
  1277. switch($token['name']) {
  1278. /* An end tag with the tag name "body" */
  1279. case 'body':
  1280. /* If the stack of open elements does not have a body
  1281. * element in scope, this is a parse error; ignore the
  1282. * token. */
  1283. if(!$this->elementInScope('body')) {
  1284. $this->ignored = true;
  1285. /* Otherwise, if there is a node in the stack of open
  1286. * elements that is not either a dc element, a dd element,
  1287. * a ds element, a dt element, an li element, an optgroup
  1288. * element, an option element, a p element, an rp element,
  1289. * an rt element, a tbody element, a td element, a tfoot
  1290. * element, a th element, a thead element, a tr element,
  1291. * the body element, or the html element, then this is a
  1292. * parse error.
  1293. */
  1294. } else {
  1295. // XERROR: implement this check for parse error
  1296. }
  1297. /* Change the insertion mode to "after body". */
  1298. $this->mode = self::AFTER_BODY;
  1299. break;
  1300. /* An end tag with the tag name "html" */
  1301. case 'html':
  1302. /* Act as if an end tag with tag name "body" had been seen,
  1303. then, if that token wasn't ignored, reprocess the current
  1304. token. */
  1305. $this->emitToken(array(
  1306. 'name' => 'body',
  1307. 'type' => HTML5_Tokenizer::ENDTAG
  1308. ));
  1309. if (!$this->ignored) $this->emitToken($token);
  1310. break;
  1311. case 'address': case 'article': case 'aside': case 'blockquote':
  1312. case 'center': case 'datagrid': case 'details': case 'dir':
  1313. case 'div': case 'dl': case 'fieldset': case 'footer':
  1314. case 'header': case 'hgroup': case 'listing': case 'menu':
  1315. case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
  1316. /* If the stack of open elements has an element in scope
  1317. with the same tag name as that of the token, then generate
  1318. implied end tags. */
  1319. if($this->elementInScope($token['name'])) {
  1320. $this->generateImpliedEndTags();
  1321. /* Now, if the current node is not an element with
  1322. the same tag name as that of the token, then this
  1323. is a parse error. */
  1324. // XERROR: implement parse error logic
  1325. /* If the stack of open elements has an element in
  1326. scope with the same tag name as that of the token,
  1327. then pop elements from this stack until an element
  1328. with that tag name has been popped from the stack. */
  1329. do {
  1330. $node = array_pop($this->stack);
  1331. } while ($node->tagName !== $token['name']);
  1332. } else {
  1333. // parse error
  1334. }
  1335. break;
  1336. /* An end tag whose tag name is "form" */
  1337. case 'form':
  1338. /* Let node be the element that the form element pointer is set to. */
  1339. $node = $this->form_pointer;
  1340. /* Set the form element pointer to null. */
  1341. $this->form_pointer = null;
  1342. /* If node is null or the stack of open elements does not
  1343. * have node in scope, then this is a parse error; ignore the token. */
  1344. if ($node === null || !in_array($node, $this->stack)) {
  1345. // parse error
  1346. $this->ignored = true;
  1347. } else {
  1348. /* 1. Generate implied end tags. */
  1349. $this->generateImpliedEndTags();
  1350. /* 2. If the current node is not node, then this is a parse error. */
  1351. if (end($this->stack) !== $node) {
  1352. // parse error
  1353. }
  1354. /* 3. Remove node from the stack of open elements. */
  1355. array_splice($this->stack, array_search($node, $this->stack, true), 1);
  1356. }
  1357. break;
  1358. /* An end tag whose tag name is "p" */
  1359. case 'p':
  1360. /* If the stack of open elements has a p element in scope,
  1361. then generate implied end tags, except for p elements. */
  1362. if($this->elementInScope('p')) {
  1363. /* Generate implied end tags, except for elements with
  1364. * the same tag name as the token. */
  1365. $this->generateImpliedEndTags(array('p'));
  1366. /* If the current node is not a p element, then this is
  1367. a parse error. */
  1368. // XERROR: implement
  1369. /* Pop elements from the stack of open elements until
  1370. * an element with the same tag name as the token has
  1371. * been popped from the stack. */
  1372. do {
  1373. $node = array_pop($this->stack);
  1374. } while ($node->tagName !== 'p');
  1375. } else {
  1376. // parse error
  1377. $this->emitToken(array(
  1378. 'name' => 'p',
  1379. 'type' => HTML5_Tokenizer::STARTTAG,
  1380. ));
  1381. $this->emitToken($token);
  1382. }
  1383. break;
  1384. /* An end tag whose tag name is "li" */
  1385. case 'li':
  1386. /* If the stack of open elements does not have an element
  1387. * in list item scope with the same tag name as that of the
  1388. * token, then this is a parse error; ignore the token. */
  1389. if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
  1390. /* Generate implied end tags, except for elements with the
  1391. * same tag name as the token. */
  1392. $this->generateImpliedEndTags(array($token['name']));
  1393. /* If the current node is not an element with the same tag
  1394. * name as that of the token, then this is a parse error. */
  1395. // XERROR: parse error
  1396. /* Pop elements from the stack of open elements until an
  1397. * element with the same tag name as the token has been
  1398. * popped from the stack. */
  1399. do {
  1400. $node = array_pop($this->stack);
  1401. } while ($node->tagName !== $token['name']);
  1402. } else {
  1403. // XERROR: parse error
  1404. }
  1405. break;
  1406. /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
  1407. case 'dc': case 'dd': case 'ds': case 'dt':
  1408. if($this->elementInScope($token['name'])) {
  1409. $this->generateImpliedEndTags(array($token['name']));
  1410. /* If the current node is not an element with the same
  1411. tag name as the token, then this is a parse error. */
  1412. // XERROR: implement parse error
  1413. /* Pop elements from the stack of open elements until
  1414. * an element with the same tag name as the token has
  1415. * been popped from the stack. */
  1416. do {
  1417. $node = array_pop($this->stack);
  1418. } while ($node->tagName !== $token['name']);
  1419. } else {
  1420. // XERROR: parse error
  1421. }
  1422. break;
  1423. /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
  1424. "h5", "h6" */
  1425. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
  1426. $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
  1427. /* If the stack of open elements has in scope an element whose
  1428. tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
  1429. generate implied end tags. */
  1430. if($this->elementInScope($elements)) {
  1431. $this->generateImpliedEndTags();
  1432. /* Now, if the current node is not an element with the same
  1433. tag name as that of the token, then this is a parse error. */
  1434. // XERROR: implement parse error
  1435. /* If the stack of open elements has in scope an element
  1436. whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
  1437. "h6", then pop elements from the stack until an element
  1438. with one of those tag names has been popped from the stack. */
  1439. do {
  1440. $node = array_pop($this->stack);
  1441. } while (!in_array($node->tagName, $elements));
  1442. } else {
  1443. // parse error
  1444. }
  1445. break;
  1446. /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
  1447. "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
  1448. case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
  1449. case 'i': case 'nobr': case 's': case 'small': case 'strike':
  1450. case 'strong': case 'tt': case 'u':
  1451. // XERROR: generally speaking this needs parse error logic
  1452. /* 1. Let the formatting element be the last element in
  1453. the list of active formatting elements that:
  1454. * is between the end of the list and the last scope
  1455. marker in the list, if any, or the start of the list
  1456. otherwise, and
  1457. * has the same tag name as the token.
  1458. */
  1459. while(true) {
  1460. for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
  1461. if($this->a_formatting[$a] === self::MARKER) {
  1462. break;
  1463. } elseif($this->a_formatting[$a]->tagName === $token['name']) {
  1464. $formatting_element = $this->a_formatting[$a];
  1465. $in_stack = in_array($formatting_element, $this->stack, true);
  1466. $fe_af_pos = $a;
  1467. break;
  1468. }
  1469. }
  1470. /* If there is no such node, or, if that node is
  1471. also in the stack of open elements but the element
  1472. is not in scope, then this is a parse error. Abort
  1473. these steps. The token is ignored. */
  1474. if(!isset($formatting_element) || ($in_stack &&
  1475. !$this->elementInScope($token['name']))) {
  1476. $this->ignored = true;
  1477. break;
  1478. /* Otherwise, if there is such a node, but that node
  1479. is not in the stack of open elements, then this is a
  1480. parse error; remove the element from the list, and
  1481. abort these steps. */
  1482. } elseif(isset($formatting_element) && !$in_stack) {
  1483. unset($this->a_formatting[$fe_af_pos]);
  1484. $this->a_formatting = array_merge($this->a_formatting);
  1485. break;
  1486. }
  1487. /* Otherwise, there is a formatting element and that
  1488. * element is in the stack and is in scope. If the
  1489. * element is not the current node, this is a parse
  1490. * error. In any case, proceed with the algorithm as
  1491. * written in the following steps. */
  1492. // XERROR: implement me
  1493. /* 2. Let the furthest block be the topmost node in the
  1494. stack of open elements that is lower in the stack
  1495. than the formatting element, and is not an element in
  1496. the phrasing or formatting categories. There might
  1497. not be one. */
  1498. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1499. $length = count($this->stack);
  1500. for($s = $fe_s_pos + 1; $s < $length; $s++) {
  1501. $category = $this->getElementCategory($this->stack[$s]);
  1502. if($category !== self::PHRASING && $category !== self::FORMATTING) {
  1503. $furthest_block = $this->stack[$s];
  1504. break;
  1505. }
  1506. }
  1507. /* 3. If there is no furthest block, then the UA must
  1508. skip the subsequent steps and instead just pop all
  1509. the nodes from the bottom of the stack of open
  1510. elements, from the current node up to the formatting
  1511. element, and remove the formatting element from the
  1512. list of active formatting elements. */
  1513. if(!isset($furthest_block)) {
  1514. for($n = $length - 1; $n >= $fe_s_pos; $n--) {
  1515. array_pop($this->stack);
  1516. }
  1517. unset($this->a_formatting[$fe_af_pos]);
  1518. $this->a_formatting = array_merge($this->a_formatting);
  1519. break;
  1520. }
  1521. /* 4. Let the common ancestor be the element
  1522. immediately above the formatting element in the stack
  1523. of open elements. */
  1524. $common_ancestor = $this->stack[$fe_s_pos - 1];
  1525. /* 5. Let a bookmark note the position of the
  1526. formatting element in the list of active formatting
  1527. elements relative to the elements on either side
  1528. of it in the list. */
  1529. $bookmark = $fe_af_pos;
  1530. /* 6. Let node and last node be the furthest block.
  1531. Follow these steps: */
  1532. $node = $furthest_block;
  1533. $last_node = $furthest_block;
  1534. while(true) {
  1535. for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
  1536. /* 6.1 Let node be the element immediately
  1537. prior to node in the stack of open elements. */
  1538. $node = $this->stack[$n];
  1539. /* 6.2 If node is not in the list of active
  1540. formatting elements, then remove node from
  1541. the stack of open elements and then go back
  1542. to step 1. */
  1543. if(!in_array($node, $this->a_formatting, true)) {
  1544. array_splice($this->stack, $n, 1);
  1545. } else {
  1546. break;
  1547. }
  1548. }
  1549. /* 6.3 Otherwise, if node is the formatting
  1550. element, then go to the next step in the overall
  1551. algorithm. */
  1552. if($node === $formatting_element) {
  1553. break;
  1554. /* 6.4 Otherwise, if last node is the furthest
  1555. block, then move the aforementioned bookmark to
  1556. be immediately after the node in the list of
  1557. active formatting elements. */
  1558. } elseif($last_node === $furthest_block) {
  1559. $bookmark = array_search($node, $this->a_formatting, true) + 1;
  1560. }
  1561. /* 6.5 Create an element for the token for which
  1562. * the element node was created, replace the entry
  1563. * for node in the list of active formatting
  1564. * elements with an entry for the new element,
  1565. * replace the entry for node in the stack of open
  1566. * elements with an entry for the new element, and
  1567. * let node be the new element. */
  1568. // we don't know what the token is anymore
  1569. // XDOM
  1570. $clone = $node->cloneNode();
  1571. $a_pos = array_search($node, $this->a_formatting, true);
  1572. $s_pos = array_search($node, $this->stack, true);
  1573. $this->a_formatting[$a_pos] = $clone;
  1574. $this->stack[$s_pos] = $clone;
  1575. $node = $clone;
  1576. /* 6.6 Insert last node into node, first removing
  1577. it from its previous parent node if any. */
  1578. // XDOM
  1579. if($last_node->parentNode !== null) {
  1580. $last_node->parentNode->removeChild($last_node);
  1581. }
  1582. // XDOM
  1583. $node->appendChild($last_node);
  1584. /* 6.7 Let last node be node. */
  1585. $last_node = $node;
  1586. /* 6.8 Return to step 1 of this inner set of steps. */
  1587. }
  1588. /* 7. If the common ancestor node is a table, tbody,
  1589. * tfoot, thead, or tr element, then, foster parent
  1590. * whatever last node ended up being in the previous
  1591. * step, first removing it from its previous parent
  1592. * node if any. */
  1593. // XDOM
  1594. if ($last_node->parentNode) { // common step
  1595. $last_node->parentNode->removeChild($last_node);
  1596. }
  1597. if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
  1598. $this->fosterParent($last_node);
  1599. /* Otherwise, append whatever last node ended up being
  1600. * in the previous step to the common ancestor node,
  1601. * first removing it from its previous parent node if
  1602. * any. */
  1603. } else {
  1604. // XDOM
  1605. $common_ancestor->appendChild($last_node);
  1606. }
  1607. /* 8. Create an element for the token for which the
  1608. * formatting element was created. */
  1609. // XDOM
  1610. $clone = $formatting_element->cloneNode();
  1611. /* 9. Take all of the child nodes of the furthest
  1612. block and append them to the element created in the
  1613. last step. */
  1614. // XDOM
  1615. while($furthest_block->hasChildNodes()) {
  1616. $child = $furthest_block->firstChild;
  1617. $furthest_block->removeChild($child);
  1618. $clone->appendChild($child);
  1619. }
  1620. /* 10. Append that clone to the furthest block. */
  1621. // XDOM
  1622. $furthest_block->appendChild($clone);
  1623. /* 11. Remove the formatting element from the list
  1624. of active formatting elements, and insert the new element
  1625. into the list of active formatting elements at the
  1626. position of the aforementioned bookmark. */
  1627. $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
  1628. array_splice($this->a_formatting, $fe_af_pos, 1);
  1629. $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
  1630. $af_part2 = array_slice($this->a_formatting, $bookmark);
  1631. $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
  1632. /* 12. Remove the formatting element from the stack
  1633. of open elements, and insert the new element into the stack
  1634. of open elements immediately below the position of the
  1635. furthest block in that stack. */
  1636. $fe_s_pos = array_search($formatting_element, $this->stack, true);
  1637. array_splice($this->stack, $fe_s_pos, 1);
  1638. $fb_s_pos = array_search($furthest_block, $this->stack, true);
  1639. $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
  1640. $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
  1641. $this->stack = array_merge($s_part1, array($clone), $s_part2);
  1642. /* 13. Jump back to step 1 in this series of steps. */
  1643. unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
  1644. }
  1645. break;
  1646. case 'applet': case 'button': case 'marquee': case 'object':
  1647. /* If the stack of open elements has an element in scope whose
  1648. tag name matches the tag name of the token, then generate implied
  1649. end tags. */
  1650. if($this->elementInScope($token['name'])) {
  1651. $this->generateImpliedEndTags();
  1652. /* Now, if the current node is not an element with the same
  1653. tag name as the token, then this is a parse error. */
  1654. // XERROR: implement logic
  1655. /* Pop elements from the stack of open elements until
  1656. * an element with the same tag name as the token has
  1657. * been popped from the stack. */
  1658. do {
  1659. $node = array_pop($this->stack);
  1660. } while ($node->tagName !== $token['name']);
  1661. /* Clear the list of active formatting elements up to the
  1662. * last marker. */
  1663. $keys = array_keys($this->a_formatting, self::MARKER, true);
  1664. $marker = end($keys);
  1665. for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
  1666. array_pop($this->a_formatting);
  1667. }
  1668. } else {
  1669. // parse error
  1670. }
  1671. break;
  1672. case 'br':
  1673. // Parse error
  1674. $this->emitToken(array(
  1675. 'name' => 'br',
  1676. 'type' => HTML5_Tokenizer::STARTTAG,
  1677. ));
  1678. break;
  1679. /* An end tag token not covered by the previous entries */
  1680. default:
  1681. for($n = count($this->stack) - 1; $n >= 0; $n--) {
  1682. /* Initialise node to be the current node (the bottommost
  1683. node of the stack). */
  1684. $node = $this->stack[$n];
  1685. /* If node has the same tag name as the end tag token,
  1686. then: */
  1687. if($token['name'] === $node->tagName) {
  1688. /* Generate implied end tags. */
  1689. $this->generateImpliedEndTags();
  1690. /* If the tag name of the end tag token does not
  1691. match the tag name of the current node, this is a
  1692. parse error. */
  1693. // XERROR: implement this
  1694. /* Pop all the nodes from the current node up to
  1695. node, including node, then stop these steps. */
  1696. // XSKETCHY
  1697. do {
  1698. $pop = array_pop($this->stack);
  1699. } while ($pop !== $node);
  1700. break;
  1701. } else {
  1702. $category = $this->getElementCategory($node);
  1703. if($category !== self::FORMATTING && $category !== self::PHRASING) {
  1704. /* Otherwise, if node is in neither the formatting
  1705. category nor the phrasing category, then this is a
  1706. parse error. Stop this algorithm. The end tag token
  1707. is ignored. */
  1708. $this->ignored = true;
  1709. break;
  1710. // parse error
  1711. }
  1712. }
  1713. /* Set node to the previous entry in the stack of open elements. Loop. */
  1714. }
  1715. break;
  1716. }
  1717. break;
  1718. }
  1719. break;
  1720. case self::IN_CDATA_RCDATA:
  1721. if (
  1722. $token['type'] === HTML5_Tokenizer::CHARACTER ||
  1723. $token['type'] === HTML5_Tokenizer::SPACECHARACTER
  1724. ) {
  1725. $this->insertText($token['data']);
  1726. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  1727. // parse error
  1728. /* If the current node is a script element, mark the script
  1729. * element as "already executed". */
  1730. // probably not necessary
  1731. array_pop($this->stack);
  1732. $this->mode = $this->original_mode;
  1733. $this->emitToken($token);
  1734. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
  1735. array_pop($this->stack);
  1736. $this->mode = $this->original_mode;
  1737. // we're ignoring all of the execution stuff
  1738. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
  1739. array_pop($this->stack);
  1740. $this->mode = $this->original_mode;
  1741. }
  1742. break;
  1743. case self::IN_TABLE:
  1744. $clear = array('html', 'table');
  1745. /* A character token */
  1746. if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
  1747. $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  1748. /* Let the pending table character tokens
  1749. * be an empty list of tokens. */
  1750. $this->pendingTableCharacters = "";
  1751. $this->pendingTableCharactersDirty = false;
  1752. /* Let the original insertion mode be the current
  1753. * insertion mode. */
  1754. $this->original_mode = $this->mode;
  1755. /* Switch the insertion mode to
  1756. * "in table text" and
  1757. * reprocess the token. */
  1758. $this->mode = self::IN_TABLE_TEXT;
  1759. $this->emitToken($token);
  1760. /* A comment token */
  1761. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  1762. /* Append a Comment node to the current node with the data
  1763. attribute set to the data given in the comment token. */
  1764. $this->insertComment($token['data']);
  1765. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  1766. // parse error
  1767. /* A start tag whose tag name is "caption" */
  1768. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1769. $token['name'] === 'caption') {
  1770. /* Clear the stack back to a table context. */
  1771. $this->clearStackToTableContext($clear);
  1772. /* Insert a marker at the end of the list of active
  1773. formatting elements. */
  1774. $this->a_formatting[] = self::MARKER;
  1775. /* Insert an HTML element for the token, then switch the
  1776. insertion mode to "in caption". */
  1777. $this->insertElement($token);
  1778. $this->mode = self::IN_CAPTION;
  1779. /* A start tag whose tag name is "colgroup" */
  1780. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1781. $token['name'] === 'colgroup') {
  1782. /* Clear the stack back to a table context. */
  1783. $this->clearStackToTableContext($clear);
  1784. /* Insert an HTML element for the token, then switch the
  1785. insertion mode to "in column group". */
  1786. $this->insertElement($token);
  1787. $this->mode = self::IN_COLUMN_GROUP;
  1788. /* A start tag whose tag name is "col" */
  1789. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1790. $token['name'] === 'col') {
  1791. $this->emitToken(array(
  1792. 'name' => 'colgroup',
  1793. 'type' => HTML5_Tokenizer::STARTTAG,
  1794. 'attr' => array()
  1795. ));
  1796. $this->emitToken($token);
  1797. /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
  1798. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  1799. array('tbody', 'tfoot', 'thead'))) {
  1800. /* Clear the stack back to a table context. */
  1801. $this->clearStackToTableContext($clear);
  1802. /* Insert an HTML element for the token, then switch the insertion
  1803. mode to "in table body". */
  1804. $this->insertElement($token);
  1805. $this->mode = self::IN_TABLE_BODY;
  1806. /* A start tag whose tag name is one of: "td", "th", "tr" */
  1807. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1808. in_array($token['name'], array('td', 'th', 'tr'))) {
  1809. /* Act as if a start tag token with the tag name "tbody" had been
  1810. seen, then reprocess the current token. */
  1811. $this->emitToken(array(
  1812. 'name' => 'tbody',
  1813. 'type' => HTML5_Tokenizer::STARTTAG,
  1814. 'attr' => array()
  1815. ));
  1816. $this->emitToken($token);
  1817. /* A start tag whose tag name is "table" */
  1818. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1819. $token['name'] === 'table') {
  1820. /* Parse error. Act as if an end tag token with the tag name "table"
  1821. had been seen, then, if that token wasn't ignored, reprocess the
  1822. current token. */
  1823. $this->emitToken(array(
  1824. 'name' => 'table',
  1825. 'type' => HTML5_Tokenizer::ENDTAG
  1826. ));
  1827. if (!$this->ignored) $this->emitToken($token);
  1828. /* An end tag whose tag name is "table" */
  1829. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  1830. $token['name'] === 'table') {
  1831. /* If the stack of open elements does not have an element in table
  1832. scope with the same tag name as the token, this is a parse error.
  1833. Ignore the token. (fragment case) */
  1834. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  1835. $this->ignored = true;
  1836. /* Otherwise: */
  1837. } else {
  1838. do {
  1839. $node = array_pop($this->stack);
  1840. } while ($node->tagName !== 'table');
  1841. /* Reset the insertion mode appropriately. */
  1842. $this->resetInsertionMode();
  1843. }
  1844. /* An end tag whose tag name is one of: "body", "caption", "col",
  1845. "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
  1846. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  1847. array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
  1848. 'tfoot', 'th', 'thead', 'tr'))) {
  1849. // Parse error. Ignore the token.
  1850. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  1851. ($token['name'] === 'style' || $token['name'] === 'script')) {
  1852. $this->processWithRulesFor($token, self::IN_HEAD);
  1853. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
  1854. // assignment is intentional
  1855. /* If the token does not have an attribute with the name "type", or
  1856. * if it does, but that attribute's value is not an ASCII
  1857. * case-insensitive match for the string "hidden", then: act as
  1858. * described in the "anything else" entry below. */
  1859. ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
  1860. // I.e., if it's an input with the type attribute == 'hidden'
  1861. /* Otherwise */
  1862. // parse error
  1863. $this->insertElement($token);
  1864. array_pop($this->stack);
  1865. } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
  1866. /* If the current node is not the root html element, then this is a parse error. */
  1867. if (end($this->stack)->tagName !== 'html') {
  1868. // Note: It can only be the current node in the fragment case.
  1869. // parse error
  1870. }
  1871. /* Stop parsing. */
  1872. /* Anything else */
  1873. } else {
  1874. /* Parse error. Process the token as if the insertion mode was "in
  1875. body", with the following exception: */
  1876. $old = $this->foster_parent;
  1877. $this->foster_parent = true;
  1878. $this->processWithRulesFor($token, self::IN_BODY);
  1879. $this->foster_parent = $old;
  1880. }
  1881. break;
  1882. case self::IN_TABLE_TEXT: // "in table text" mode: buffer text seen inside a table, flush it on the first non-text token
  1883. /* A character token */
  1884. if($token['type'] === HTML5_Tokenizer::CHARACTER) {
  1885. /* Append the character token to the pending table
  1886. * character tokens list. */
  1887. $this->pendingTableCharacters .= $token['data'];
  1888. $this->pendingTableCharactersDirty = true; // a CHARACTER (not SPACECHARACTER) token was buffered
  1889. } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  1890. $this->pendingTableCharacters .= $token['data']; // buffered too, but does not mark the buffer dirty
  1891. /* Anything else */
  1892. } else {
  1893. if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
  1894. /* If any of the tokens in the pending table character tokens list
  1895. * are character tokens that are not one of U+0009 CHARACTER
  1896. * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
  1897. * U+0020 SPACE, then reprocess those character tokens using the
  1898. * rules given in the "anything else" entry in the "in table"
  1899. * insertion mode.*/
  1900. if ($this->pendingTableCharactersDirty) {
  1901. /* Parse error. Process the token using the rules for the
  1902. * "in body" insertion mode, except that if the current
  1903. * node is a table, tbody, tfoot, thead, or tr element,
  1904. * then, whenever a node would be inserted into the current
  1905. * node, it must instead be foster parented. */
  1906. // XERROR
  1907. $old = $this->foster_parent; // saved so the flag can be restored after the nested call
  1908. $this->foster_parent = true;
  1909. $text_token = array(
  1910. 'type' => HTML5_Tokenizer::CHARACTER,
  1911. 'data' => $this->pendingTableCharacters,
  1912. );
  1913. $this->processWithRulesFor($text_token, self::IN_BODY);
  1914. $this->foster_parent = $old;
  1915. /* Otherwise, insert the characters given by the pending table
  1916. * character tokens list into the current node. */
  1917. } else {
  1918. $this->insertText($this->pendingTableCharacters);
  1919. }
  1920. $this->pendingTableCharacters = null;
  1921. $this->pendingTableCharactersDirty = false; // buffer flushed: clear the dirty flag so a later whitespace-only run is not foster parented
  1922. }
  1923. /* Switch the insertion mode to the original insertion mode and
  1924. * reprocess the token.
  1925. */
  1926. $this->mode = $this->original_mode;
  1927. $this->emitToken($token);
  1928. }
  1929. break;
  1930. case self::IN_CAPTION: // "in caption" mode: tokens seen while a caption element is open
  1931. /* An end tag whose tag name is "caption" */
  1932. if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
  1933. /* If the stack of open elements does not have an element in table
  1934. scope with the same tag name as the token, this is a parse error.
  1935. Ignore the token. (fragment case) */
  1936. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  1937. $this->ignored = true; // read by the synthetic "caption" end tag branch below
  1938. // Ignore
  1939. /* Otherwise: */
  1940. } else {
  1941. /* Generate implied end tags. */
  1942. $this->generateImpliedEndTags();
  1943. /* Now, if the current node is not a caption element, then this
  1944. is a parse error. */
  1945. // XERROR: implement
  1946. /* Pop elements from this stack until a caption element has
  1947. been popped from the stack. */
  1948. do {
  1949. $node = array_pop($this->stack);
  1950. } while ($node->tagName !== 'caption');
  1951. /* Clear the list of active formatting elements up to the last
  1952. marker. */
  1953. $this->clearTheActiveFormattingElementsUpToTheLastMarker();
  1954. /* Switch the insertion mode to "in table". */
  1955. $this->mode = self::IN_TABLE;
  1956. }
  1957. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  1958. "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
  1959. name is "table" */
  1960. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  1961. array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
  1962. 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  1963. $token['name'] === 'table')) {
  1964. /* Parse error. Act as if an end tag with the tag name "caption"
  1965. had been seen, then, if that token wasn't ignored, reprocess the
  1966. current token. */
  1967. $this->emitToken(array(
  1968. 'name' => 'caption',
  1969. 'type' => HTML5_Tokenizer::ENDTAG
  1970. ));
  1971. if (!$this->ignored) $this->emitToken($token);
  1972. /* An end tag whose tag name is one of: "body", "col", "colgroup",
  1973. "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
  1974. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  1975. array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
  1976. 'thead', 'tr'))) { // 'td' is part of this list per the rule quoted above; it used to fall through to "anything else"
  1977. // Parse error. Ignore the token.
  1978. $this->ignored = true;
  1979. /* Anything else */
  1980. } else {
  1981. /* Process the token as if the insertion mode was "in body". */
  1982. $this->processWithRulesFor($token, self::IN_BODY);
  1983. }
  1984. break;
  1985. case self::IN_COLUMN_GROUP: // "in column group" mode: tokens seen while a colgroup element is open
  1986. /* A character token that is one of U+0009 CHARACTER TABULATION,
  1987. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  1988. or U+0020 SPACE */
  1989. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  1990. /* Append the character to the current node. */
  1991. $this->insertText($token['data']);
  1992. /* A comment token */
  1993. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  1994. /* Append a Comment node to the current node with the data
  1995. attribute set to the data given in the comment token. */
  1996. $this->insertComment($token['data']); // same helper the other insertion modes use for comment tokens
  1997. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  1998. // parse error
  1999. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2000. $this->processWithRulesFor($token, self::IN_BODY);
  2001. /* A start tag whose tag name is "col" */
  2002. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
  2003. /* Insert a col element for the token. Immediately pop the current
  2004. node off the stack of open elements. */
  2005. $this->insertElement($token);
  2006. array_pop($this->stack);
  2007. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  2008. /* An end tag whose tag name is "colgroup" */
  2009. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2010. $token['name'] === 'colgroup') {
  2011. /* If the current node is the root html element, then this is a
  2012. parse error, ignore the token. (fragment case) */
  2013. if(end($this->stack)->tagName === 'html') {
  2014. $this->ignored = true; // read by the "anything else" branch below after its synthetic end tag
  2015. /* Otherwise, pop the current node (which will be a colgroup
  2016. element) from the stack of open elements. Switch the insertion
  2017. mode to "in table". */
  2018. } else {
  2019. array_pop($this->stack);
  2020. $this->mode = self::IN_TABLE;
  2021. }
  2022. /* An end tag whose tag name is "col" */
  2023. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
  2024. /* Parse error. Ignore the token. */
  2025. $this->ignored = true;
  2026. /* An end-of-file token */
  2027. /* If the current node is the root html element */
  2028. } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
  2029. /* Stop parsing */
  2030. /* Anything else */
  2031. } else {
  2032. /* Act as if an end tag with the tag name "colgroup" had been seen,
  2033. and then, if that token wasn't ignored, reprocess the current token. */
  2034. $this->emitToken(array(
  2035. 'name' => 'colgroup',
  2036. 'type' => HTML5_Tokenizer::ENDTAG
  2037. ));
  2038. if (!$this->ignored) $this->emitToken($token);
  2039. }
  2040. break;
  2041. case self::IN_TABLE_BODY: // "in table body" mode: tokens seen directly inside tbody/thead/tfoot
  2042. $clear = array('tbody', 'tfoot', 'thead', 'html'); // tag-name list handed to clearStackToTableContext() in the branches below
  2043. /* A start tag whose tag name is "tr" */
  2044. if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
  2045. /* Clear the stack back to a table body context. */
  2046. $this->clearStackToTableContext($clear);
  2047. /* Insert a tr element for the token, then switch the insertion
  2048. mode to "in row". */
  2049. $this->insertElement($token);
  2050. $this->mode = self::IN_ROW;
  2051. /* A start tag whose tag name is one of: "th", "td" */
  2052. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2053. ($token['name'] === 'th' || $token['name'] === 'td')) {
  2054. /* Parse error. Act as if a start tag with the tag name "tr" had
  2055. been seen, then reprocess the current token. */
  2056. $this->emitToken(array(
  2057. 'name' => 'tr',
  2058. 'type' => HTML5_Tokenizer::STARTTAG,
  2059. 'attr' => array()
  2060. ));
  2061. $this->emitToken($token); // the synthetic tr switched the mode to "in row", so the cell is handled there
  2062. /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
  2063. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2064. in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
  2065. /* If the stack of open elements does not have an element in table
  2066. scope with the same tag name as the token, this is a parse error.
  2067. Ignore the token. */
  2068. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2069. // Parse error
  2070. $this->ignored = true;
  2071. /* Otherwise: */
  2072. } else {
  2073. /* Clear the stack back to a table body context. */
  2074. $this->clearStackToTableContext($clear);
  2075. /* Pop the current node from the stack of open elements. Switch
  2076. the insertion mode to "in table". */
  2077. array_pop($this->stack);
  2078. $this->mode = self::IN_TABLE;
  2079. }
  2080. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  2081. "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
  2082. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  2083. array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
  2084. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
  2085. /* If the stack of open elements does not have a tbody, thead, or
  2086. tfoot element in table scope, this is a parse error. Ignore the
  2087. token. (fragment case) */
  2088. if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) {
  2089. // parse error
  2090. $this->ignored = true;
  2091. /* Otherwise: */
  2092. } else {
  2093. /* Clear the stack back to a table body context. */
  2094. $this->clearStackToTableContext($clear);
  2095. /* Act as if an end tag with the same tag name as the current
  2096. node ("tbody", "tfoot", or "thead") had been seen, then
  2097. reprocess the current token. */
  2098. $this->emitToken(array(
  2099. 'name' => end($this->stack)->tagName, // closes whichever section element is now the current node
  2100. 'type' => HTML5_Tokenizer::ENDTAG
  2101. ));
  2102. $this->emitToken($token);
  2103. }
  2104. /* An end tag whose tag name is one of: "body", "caption", "col",
  2105. "colgroup", "html", "td", "th", "tr" */
  2106. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2107. array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
  2108. /* Parse error. Ignore the token. */
  2109. $this->ignored = true;
  2110. /* Anything else */
  2111. } else {
  2112. /* Process the token as if the insertion mode was "in table". */
  2113. $this->processWithRulesFor($token, self::IN_TABLE);
  2114. }
  2115. break;
  2116. case self::IN_ROW: // "in row" mode: tokens seen directly inside a tr element
  2117. $clear = array('tr', 'html'); // tag-name list handed to clearStackToTableContext() in the branches below
  2118. /* A start tag whose tag name is one of: "th", "td" */
  2119. if($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2120. ($token['name'] === 'th' || $token['name'] === 'td')) {
  2121. /* Clear the stack back to a table row context. */
  2122. $this->clearStackToTableContext($clear);
  2123. /* Insert an HTML element for the token, then switch the insertion
  2124. mode to "in cell". */
  2125. $this->insertElement($token);
  2126. $this->mode = self::IN_CELL;
  2127. /* Insert a marker at the end of the list of active formatting
  2128. elements. */
  2129. $this->a_formatting[] = self::MARKER; // the "in cell" end-tag branch later clears the list back to this marker
  2130. /* An end tag whose tag name is "tr" */
  2131. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
  2132. /* If the stack of open elements does not have an element in table
  2133. scope with the same tag name as the token, this is a parse error.
  2134. Ignore the token. (fragment case) */
  2135. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2136. // Ignore.
  2137. $this->ignored = true; // read by the synthetic "tr" end tag branch below
  2138. /* Otherwise: */
  2139. } else {
  2140. /* Clear the stack back to a table row context. */
  2141. $this->clearStackToTableContext($clear);
  2142. /* Pop the current node (which will be a tr element) from the
  2143. stack of open elements. Switch the insertion mode to "in table
  2144. body". */
  2145. array_pop($this->stack);
  2146. $this->mode = self::IN_TABLE_BODY;
  2147. }
  2148. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  2149. "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
  2150. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  2151. array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
  2152. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
  2153. /* Act as if an end tag with the tag name "tr" had been seen, then,
  2154. if that token wasn't ignored, reprocess the current token. */
  2155. $this->emitToken(array(
  2156. 'name' => 'tr',
  2157. 'type' => HTML5_Tokenizer::ENDTAG
  2158. ));
  2159. if (!$this->ignored) $this->emitToken($token);
  2160. /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
  2161. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2162. in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
  2163. /* If the stack of open elements does not have an element in table
  2164. scope with the same tag name as the token, this is a parse error.
  2165. Ignore the token. */
  2166. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2167. $this->ignored = true;
  2168. /* Otherwise: */
  2169. } else {
  2170. /* Otherwise, act as if an end tag with the tag name "tr" had
  2171. been seen, then reprocess the current token. */
  2172. $this->emitToken(array(
  2173. 'name' => 'tr',
  2174. 'type' => HTML5_Tokenizer::ENDTAG
  2175. ));
  2176. $this->emitToken($token); // unlike the branch above, reprocessed without checking $this->ignored
  2177. }
  2178. /* An end tag whose tag name is one of: "body", "caption", "col",
  2179. "colgroup", "html", "td", "th" */
  2180. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2181. array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
  2182. /* Parse error. Ignore the token. */
  2183. $this->ignored = true;
  2184. /* Anything else */
  2185. } else {
  2186. /* Process the token as if the insertion mode was "in table". */
  2187. $this->processWithRulesFor($token, self::IN_TABLE);
  2188. }
  2189. break;
  2190. case self::IN_CELL: // "in cell" mode: tokens seen while a td or th element is open
  2191. /* An end tag whose tag name is one of: "td", "th" */
  2192. if($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2193. ($token['name'] === 'td' || $token['name'] === 'th')) {
  2194. /* If the stack of open elements does not have an element in table
  2195. scope with the same tag name as that of the token, then this is a
  2196. parse error and the token must be ignored. */
  2197. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2198. $this->ignored = true;
  2199. /* Otherwise: */
  2200. } else {
  2201. /* Generate implied end tags, except for elements with the same
  2202. tag name as the token. */
  2203. $this->generateImpliedEndTags(array($token['name']));
  2204. /* Now, if the current node is not an element with the same tag
  2205. name as the token, then this is a parse error. */
  2206. // XERROR: Implement parse error code
  2207. /* Pop elements from this stack until an element with the same
  2208. tag name as the token has been popped from the stack. */
  2209. do {
  2210. $node = array_pop($this->stack);
  2211. } while ($node->tagName !== $token['name']); // terminates because the scope check above found a matching element
  2212. /* Clear the list of active formatting elements up to the last
  2213. marker. */
  2214. $this->clearTheActiveFormattingElementsUpToTheLastMarker();
  2215. /* Switch the insertion mode to "in row". (The current node
  2216. will be a tr element at this point.) */
  2217. $this->mode = self::IN_ROW;
  2218. }
  2219. /* A start tag whose tag name is one of: "caption", "col", "colgroup",
  2220. "tbody", "td", "tfoot", "th", "thead", "tr" */
  2221. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
  2222. array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
  2223. 'thead', 'tr'))) {
  2224. /* If the stack of open elements does not have a td or th element
  2225. in table scope, then this is a parse error; ignore the token.
  2226. (fragment case) */
  2227. if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
  2228. // parse error
  2229. $this->ignored = true;
  2230. /* Otherwise, close the cell (see below) and reprocess the current
  2231. token. */
  2232. } else {
  2233. $this->closeCell(); // helper defined elsewhere in this class; presumably leaves "in cell" mode - confirm
  2234. $this->emitToken($token);
  2235. }
  2236. /* An end tag whose tag name is one of: "body", "caption", "col",
  2237. "colgroup", "html" */
  2238. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2239. array('body', 'caption', 'col', 'colgroup', 'html'))) {
  2240. /* Parse error. Ignore the token. */
  2241. $this->ignored = true;
  2242. /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
  2243. "thead", "tr" */
  2244. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
  2245. array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
  2246. /* If the stack of open elements does not have a td or th element
  2247. in table scope, then this is a parse error; ignore the token.
  2248. (innerHTML case) */
  2249. if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
  2250. // Parse error
  2251. $this->ignored = true;
  2252. /* Otherwise, close the cell (see below) and reprocess the current
  2253. token. */
  2254. } else {
  2255. $this->closeCell();
  2256. $this->emitToken($token);
  2257. }
  2258. /* Anything else */
  2259. } else {
  2260. /* Process the token as if the insertion mode was "in body". */
  2261. $this->processWithRulesFor($token, self::IN_BODY);
  2262. }
  2263. break;
  2264. case self::IN_SELECT: // "in select" mode: tokens seen while a select element is open
  2265. /* Handle the token as follows: */
  2266. /* A character token */
  2267. if(
  2268. $token['type'] === HTML5_Tokenizer::CHARACTER ||
  2269. $token['type'] === HTML5_Tokenizer::SPACECHARACTER
  2270. ) {
  2271. /* Append the token's character to the current node. */
  2272. $this->insertText($token['data']);
  2273. /* A comment token */
  2274. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2275. /* Append a Comment node to the current node with the data
  2276. attribute set to the data given in the comment token. */
  2277. $this->insertComment($token['data']);
  2278. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2279. // parse error
  2280. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2281. $this->processWithRulesFor($token, self::IN_BODY);
  2282. /* A start tag token whose tag name is "option" */
  2283. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2284. $token['name'] === 'option') {
  2285. /* If the current node is an option element, act as if an end tag
  2286. with the tag name "option" had been seen. */
  2287. if(end($this->stack)->tagName === 'option') {
  2288. $this->emitToken(array(
  2289. 'name' => 'option',
  2290. 'type' => HTML5_Tokenizer::ENDTAG
  2291. ));
  2292. }
  2293. /* Insert an HTML element for the token. */
  2294. $this->insertElement($token);
  2295. /* A start tag token whose tag name is "optgroup" */
  2296. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2297. $token['name'] === 'optgroup') {
  2298. /* If the current node is an option element, act as if an end tag
  2299. with the tag name "option" had been seen. */
  2300. if(end($this->stack)->tagName === 'option') {
  2301. $this->emitToken(array(
  2302. 'name' => 'option',
  2303. 'type' => HTML5_Tokenizer::ENDTAG
  2304. ));
  2305. }
  2306. /* If the current node is an optgroup element, act as if an end tag
  2307. with the tag name "optgroup" had been seen. */
  2308. if(end($this->stack)->tagName === 'optgroup') {
  2309. $this->emitToken(array(
  2310. 'name' => 'optgroup',
  2311. 'type' => HTML5_Tokenizer::ENDTAG
  2312. ));
  2313. }
  2314. /* Insert an HTML element for the token. */
  2315. $this->insertElement($token);
  2316. /* An end tag token whose tag name is "optgroup" */
  2317. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2318. $token['name'] === 'optgroup') {
  2319. /* First, if the current node is an option element, and the node
  2320. immediately before it in the stack of open elements is an optgroup
  2321. element, then act as if an end tag with the tag name "option" had
  2322. been seen. */
  2323. $elements_in_stack = count($this->stack);
  2324. if($this->stack[$elements_in_stack - 1]->tagName === 'option' && // NOTE(review): direct indexing assumes the stack is a gap-free list with at least two entries - confirm
  2325. $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
  2326. $this->emitToken(array(
  2327. 'name' => 'option',
  2328. 'type' => HTML5_Tokenizer::ENDTAG
  2329. ));
  2330. }
  2331. /* If the current node is an optgroup element, then pop that node
  2332. from the stack of open elements. Otherwise, this is a parse error,
  2333. ignore the token. */
  2334. if(end($this->stack)->tagName === 'optgroup') {
  2335. array_pop($this->stack);
  2336. } else {
  2337. // parse error
  2338. $this->ignored = true;
  2339. }
  2340. /* An end tag token whose tag name is "option" */
  2341. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2342. $token['name'] === 'option') {
  2343. /* If the current node is an option element, then pop that node
  2344. from the stack of open elements. Otherwise, this is a parse error,
  2345. ignore the token. */
  2346. if(end($this->stack)->tagName === 'option') {
  2347. array_pop($this->stack);
  2348. } else {
  2349. // parse error
  2350. $this->ignored = true;
  2351. }
  2352. /* An end tag whose tag name is "select" */
  2353. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2354. $token['name'] === 'select') {
  2355. /* If the stack of open elements does not have an element in table
  2356. scope with the same tag name as the token, this is a parse error.
  2357. Ignore the token. (fragment case) */
  2358. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2359. $this->ignored = true;
  2360. // parse error
  2361. /* Otherwise: */
  2362. } else {
  2363. /* Pop elements from the stack of open elements until a select
  2364. element has been popped from the stack. */
  2365. do {
  2366. $node = array_pop($this->stack);
  2367. } while ($node->tagName !== 'select');
  2368. /* Reset the insertion mode appropriately. */
  2369. $this->resetInsertionMode();
  2370. }
  2371. /* A start tag whose tag name is "select" */
  2372. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
  2373. /* Parse error. Act as if the token had been an end tag with the
  2374. tag name "select" instead. */
  2375. $this->emitToken(array(
  2376. 'name' => 'select',
  2377. 'type' => HTML5_Tokenizer::ENDTAG
  2378. ));
  2379. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2380. ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) {
  2381. // parse error: close the select with a synthetic end tag, then reprocess the start tag
  2382. $this->emitToken(array(
  2383. 'name' => 'select',
  2384. 'type' => HTML5_Tokenizer::ENDTAG
  2385. ));
  2386. $this->emitToken($token); // NOTE(review): reprocessed even if the synthetic end tag was ignored - confirm this is intended
  2387. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
  2388. $this->processWithRulesFor($token, self::IN_HEAD);
  2389. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2390. // XERROR: If the current node is not the root html element, then this is a parse error.
  2391. /* Stop parsing */
  2392. /* Anything else */
  2393. } else {
  2394. /* Parse error. Ignore the token. */
  2395. $this->ignored = true;
  2396. }
  2397. break;
  2398. case self::IN_SELECT_IN_TABLE: // "in select in table" mode: table-structure tags close the select, everything else uses the "in select" rules
  2399. if($token['type'] === HTML5_Tokenizer::STARTTAG && // a start tag for one of the table-structure elements listed below
  2400. in_array($token['name'], array('caption', 'table', 'tbody',
  2401. 'tfoot', 'thead', 'tr', 'td', 'th'))) {
  2402. // parse error: act as if a "select" end tag had been seen, then reprocess the token
  2403. $this->emitToken(array(
  2404. 'name' => 'select',
  2405. 'type' => HTML5_Tokenizer::ENDTAG,
  2406. ));
  2407. $this->emitToken($token);
  2408. /* An end tag whose tag name is one of: "caption", "table", "tbody",
  2409. "tfoot", "thead", "tr", "td", "th" */
  2410. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2411. in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
  2412. /* Parse error. */
  2413. // parse error
  2414. /* If the stack of open elements has an element in table scope with
  2415. the same tag name as that of the token, then act as if an end tag
  2416. with the tag name "select" had been seen, and reprocess the token.
  2417. Otherwise, ignore the token. */
  2418. if($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
  2419. $this->emitToken(array(
  2420. 'name' => 'select',
  2421. 'type' => HTML5_Tokenizer::ENDTAG
  2422. ));
  2423. $this->emitToken($token);
  2424. } else {
  2425. $this->ignored = true;
  2426. }
  2427. } else {
  2428. $this->processWithRulesFor($token, self::IN_SELECT); // anything else: defer to the "in select" rules
  2429. }
  2430. break;
  2431. case self::IN_FOREIGN_CONTENT: // "in foreign content" mode: tokens seen inside MathML or SVG elements
  2432. if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
  2433. $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2434. $this->insertText($token['data']);
  2435. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
  2436. $this->insertComment($token['data']);
  2437. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2438. // XERROR: parse error
  2439. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2440. $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
  2441. // XDOM
  2442. end($this->stack)->namespaceURI === self::NS_SVG) {
  2443. array_pop($this->stack); // closing an SVG script element only pops it; no script is run
  2444. // a bunch of script running mumbo jumbo
  2445. } elseif (
  2446. ($token['type'] === HTML5_Tokenizer::STARTTAG && // a start tag whose current node is one of the four cases below...
  2447. ((
  2448. $token['name'] !== 'mglyph' &&
  2449. $token['name'] !== 'malignmark' &&
  2450. // XDOM
  2451. end($this->stack)->namespaceURI === self::NS_MATHML &&
  2452. in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
  2453. ) ||
  2454. (
  2455. $token['name'] === 'svg' &&
  2456. // XDOM
  2457. end($this->stack)->namespaceURI === self::NS_MATHML &&
  2458. end($this->stack)->tagName === 'annotation-xml'
  2459. ) ||
  2460. (
  2461. // XDOM
  2462. end($this->stack)->namespaceURI === self::NS_SVG &&
  2463. in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
  2464. ) ||
  2465. (
  2466. // XSKETCHY && XDOM
  2467. end($this->stack)->namespaceURI === self::NS_HTML
  2468. ))
  2469. ) || $token['type'] === HTML5_Tokenizer::ENDTAG // ...or any end tag not handled by the SVG script branch above
  2470. ) {
  2471. $this->processWithRulesFor($token, $this->secondary_mode);
  2472. /* If, after doing so, the insertion mode is still "in foreign
  2473. * content", but there is no element in scope that has a namespace
  2474. * other than the HTML namespace, switch the insertion mode to the
  2475. * secondary insertion mode. */
  2476. if ($this->mode === self::IN_FOREIGN_CONTENT) {
  2477. $found = false; // becomes true when a non-HTML-namespace element is found before a scope boundary
  2478. // this basically duplicates elementInScope()
  2479. for ($i = count($this->stack) - 1; $i >= 0; $i--) { // walk from the current node towards the root
  2480. // XDOM
  2481. $node = $this->stack[$i];
  2482. if ($node->namespaceURI !== self::NS_HTML) {
  2483. $found = true;
  2484. break;
  2485. } elseif (in_array($node->tagName, array('table', 'html',
  2486. 'applet', 'caption', 'td', 'th', 'button', 'marquee',
  2487. 'object')) || ($node->tagName === 'foreignObject' &&
  2488. $node->namespaceURI === self::NS_SVG)) {
  2489. break; // scope boundary reached without finding a foreign element
  2490. }
  2491. }
  2492. if (!$found) {
  2493. $this->mode = $this->secondary_mode;
  2494. }
  2495. }
  2496. } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
  2497. $token['type'] === HTML5_Tokenizer::STARTTAG &&
  2498. (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
  2499. "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",
  2500. "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
  2501. "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
  2502. "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
  2503. "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
  2504. $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
  2505. // XERROR: parse error
  2506. do {
  2507. $node = array_pop($this->stack);
  2508. // XDOM
  2509. } while ($node->namespaceURI !== self::NS_HTML);
  2510. $this->stack[] = $node; // the loop also popped the first HTML-namespace node; push it back
  2511. $this->mode = $this->secondary_mode;
  2512. $this->emitToken($token); // reprocess the token under the secondary insertion mode
  2513. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
  2514. static $svg_lookup = array( // lower-cased SVG tag name => mixed-case tag name
  2515. 'altglyph' => 'altGlyph',
  2516. 'altglyphdef' => 'altGlyphDef',
  2517. 'altglyphitem' => 'altGlyphItem',
  2518. 'animatecolor' => 'animateColor',
  2519. 'animatemotion' => 'animateMotion',
  2520. 'animatetransform' => 'animateTransform',
  2521. 'clippath' => 'clipPath',
  2522. 'feblend' => 'feBlend',
  2523. 'fecolormatrix' => 'feColorMatrix',
  2524. 'fecomponenttransfer' => 'feComponentTransfer',
  2525. 'fecomposite' => 'feComposite',
  2526. 'feconvolvematrix' => 'feConvolveMatrix',
  2527. 'fediffuselighting' => 'feDiffuseLighting',
  2528. 'fedisplacementmap' => 'feDisplacementMap',
  2529. 'fedistantlight' => 'feDistantLight',
  2530. 'feflood' => 'feFlood',
  2531. 'fefunca' => 'feFuncA',
  2532. 'fefuncb' => 'feFuncB',
  2533. 'fefuncg' => 'feFuncG',
  2534. 'fefuncr' => 'feFuncR',
  2535. 'fegaussianblur' => 'feGaussianBlur',
  2536. 'feimage' => 'feImage',
  2537. 'femerge' => 'feMerge',
  2538. 'femergenode' => 'feMergeNode',
  2539. 'femorphology' => 'feMorphology',
  2540. 'feoffset' => 'feOffset',
  2541. 'fepointlight' => 'fePointLight',
  2542. 'fespecularlighting' => 'feSpecularLighting',
  2543. 'fespotlight' => 'feSpotLight',
  2544. 'fetile' => 'feTile',
  2545. 'feturbulence' => 'feTurbulence',
  2546. 'foreignobject' => 'foreignObject',
  2547. 'glyphref' => 'glyphRef',
  2548. 'lineargradient' => 'linearGradient',
  2549. 'radialgradient' => 'radialGradient',
  2550. 'textpath' => 'textPath',
  2551. );
  2552. // XDOM
  2553. $current = end($this->stack); // the new element is created in the current node's namespace
  2554. if ($current->namespaceURI === self::NS_MATHML) {
  2555. $token = $this->adjustMathMLAttributes($token);
  2556. }
  2557. if ($current->namespaceURI === self::NS_SVG &&
  2558. isset($svg_lookup[$token['name']])) {
  2559. $token['name'] = $svg_lookup[$token['name']]; // swap in the mixed-case SVG tag name
  2560. }
  2561. if ($current->namespaceURI === self::NS_SVG) {
  2562. $token = $this->adjustSVGAttributes($token);
  2563. }
  2564. $token = $this->adjustForeignAttributes($token);
  2565. $this->insertForeignElement($token, $current->namespaceURI);
  2566. if (isset($token['self-closing'])) {
  2567. array_pop($this->stack); // a self-closing element is popped right after insertion
  2568. // XERROR: acknowledge self-closing flag
  2569. }
  2570. }
  2571. break;
  2572. case self::AFTER_BODY:
  2573. /* Handle the token as follows: */
  2574. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2575. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2576. or U+0020 SPACE */
  2577. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2578. /* Process the token as it would be processed if the insertion mode
  2579. was "in body". */
  2580. $this->processWithRulesFor($token, self::IN_BODY);
  2581. /* A comment token */
  2582. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2583. /* Append a Comment node to the first element in the stack of open
  2584. elements (the html element), with the data attribute set to the
  2585. data given in the comment token. */
  2586. // XDOM
  2587. $comment = $this->dom->createComment($token['data']);
  2588. $this->stack[0]->appendChild($comment);
  2589. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2590. // parse error
  2591. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2592. $this->processWithRulesFor($token, self::IN_BODY);
  2593. /* An end tag with the tag name "html" */
  2594. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
  2595. /* If the parser was originally created as part of the HTML
  2596. * fragment parsing algorithm, this is a parse error; ignore
  2597. * the token. (fragment case) */
  2598. $this->ignored = true;
  2599. // XERROR: implement this
  2600. $this->mode = self::AFTER_AFTER_BODY;
  2601. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2602. /* Stop parsing */
  2603. /* Anything else */
  2604. } else {
  2605. /* Parse error. Set the insertion mode to "in body" and reprocess
  2606. the token. */
  2607. $this->mode = self::IN_BODY;
  2608. $this->emitToken($token);
  2609. }
  2610. break;
  2611. case self::IN_FRAMESET:
  2612. /* Handle the token as follows: */
  2613. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2614. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2615. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
  2616. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2617. /* Append the character to the current node. */
  2618. $this->insertText($token['data']);
  2619. /* A comment token */
  2620. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2621. /* Append a Comment node to the current node with the data
  2622. attribute set to the data given in the comment token. */
  2623. $this->insertComment($token['data']);
  2624. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2625. // parse error
  2626. /* A start tag with the tag name "frameset" */
  2627. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2628. $token['name'] === 'frameset') {
  2629. $this->insertElement($token);
  2630. /* An end tag with the tag name "frameset" */
  2631. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2632. $token['name'] === 'frameset') {
  2633. /* If the current node is the root html element, then this is a
  2634. parse error; ignore the token. (fragment case) */
  2635. if(end($this->stack)->tagName === 'html') {
  2636. $this->ignored = true;
  2637. // Parse error
  2638. } else {
  2639. /* Otherwise, pop the current node from the stack of open
  2640. elements. */
  2641. array_pop($this->stack);
  2642. /* If the parser was not originally created as part of the HTML
  2643. * fragment parsing algorithm (fragment case), and the current
  2644. * node is no longer a frameset element, then switch the
  2645. * insertion mode to "after frameset". */
  2646. $this->mode = self::AFTER_FRAMESET;
  2647. }
  2648. /* A start tag with the tag name "frame" */
  2649. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2650. $token['name'] === 'frame') {
  2651. /* Insert an HTML element for the token. */
  2652. $this->insertElement($token);
  2653. /* Immediately pop the current node off the stack of open elements. */
  2654. array_pop($this->stack);
  2655. // XERROR: Acknowledge the token's self-closing flag, if it is set.
  2656. /* A start tag with the tag name "noframes" */
  2657. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2658. $token['name'] === 'noframes') {
  2659. /* Process the token using the rules for the "in head" insertion mode. */
  2660. $this->processwithRulesFor($token, self::IN_HEAD);
  2661. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2662. // XERROR: If the current node is not the root html element, then this is a parse error.
  2663. /* Stop parsing */
  2664. /* Anything else */
  2665. } else {
  2666. /* Parse error. Ignore the token. */
  2667. $this->ignored = true;
  2668. }
  2669. break;
  2670. case self::AFTER_FRAMESET:
  2671. /* Handle the token as follows: */
  2672. /* A character token that is one of one of U+0009 CHARACTER TABULATION,
  2673. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
  2674. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
  2675. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
  2676. /* Append the character to the current node. */
  2677. $this->insertText($token['data']);
  2678. /* A comment token */
  2679. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
  2680. /* Append a Comment node to the current node with the data
  2681. attribute set to the data given in the comment token. */
  2682. $this->insertComment($token['data']);
  2683. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
  2684. // parse error
  2685. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
  2686. $this->processWithRulesFor($token, self::IN_BODY);
  2687. /* An end tag with the tag name "html" */
  2688. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
  2689. $token['name'] === 'html') {
  2690. $this->mode = self::AFTER_AFTER_FRAMESET;
  2691. /* A start tag with the tag name "noframes" */
  2692. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
  2693. $token['name'] === 'noframes') {
  2694. $this->processWithRulesFor($token, self::IN_HEAD);
  2695. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2696. /* Stop parsing */
  2697. /* Anything else */
  2698. } else {
  2699. /* Parse error. Ignore the token. */
  2700. $this->ignored = true;
  2701. }
  2702. break;
  2703. case self::AFTER_AFTER_BODY:
  2704. /* A comment token */
  2705. if($token['type'] === HTML5_Tokenizer::COMMENT) {
  2706. /* Append a Comment node to the Document object with the data
  2707. attribute set to the data given in the comment token. */
  2708. // XDOM
  2709. $comment = $this->dom->createComment($token['data']);
  2710. $this->dom->appendChild($comment);
  2711. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
  2712. $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
  2713. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
  2714. $this->processWithRulesFor($token, self::IN_BODY);
  2715. /* An end-of-file token */
  2716. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2717. /* OMG DONE!! */
  2718. } else {
  2719. // parse error
  2720. $this->mode = self::IN_BODY;
  2721. $this->emitToken($token);
  2722. }
  2723. break;
  2724. case self::AFTER_AFTER_FRAMESET:
  2725. /* A comment token */
  2726. if($token['type'] === HTML5_Tokenizer::COMMENT) {
  2727. /* Append a Comment node to the Document object with the data
  2728. attribute set to the data given in the comment token. */
  2729. // XDOM
  2730. $comment = $this->dom->createComment($token['data']);
  2731. $this->dom->appendChild($comment);
  2732. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
  2733. $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
  2734. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
  2735. $this->processWithRulesFor($token, self::IN_BODY);
  2736. /* An end-of-file token */
  2737. } elseif($token['type'] === HTML5_Tokenizer::EOF) {
  2738. /* OMG DONE!! */
  2739. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
  2740. $this->processWithRulesFor($token, self::IN_HEAD);
  2741. } else {
  2742. // parse error
  2743. }
  2744. break;
  2745. }
  2746. // end funky indenting
  2747. }
  /**
   * Creates an element in the HTML namespace for a start tag token and,
   * unless told otherwise, inserts it and makes it the current node.
   *
   * @param array $token  Token with a 'name' entry and an optional 'attr'
   *                      list of array('name' => ..., 'value' => ...) pairs.
   * @param bool  $append When true (the default) the element is inserted
   *                      through appendToRealParent() and pushed onto the
   *                      stack of open elements; when false it is only
   *                      created and returned.
   * @return mixed The element produced by $this->dom->createElementNS().
   */
  private function insertElement($token, $append = true) {
      $element = $this->dom->createElementNS(self::NS_HTML, $token['name']);

      $attributes = empty($token['attr']) ? array() : $token['attr'];
      foreach ($attributes as $attribute) {
          $name = $attribute['name'];
          // The first occurrence of an attribute name wins.
          if ($element->hasAttribute($name)) {
              continue;
          }
          // Names that do not start with a letter, "_" or ":" are dropped.
          if (!preg_match("/^[a-zA-Z_:]/", $name)) {
              continue;
          }
          $element->setAttribute($name, $attribute['value']);
      }

      if ($append) {
          $this->appendToRealParent($element);
          $this->stack[] = $element;
      }

      return $element;
  }
  /**
   * Appends a text node with the given character data at the current
   * insertion point (see appendToRealParent()).
   *
   * Nothing is inserted for an empty string. While $this->ignore_lf_token
   * is set, a single leading line feed is stripped first, and nothing is
   * inserted if that leaves no data.
   *
   * @param string $data Character data to insert.
   */
  private function insertText($data) {
      if ($data === '') {
          return;
      }

      if ($this->ignore_lf_token && $data[0] === "\n") {
          $data = substr($data, 1);
          // substr() yields false before PHP 8.0 and '' from 8.0 onwards when
          // nothing is left; both mean there is no text to insert.
          if ($data === false || $data === '') {
              return;
          }
      }

      $text = $this->dom->createTextNode($data);
      $this->appendToRealParent($text);
  }
  /**
   * Creates a comment node holding $data and inserts it at the current
   * insertion point (see appendToRealParent()).
   *
   * @param string $data Comment text.
   */
  private function insertComment($data) {
      $this->appendToRealParent($this->dom->createComment($data));
  }
  /**
   * Inserts $node where the tree builder currently wants new nodes to go.
   *
   * Normally that is the current node (the last entry of the stack of open
   * elements). When foster parenting is switched on and the current node is
   * a table, tbody, tfoot, thead or tr element, the node is handed to
   * fosterParent() instead.
   *
   * @param mixed $node Node to insert.
   */
  private function appendToRealParent($node) {
      $table_parts = array('table', 'tbody', 'tfoot', 'thead', 'tr');

      if ($this->foster_parent && in_array(end($this->stack)->tagName, $table_parts)) {
          $this->fosterParent($node);
          return;
      }

      end($this->stack)->appendChild($node);
  }
  /**
   * Tells whether an element with the given tag name is "in scope" on the
   * stack of open elements.
   *
   * The stack is walked from the current node towards the root. The walk
   * succeeds on the first node whose tag name equals $el and fails on the
   * first scope boundary:
   *  - table and html for every scope;
   *  - applet, caption, td, th, button, marquee, object and SVG
   *    foreignObject for every scope except self::SCOPE_TABLE;
   *  - ol and ul for self::SCOPE_LISTITEM only.
   *
   * @param string|array $el    Tag name, or a list of tag names; a list
   *                            matches when any one of its names is in scope.
   * @param mixed        $scope One of the class's SCOPE* constants.
   * @return bool|null true on a match, false at a boundary, and null when
   *                   the stack is exhausted without hitting either.
   */
  private function elementInScope($el, $scope = self::SCOPE) {
      if (is_array($el)) {
          foreach ($el as $candidate) {
              if ($this->elementInScope($candidate, $scope)) {
                  return true;
              }
          }
          return false;
      }

      $general_boundaries = array('applet', 'caption', 'td', 'th', 'button', 'marquee', 'object');

      for ($i = count($this->stack) - 1; $i >= 0; $i--) {
          $node = $this->stack[$i];
          $tag = $node->tagName;

          // The target itself: match.
          if ($tag === $el) {
              return true;
          }

          // Boundaries shared by all scopes.
          if ($tag === 'table' || $tag === 'html') {
              return false;
          }

          // Boundaries for "in scope" and "in list item scope".
          if ($scope !== self::SCOPE_TABLE) {
              if (in_array($tag, $general_boundaries)) {
                  return false;
              }
              if ($tag === 'foreignObject' && $node->namespaceURI === self::NS_SVG) {
                  return false;
              }
          }

          // Extra boundaries for "in list item scope".
          if ($scope === self::SCOPE_LISTITEM && in_array($tag, array('ol', 'ul'))) {
              return false;
          }
      }

      // Stack exhausted without a decision.
      return null;
  }
  /**
   * Reconstructs the active formatting elements: re-opens every formatting
   * element at the end of the list that is no longer on the stack of open
   * elements.
   *
   * The list is rewound from its last entry until the entry before the
   * cursor is a marker or an element that is still open (or the start of
   * the list is reached). Every entry from the cursor to the end of the
   * list is then shallow-cloned, inserted via appendToRealParent(), pushed
   * onto the stack, and replaced in the list by its clone.
   *
   * The rewind must stop *after* a marker / open element, never on it.
   * Cloning that entry would either call cloneNode() on the marker or
   * duplicate an element that is already open.
   *
   * Assumes $this->a_formatting is indexed 0..n-1.
   *
   * @return false|null false when there was nothing to reconstruct,
   *                    null otherwise.
   */
  private function reconstructActiveFormattingElements() {
      /* 1. If there are no entries in the list of active formatting elements,
      then there is nothing to reconstruct; stop this algorithm. */
      $count = count($this->a_formatting);
      if ($count === 0) {
          return false;
      }

      /* 2. If the last (most recently added) entry in the list of active
      formatting elements is a marker, or if it is an element that is in the
      stack of open elements, then there is nothing to reconstruct; stop this
      algorithm. */
      $last = $count - 1;
      $entry = $this->a_formatting[$last];
      if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
          return false;
      }

      /* 3-6. Rewind: move the cursor towards the start of the list while the
      entry before it is neither a marker nor an element that is also in the
      stack of open elements. */
      $index = $last;
      while ($index > 0) {
          $previous = $this->a_formatting[$index - 1];
          if ($previous === self::MARKER || in_array($previous, $this->stack, true)) {
              break;
          }
          $index--;
      }

      /* 7-11. Advance: recreate every entry from the cursor to the end. */
      for (; $index <= $last; $index++) {
          /* 8. Perform a shallow clone of the element entry to obtain clone. */
          $clone = $this->a_formatting[$index]->cloneNode();

          /* 9. Append clone to the current node and push it onto the stack
          of open elements so that it is the new current node. */
          $this->appendToRealParent($clone);
          $this->stack[] = $clone;

          /* 10. Replace the entry for entry in the list with an entry for
          clone. */
          $this->a_formatting[$index] = $clone;
      }
  }
  /**
   * Clears the list of active formatting elements up to the last marker.
   *
   * Entries are removed from the end of the list until a marker has been
   * removed. If the list holds no marker at all, it is simply emptied
   * rather than looping forever on an empty list.
   */
  private function clearTheActiveFormattingElementsUpToTheLastMarker() {
      while (!empty($this->a_formatting)) {
          /* 1-2. Let entry be the last (most recently added) entry in the
          list of active formatting elements and remove it from the list. */
          $entry = array_pop($this->a_formatting);

          /* 3. If entry was a marker, then stop the algorithm at this point.
          The list has been cleared up to the last marker. */
          if ($entry === self::MARKER) {
              break;
          }
      }
  }
  /**
   * Generates implied end tags: pops the current node off the stack of open
   * elements for as long as it is one of dc, dd, ds, dt, li, p, td, th or
   * tr, minus any tag names listed in $exclude.
   *
   * NOTE(review): the specification text this method was written against
   * lists dc, dd, ds, dt, li, option, optgroup, p, rp and rt. The list
   * below differs (it has td/th/tr and lacks option/optgroup/rp/rt);
   * confirm whether that is intentional before changing it.
   *
   * @param array $exclude Tag names that must not be closed implicitly.
   */
  private function generateImpliedEndTags($exclude = array()) {
      $implied = array_diff(
          array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'),
          $exclude
      );

      while (true) {
          $current = end($this->stack);
          if (!in_array($current->tagName, $implied)) {
              break;
          }
          array_pop($this->stack);
      }
  }
  /**
   * Classifies a node by tag name.
   *
   * The tag name is looked up, in this order, in $this->special,
   * $this->scoping and $this->formatting; the first list containing it
   * decides the category. Anything else is phrasing.
   *
   * A backtrace is printed when $node is not an object (debugging aid).
   *
   * @param mixed $node Node whose tagName is inspected.
   * @return mixed self::SPECIAL, self::SCOPING, self::FORMATTING or
   *               self::PHRASING.
   */
  private function getElementCategory($node) {
      if (!is_object($node)) {
          debug_print_backtrace();
      }

      $tag = $node->tagName;

      if (in_array($tag, $this->special)) {
          return self::SPECIAL;
      }
      if (in_array($tag, $this->scoping)) {
          return self::SCOPING;
      }
      if (in_array($tag, $this->formatting)) {
          return self::FORMATTING;
      }
      return self::PHRASING;
  }
  /**
   * Clears the stack of open elements back to a table context: pops
   * elements until the current node's tag name is one of $elements.
   *
   * If none of the given elements is on the stack, the stack is emptied
   * and the method returns, instead of popping an empty stack forever.
   *
   * @param array $elements Tag names at which popping stops.
   */
  private function clearStackToTableContext($elements) {
      while (!empty($this->stack) && !in_array(end($this->stack)->tagName, $elements)) {
          array_pop($this->stack);
      }
  }
  /**
   * Resets the insertion mode appropriately by walking the stack of open
   * elements from the current node down to the root and picking the mode
   * that matches the first recognised node.
   *
   * When the walk reaches the first element of the stack, that node is
   * replaced by the context element, but only if one was supplied
   * (fragment case). Without a context the root element itself is
   * examined, so no property is ever read from null.
   *
   * Sets $this->mode, and $this->secondary_mode when switching to foreign
   * content.
   *
   * @param mixed $context Context element for the fragment case, or null.
   */
  private function resetInsertionMode($context = null) {
      /* Steps 4-10: modes selected purely by tag name. */
      $modes_by_tag = array(
          'select'   => self::IN_SELECT,
          'td'       => self::IN_CELL,
          'th'       => self::IN_CELL,
          'tr'       => self::IN_ROW,
          'tbody'    => self::IN_TABLE_BODY,
          'thead'    => self::IN_TABLE_BODY,
          'tfoot'    => self::IN_TABLE_BODY,
          'caption'  => self::IN_CAPTION,
          'colgroup' => self::IN_COLUMN_GROUP,
          'table'    => self::IN_TABLE,
      );

      /* 1. Let last be false. */
      $last = false;

      for ($n = count($this->stack) - 1; $n >= 0; $n--) {
          /* 2. Let node be the last node in the stack of open elements. */
          $node = $this->stack[$n];

          /* 3. If node is the first node in the stack of open elements, then
           * set last to true and set node to the context element. (fragment
           * case) */
          if ($this->stack[0]->isSameNode($node)) {
              $last = true;
              if ($context !== null) {
                  $node = $context;
              }
          }

          $tag = $node->tagName;

          /* 4-10. select, td/th, tr, tbody/thead/tfoot, caption, colgroup
           * and table each map directly to an insertion mode. */
          if (isset($modes_by_tag[$tag])) {
              $this->mode = $modes_by_tag[$tag];
              break;
          }

          /* 11. If node is an element from the MathML namespace or the SVG
           * namespace, then switch the insertion mode to "in foreign
           * content", let the secondary insertion mode be "in body", and
           * abort these steps. */
          if ($node->namespaceURI === self::NS_SVG ||
              $node->namespaceURI === self::NS_MATHML) {
              $this->mode = self::IN_FOREIGN_CONTENT;
              $this->secondary_mode = self::IN_BODY;
              break;
          }

          /* 12-13. A head element ("in body"! not "in head"!, fragment case)
           * and a body element both switch to "in body". */
          if ($tag === 'head' || $tag === 'body') {
              $this->mode = self::IN_BODY;
              break;
          }

          /* 14. If node is a frameset element, then switch the insertion
           * mode to "in frameset" and abort these steps. (fragment case) */
          if ($tag === 'frameset') {
              $this->mode = self::IN_FRAMESET;
              break;
          }

          /* 15. If node is an html element, then: if the head element
           * pointer is null, switch the insertion mode to "before head",
           * otherwise, switch the insertion mode to "after head". In either
           * case, abort these steps. (fragment case) */
          if ($tag === 'html') {
              $this->mode = ($this->head_pointer === null)
                  ? self::BEFORE_HEAD
                  : self::AFTER_HEAD;
              break;
          }

          /* 16. If last is true, then set the insertion mode to "in body"
           * and abort these steps. (fragment case) */
          if ($last) {
              $this->mode = self::IN_BODY;
              break;
          }
      }
  }
  /**
   * Closes the currently open table cell, if any.
   *
   * If a td element is in table scope, an end tag token for td is emitted;
   * otherwise, if a th element is in table scope, one for th is emitted.
   * Nothing happens when neither is in table scope.
   */
  private function closeCell() {
      if ($this->elementInScope('td', self::SCOPE_TABLE)) {
          $cell = 'td';
      } elseif ($this->elementInScope('th', self::SCOPE_TABLE)) {
          $cell = 'th';
      } else {
          return;
      }

      $end_tag = array(
          'name' => $cell,
          'type' => HTML5_Tokenizer::ENDTAG
      );
      $this->emitToken($end_tag);
  }
  /**
   * Processes a token "using the rules for" another insertion mode.
   *
   * The token is passed to emitToken() together with the mode whose rules
   * should apply. Per the specification the parser's own insertion mode is
   * meant to stay as it is unless those rules switch it.
   *
   * @param array $token Token to process.
   * @param mixed $mode  Insertion mode constant whose rules are to be used.
   * @return mixed Whatever emitToken() returns.
   */
  private function processWithRulesFor($token, $mode) {
      $result = $this->emitToken($token, $mode);
      return $result;
  }
  /**
   * Inserts an element whose content is tokenized as CDATA.
   *
   * After inserting the element, the current insertion mode is saved in
   * $this->original_mode, the mode becomes IN_CDATA_RCDATA and the content
   * model is switched to CDATA.
   *
   * @param array $token Start tag token of the element.
   */
  private function insertCDATAElement($token) {
      $this->insertElement($token);

      $this->content_model = HTML5_Tokenizer::CDATA;

      // Save the current mode before replacing it.
      $this->original_mode = $this->mode;
      $this->mode = self::IN_CDATA_RCDATA;
  }
  /**
   * Inserts an element whose content is tokenized as RCDATA.
   *
   * After inserting the element, the current insertion mode is saved in
   * $this->original_mode, the mode becomes IN_CDATA_RCDATA and the content
   * model is switched to RCDATA.
   *
   * @param array $token Start tag token of the element.
   */
  private function insertRCDATAElement($token) {
      $this->insertElement($token);

      $this->content_model = HTML5_Tokenizer::RCDATA;

      // Save the current mode before replacing it.
      $this->original_mode = $this->mode;
      $this->mode = self::IN_CDATA_RCDATA;
  }
  /**
   * Looks up an attribute value on a token.
   *
   * @param array  $token Token; its optional 'attr' entry is a list of
   *                      array('name' => ..., 'value' => ...) pairs.
   * @param string $key   Attribute name to look for (compared with ===).
   * @return mixed The value of the last pair whose name matches, or false
   *               when the token has no 'attr' entry or no pair matches.
   */
  private function getAttr($token, $key) {
      if (!isset($token['attr'])) {
          return false;
      }

      $value = false;
      foreach ($token['attr'] as $pair) {
          if ($pair['name'] !== $key) {
              continue;
          }
          // Keep scanning: a later pair with the same name overrides.
          $value = $pair['value'];
      }
      return $value;
  }
  /**
   * Returns the current table: the last table element in the stack of open
   * elements, or the first element of the stack when the stack holds no
   * table element.
   *
   * @return mixed The matching stack entry.
   */
  private function getCurrentTable() {
      $index = count($this->stack);
      while (--$index >= 0) {
          $candidate = $this->stack[$index];
          if ($candidate->tagName === 'table') {
              return $candidate;
          }
      }
      return $this->stack[0];
  }
  /**
   * Determines the foster parent element.
   *
   * - No table element in the stack of open elements: the first element
   *   of the stack.
   * - The last table element has a parent that is an element: that parent.
   * - The last table element has no parent, or its parent is not an
   *   element: the element before the table in the stack of open elements.
   *
   * The element check is made before the parent is returned, so a
   * non-element parent no longer short-circuits the third case.
   *
   * @return mixed The foster parent element.
   */
  private function getFosterParent() {
      $table = null;
      $table_index = null;

      // Find the last table element in the stack of open elements.
      for ($n = count($this->stack) - 1; $n >= 0; $n--) {
          if ($this->stack[$n]->tagName === 'table') {
              $table = $this->stack[$n];
              $table_index = $n;
              break;
          }
      }

      if ($table === null) {
          return $this->stack[0];
      }

      $parent = $table->parentNode;
      if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
          return $parent;
      }

      return $this->stack[$table_index - 1];
  }
  /**
   * Foster parents a node.
   *
   * If the foster parent element is the parent of the last table element
   * in the stack of open elements, the node is inserted immediately before
   * that table; otherwise it is appended to the foster parent element.
   *
   * A table without a parent node is handled by appending, instead of
   * calling isSameNode() on null.
   *
   * @param mixed $node Node to insert.
   */
  public function fosterParent($node) {
      $foster_parent = $this->getFosterParent();
      // Almost equivalent to the last table element, except it can be html.
      $table = $this->getCurrentTable();

      $table_parent = ($table->tagName === 'table') ? $table->parentNode : null;

      if ($table_parent !== null && $table_parent->isSameNode($foster_parent)) {
          $foster_parent->insertBefore($node, $table);
      } else {
          $foster_parent->appendChild($node);
      }
  }
  /**
   * Debugging aid: echoes the tag names on the stack of open elements,
   * from the first entry to the current node, on one line.
   */
  private function printStack() {
      $tags = array();
      foreach ($this->stack as $open_element) {
          array_push($tags, $open_element->tagName);
      }
      $listing = implode(', ', $tags);
      echo " -> stack [" . $listing . "]\n";
  }
  /**
   * Debugging aid: echoes the list of active formatting elements on one
   * line, showing markers as "MARKER" and elements by tag name. Prints
   * nothing when the list is empty.
   */
  private function printActiveFormattingElements() {
      if (!$this->a_formatting) {
          return;
      }

      $labels = array();
      foreach ($this->a_formatting as $entry) {
          $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
      }

      $listing = implode(', ', $labels);
      echo " -> active formatting [" . $listing . "]\n";
  }
  /**
   * Tells whether the current table (see getCurrentTable()) carries a
   * non-empty "tainted" property.
   *
   * @return bool
   */
  public function currentTableIsTainted() {
      $table = $this->getCurrentTable();
      if (empty($table->tainted)) {
          return false;
      }
      return true;
  }
  /**
   * Sets up the tree constructor for building a fragment.
   *
   * The fragment flag is always raised. When a context tag name is given,
   * a context element is created from it, the tokenizer content model is
   * chosen from that element, a fresh html root is appended to the
   * document and becomes the only entry on the stack of open elements,
   * the insertion mode is reset against the context element, and the
   * form pointer is set to the nearest form element on the context
   * element's ancestor chain (itself included).
   *
   * @param string|null $context Tag name of the context element, if any.
   */
  public function setupContext($context = null) {
      $this->fragment = true;
      if (!$context) {
          return;
      }

      $context = $this->dom->createElementNS(self::NS_HTML, $context);

      /* 4.1. Set the HTML parser's tokenization stage's content model
       * flag according to the context element, as follows: */
      $content_models = array(
          'title'     => HTML5_Tokenizer::RCDATA,
          'textarea'  => HTML5_Tokenizer::RCDATA,
          'style'     => HTML5_Tokenizer::CDATA,
          'script'    => HTML5_Tokenizer::CDATA,
          'xmp'       => HTML5_Tokenizer::CDATA,
          'iframe'    => HTML5_Tokenizer::CDATA,
          'noembed'   => HTML5_Tokenizer::CDATA,
          'noframes'  => HTML5_Tokenizer::CDATA,
          // XSCRIPT: assuming scripting is enabled
          'noscript'  => HTML5_Tokenizer::CDATA,
          'plaintext' => HTML5_Tokenizer::PLAINTEXT,
      );
      if (isset($content_models[$context->tagName])) {
          $this->content_model = $content_models[$context->tagName];
      }

      /* 4.2. Let root be a new html element with no attributes. */
      $root = $this->dom->createElementNS(self::NS_HTML, 'html');
      $this->root = $root;

      /* 4.3 Append the element root to the Document node created above. */
      $this->dom->appendChild($root);

      /* 4.4 Set up the parser's stack of open elements so that it
       * contains just the single element root. */
      $this->stack = array($root);

      /* 4.5 Reset the parser's insertion mode appropriately. */
      $this->resetInsertionMode($context);

      /* 4.6 Set the parser's form element pointer to the nearest node
       * to the context element that is a form element (going straight up
       * the ancestor chain, and including the element itself, if it is a
       * form element), or, if there is no such form element, to null. */
      for ($node = $context; $node; $node = $node->parentNode) {
          if ($node->tagName === 'form') {
              $this->form_pointer = $node;
              break;
          }
      }
  }
  /**
   * Adjusts MathML attributes on a token: an attribute named
   * "definitionurl" is renamed to "definitionURL".
   *
   * A token without an attribute list is returned untouched. Iterating a
   * missing 'attr' entry by reference would add 'attr' => null to the token
   * and raise a warning.
   *
   * @param array $token Token with an optional 'attr' list of
   *                     array('name' => ..., 'value' => ...) pairs.
   * @return array The token with adjusted attribute names.
   */
  public function adjustMathMLAttributes($token) {
      if (empty($token['attr'])) {
          return $token;
      }

      foreach ($token['attr'] as &$kp) {
          if ($kp['name'] === 'definitionurl') {
              $kp['name'] = 'definitionURL';
          }
      }
      // Break the reference so the last pair cannot be modified by accident.
      unset($kp);

      return $token;
  }
  /**
   * Adjusts SVG attributes on a token: lower-cased attribute names that
   * appear in the lookup table below are replaced by their mixed-case
   * form. Other attribute names are left alone.
   *
   * A token without an attribute list is returned untouched. Iterating a
   * missing 'attr' entry by reference would add 'attr' => null to the token
   * and raise a warning.
   *
   * @param array $token Token with an optional 'attr' list of
   *                     array('name' => ..., 'value' => ...) pairs.
   * @return array The token with adjusted attribute names.
   */
  public function adjustSVGAttributes($token) {
      static $lookup = array(
          'attributename' => 'attributeName',
          'attributetype' => 'attributeType',
          'basefrequency' => 'baseFrequency',
          'baseprofile' => 'baseProfile',
          'calcmode' => 'calcMode',
          'clippathunits' => 'clipPathUnits',
          'contentscripttype' => 'contentScriptType',
          'contentstyletype' => 'contentStyleType',
          'diffuseconstant' => 'diffuseConstant',
          'edgemode' => 'edgeMode',
          'externalresourcesrequired' => 'externalResourcesRequired',
          'filterres' => 'filterRes',
          'filterunits' => 'filterUnits',
          'glyphref' => 'glyphRef',
          'gradienttransform' => 'gradientTransform',
          'gradientunits' => 'gradientUnits',
          'kernelmatrix' => 'kernelMatrix',
          'kernelunitlength' => 'kernelUnitLength',
          'keypoints' => 'keyPoints',
          'keysplines' => 'keySplines',
          'keytimes' => 'keyTimes',
          'lengthadjust' => 'lengthAdjust',
          'limitingconeangle' => 'limitingConeAngle',
          'markerheight' => 'markerHeight',
          'markerunits' => 'markerUnits',
          'markerwidth' => 'markerWidth',
          'maskcontentunits' => 'maskContentUnits',
          'maskunits' => 'maskUnits',
          'numoctaves' => 'numOctaves',
          'pathlength' => 'pathLength',
          'patterncontentunits' => 'patternContentUnits',
          'patterntransform' => 'patternTransform',
          'patternunits' => 'patternUnits',
          'pointsatx' => 'pointsAtX',
          'pointsaty' => 'pointsAtY',
          'pointsatz' => 'pointsAtZ',
          'preservealpha' => 'preserveAlpha',
          'preserveaspectratio' => 'preserveAspectRatio',
          'primitiveunits' => 'primitiveUnits',
          'refx' => 'refX',
          'refy' => 'refY',
          'repeatcount' => 'repeatCount',
          'repeatdur' => 'repeatDur',
          'requiredextensions' => 'requiredExtensions',
          'requiredfeatures' => 'requiredFeatures',
          'specularconstant' => 'specularConstant',
          'specularexponent' => 'specularExponent',
          'spreadmethod' => 'spreadMethod',
          'startoffset' => 'startOffset',
          'stddeviation' => 'stdDeviation',
          'stitchtiles' => 'stitchTiles',
          'surfacescale' => 'surfaceScale',
          'systemlanguage' => 'systemLanguage',
          'tablevalues' => 'tableValues',
          'targetx' => 'targetX',
          'targety' => 'targetY',
          'textlength' => 'textLength',
          'viewbox' => 'viewBox',
          'viewtarget' => 'viewTarget',
          'xchannelselector' => 'xChannelSelector',
          'ychannelselector' => 'yChannelSelector',
          'zoomandpan' => 'zoomAndPan',
      );

      if (empty($token['attr'])) {
          return $token;
      }

      foreach ($token['attr'] as &$kp) {
          if (isset($lookup[$kp['name']])) {
              $kp['name'] = $lookup[$kp['name']];
          }
      }
      // Break the reference so the last pair cannot be modified by accident.
      unset($kp);

      return $token;
  }
  /**
   * Adjusts foreign attributes on a token: attribute names found in the
   * lookup table below (xlink:*, xml:*, xmlns, xmlns:xlink) are replaced
   * by an array(prefix, local name, namespace URI) triple. Other attribute
   * names stay plain strings.
   *
   * A token without an attribute list is returned untouched. Iterating a
   * missing 'attr' entry by reference would add 'attr' => null to the token
   * and raise a warning.
   *
   * @param array $token Token with an optional 'attr' list of
   *                     array('name' => ..., 'value' => ...) pairs.
   * @return array The token with adjusted attribute names.
   */
  public function adjustForeignAttributes($token) {
      static $lookup = array(
          'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
          'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
          'xlink:href' => array('xlink', 'href', self::NS_XLINK),
          'xlink:role' => array('xlink', 'role', self::NS_XLINK),
          'xlink:show' => array('xlink', 'show', self::NS_XLINK),
          'xlink:title' => array('xlink', 'title', self::NS_XLINK),
          'xlink:type' => array('xlink', 'type', self::NS_XLINK),
          'xml:base' => array('xml', 'base', self::NS_XML),
          'xml:lang' => array('xml', 'lang', self::NS_XML),
          'xml:space' => array('xml', 'space', self::NS_XML),
          'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
          'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
      );

      if (empty($token['attr'])) {
          return $token;
      }

      foreach ($token['attr'] as &$kp) {
          if (isset($lookup[$kp['name']])) {
              $kp['name'] = $lookup[$kp['name']];
          }
      }
      // Break the reference so the last pair cannot be modified by accident.
      unset($kp);

      return $token;
  }
  /**
   * Creates an element in the given namespace from a tag token, copies
   * the token's attributes onto it, passes it to appendToRealParent()
   * and pushes it onto $this->stack.
   *
   * An attribute name may be a plain string or a three-element array
   * whose index 1 is the attribute name to set and whose index 2 is its
   * namespace URI. Plain-string names are treated as belonging to
   * self::NS_HTML.
   *
   * @param array  $token        Tag token with a 'name' entry and an
   *                             optional 'attr' list of arrays carrying
   *                             'name' and 'value'.
   * @param string $namespaceURI Namespace the new element is created in.
   */
  public function insertForeignElement($token, $namespaceURI) {
      $element = $this->dom->createElementNS($namespaceURI, $token['name']);
      $attributes = empty($token['attr']) ? array() : $token['attr'];

      foreach ($attributes as $attribute) {
          $name = $attribute['name'];
          $namespace = self::NS_HTML;
          if (is_array($name)) {
              // Adjusted foreign attribute: unpack namespace, then name.
              $namespace = $name[2];
              $name = $name[1];
          }

          // An attribute that is already present is never overwritten.
          if ($element->hasAttributeNS($namespace, $name)) {
              continue;
          }

          $value = $attribute['value'];

          if ($namespace !== self::NS_XLINK && $namespace !== self::NS_HTML) {
              $element->setAttributeNS($namespace, $name, $value);
              continue;
          }

          // XSKETCHY: libxml bug workarounds. XLink attributes are set as
          // a plain 'xlink:'-prefixed name, and HTML-namespace attributes
          // as a plain name, instead of going through setAttributeNS().
          $plainName = ($namespace === self::NS_XLINK) ? 'xlink:'.$name : $name;
          $element->setAttribute($plainName, $value);
      }

      $this->appendToRealParent($element);
      $this->stack[] = $element;

      // XERROR: the two parse errors described below are not reported.
      /* If the newly created element has an xmlns attribute in the XMLNS
       * namespace whose value is not exactly the same as the element's
       * namespace, that is a parse error. Similarly, if the newly created
       * element has an xmlns:xlink attribute in the XMLNS namespace whose
       * value is not the XLink Namespace, that is a parse error. */
  }
  /**
   * Normalizes the document and returns the result of the parse.
   *
   * When $this->fragment is falsy, $this->dom itself is returned.
   * Otherwise the childNodes of $this->root are returned when
   * $this->root is truthy, and the childNodes of $this->dom when it is
   * not.
   *
   * @return mixed $this->dom, or a childNodes collection (presumably a
   *               DOMDocument or DOMNodeList; confirm where $this->dom
   *               is created).
   */
  public function save() {
      $this->dom->normalize();

      // Whole-document parse: return the document itself.
      if (!$this->fragment) {
          return $this->dom;
      }

      // Fragment parse: return the children of the root when one is set.
      if ($this->root) {
          return $this->root->childNodes;
      }

      return $this->dom->childNodes;
  }
  3350. }