<?php

/**
 * Tests for HTMLPurifier_Lexer: the create() factory, the parseData() and
 * extractBody() helpers, and tokenizeHTML() across the DirectLex, DOMLex and
 * PH5P implementations.
 *
 * NOTE(review): this file was recovered from a rendering that had decoded the
 * HTML entities inside the string literals. The entity inputs below were
 * restored from the test names and from the per-lexer alternate expectations;
 * confirm them against the upstream copy if one is available.
 */
class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
{

    protected $_has_pear = false;

    /**
     * Loads the PH5P lexer when the test configuration enables it.
     */
    public function __construct()
    {
        parent::__construct();
        // !empty() tolerates a missing configuration key instead of raising
        // an undefined-index notice.
        if (!empty($GLOBALS['HTMLPurifierTest']['PH5P'])) {
            require_once 'HTMLPurifier/Lexer/PH5P.php';
        }
    }

    // HTMLPurifier_Lexer::create() --------------------------------------------

    public function test_create()
    {
        $this->config->set('Core.MaintainLineNumbers', true);
        $lexer = HTMLPurifier_Lexer::create($this->config);
        $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
    }

    public function test_create_objectLexerImpl()
    {
        $this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_DirectLex());
        $lexer = HTMLPurifier_Lexer::create($this->config);
        $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
    }

    public function test_create_unknownLexer()
    {
        $this->config->set('Core.LexerImpl', 'AsdfAsdf');
        $this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));
        HTMLPurifier_Lexer::create($this->config);
    }

    public function test_create_incompatibleLexer()
    {
        $this->config->set('Core.LexerImpl', 'DOMLex');
        $this->config->set('Core.MaintainLineNumbers', true);
        $this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));
        HTMLPurifier_Lexer::create($this->config);
    }

    // HTMLPurifier_Lexer->parseData() -----------------------------------------

    /**
     * Asserts that parseData() turns $input into $expect.
     *
     * @param string      $input  Raw character data handed to parseData().
     * @param string|bool $expect Expected result; true means "same as $input".
     */
    public function assertParseData($input, $expect = true)
    {
        if ($expect === true) {
            $expect = $input;
        }
        $lexer = new HTMLPurifier_Lexer();
        $this->assertIdentical($expect, $lexer->parseData($input));
    }

    public function test_parseData_plainText()
    {
        $this->assertParseData('asdf');
    }

    public function test_parseData_ampersandEntity()
    {
        $this->assertParseData('&amp;', '&');
    }

    public function test_parseData_quotEntity()
    {
        $this->assertParseData('&quot;', '"');
    }

    public function test_parseData_aposNumericEntity()
    {
        $this->assertParseData('&#039;', "'");
    }

    public function test_parseData_aposCompactNumericEntity()
    {
        $this->assertParseData('&#39;', "'");
    }

    public function test_parseData_adjacentAmpersandEntities()
    {
        $this->assertParseData('&amp;&amp;&amp;', '&&&');
    }

    public function test_parseData_trailingUnescapedAmpersand()
    {
        $this->assertParseData('&amp;&', '&&');
    }

    public function test_parseData_internalUnescapedAmpersand()
    {
        $this->assertParseData('Procter & Gamble');
    }

    public function test_parseData_improperEntityFaultToleranceTest()
    {
        // NOTE(review): the recovered source showed a bare '-'; the hex
        // entity for that character is assumed to be the original input.
        $this->assertParseData('&#x2D;');
    }

    // HTMLPurifier_Lexer->extractBody() ---------------------------------------

    /**
     * Asserts that extractBody() reduces $text to $extract.
     *
     * @param string      $text    Document (or fragment) handed to extractBody().
     * @param string|bool $extract Expected result; true means "same as $text".
     */
    public function assertExtractBody($text, $extract = true)
    {
        $lexer = new HTMLPurifier_Lexer();
        $result = $lexer->extractBody($text);
        if ($extract === true) {
            $extract = $text;
        }
        $this->assertIdentical($extract, $result);
    }

    public function test_extractBody_noBodyTags()
    {
        $this->assertExtractBody('<b>Bold</b>');
    }

    public function test_extractBody_lowercaseBodyTags()
    {
        $this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
    }

    public function test_extractBody_uppercaseBodyTags()
    {
        $this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>', '<B>Bold</B>');
    }

    public function test_extractBody_realisticUseCase()
    {
        // Whitespace inside these literals is significant. The expected value
        // and the document are built from the same $body string so that the
        // two can never disagree on indentation.
        $body = '
      <form method="post" action="whatever1">
         <div>
            <input type="text" name="username" />
            <input type="text" name="password" />
            <input type="submit" />
         </div>
      </form>
   ';
        // The XML declaration on the first line is left unterminated, exactly
        // as it appeared in the recovered source.
        $this->assertExtractBody(
'<?xml version="1.0"
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
   <head>
      <title>xyz</title>
   </head>
   <body>' . $body . '</body>
</html>',
            $body
        );
    }

    public function test_extractBody_bodyWithAttributes()
    {
        $this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');
    }

    public function test_extractBody_preserveUnclosedBody()
    {
        $this->assertExtractBody('<body>asdf'); // not closed, don't accept
    }

    public function test_extractBody_useLastBody()
    {
        $this->assertExtractBody('<body>foo</body>bar</body>', 'foo</body>bar');
    }

    // HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------

    /**
     * Tokenizes $input with every available lexer and checks the token list.
     *
     * DirectLex is always exercised; DOMLex and PH5P are added when the
     * DOMDocument class exists.
     *
     * @param string $input      HTML to tokenize.
     * @param array  $expect     Token list expected from every lexer by default.
     * @param array  $alt_expect Per-lexer overrides keyed by lexer name
     *                           ('DirectLex', 'DOMLex', 'PH5P'). An array
     *                           replaces $expect for that lexer; false skips
     *                           the assertion for that lexer.
     */
    public function assertTokenization($input, $expect, $alt_expect = array())
    {
        $lexers = array();
        $lexers['DirectLex'] = new HTMLPurifier_Lexer_DirectLex();
        if (class_exists('DOMDocument')) {
            $lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
            $lexers['PH5P'] = new HTMLPurifier_Lexer_PH5P();
        }
        foreach ($lexers as $name => $lexer) {
            // The lexer still runs on inputs whose assertion is skipped.
            $result = $lexer->tokenizeHTML($input, $this->config, $this->context);
            $t_expect = $expect;
            if (isset($alt_expect[$name])) {
                if ($alt_expect[$name] === false) {
                    continue;
                }
                $t_expect = $alt_expect[$name];
            }
            $this->assertIdentical($result, $t_expect, "$name: %s");
            // Dump the actual tokens to make a failure easier to diagnose.
            if ($t_expect != $result) {
                printTokens($result);
            }
        }
    }

    public function test_tokenizeHTML_emptyInput()
    {
        $this->assertTokenization('', array());
    }

    public function test_tokenizeHTML_plainText()
    {
        $this->assertTokenization(
            'This is regular text.',
            array(
                new HTMLPurifier_Token_Text('This is regular text.')
            )
        );
    }

    public function test_tokenizeHTML_textAndTags()
    {
        $this->assertTokenization(
            'This is <b>bold</b> text',
            array(
                new HTMLPurifier_Token_Text('This is '),
                new HTMLPurifier_Token_Start('b', array()),
                new HTMLPurifier_Token_Text('bold'),
                new HTMLPurifier_Token_End('b'),
                new HTMLPurifier_Token_Text(' text'),
            )
        );
    }

    public function test_tokenizeHTML_normalizeCase()
    {
        $this->assertTokenization(
            '<DIV>Totally rad dude. <b>asdf</b></div>',
            array(
                new HTMLPurifier_Token_Start('DIV', array()),
                new HTMLPurifier_Token_Text('Totally rad dude. '),
                new HTMLPurifier_Token_Start('b', array()),
                new HTMLPurifier_Token_Text('asdf'),
                new HTMLPurifier_Token_End('b'),
                new HTMLPurifier_Token_End('div'),
            )
        );
    }

    public function test_tokenizeHTML_notWellFormed()
    {
        $this->assertTokenization(
            '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',
            array(
                new HTMLPurifier_Token_Start('asdf'),
                new HTMLPurifier_Token_End('asdf'),
                new HTMLPurifier_Token_Start('d'),
                new HTMLPurifier_Token_End('d'),
                new HTMLPurifier_Token_Start('poOloka'),
                new HTMLPurifier_Token_Start('poolasdf'),
                new HTMLPurifier_Token_Start('ds'),
                new HTMLPurifier_Token_End('asdf'),
                new HTMLPurifier_Token_End('ASDF'),
            ),
            array(
                'DOMLex' => $alt = array(
                    new HTMLPurifier_Token_Empty('asdf'),
                    new HTMLPurifier_Token_Empty('d'),
                    new HTMLPurifier_Token_Start('pooloka'),
                    new HTMLPurifier_Token_Start('poolasdf'),
                    new HTMLPurifier_Token_Empty('ds'),
                    new HTMLPurifier_Token_End('poolasdf'),
                    new HTMLPurifier_Token_End('pooloka'),
                ),
                'PH5P' => $alt,
            )
        );
    }

    public function test_tokenizeHTML_whitespaceInTag()
    {
        $this->assertTokenization(
            '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>',
            array(
                new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),
                new HTMLPurifier_Token_Text('Link to '),
                new HTMLPurifier_Token_Start('b',array('id'=>'asdf')),
                new HTMLPurifier_Token_Text('foobar'),
                new HTMLPurifier_Token_End('b'),
                new HTMLPurifier_Token_End('a'),
            )
        );
    }

    public function test_tokenizeHTML_singleAttribute()
    {
        $this->assertTokenization(
            '<br style="&amp;" />',
            array(
                new HTMLPurifier_Token_Empty('br', array('style' => '&'))
            )
        );
    }

    public function test_tokenizeHTML_emptyTag()
    {
        $this->assertTokenization(
            '<br />',
            array( new HTMLPurifier_Token_Empty('br') )
        );
    }

    public function test_tokenizeHTML_comment()
    {
        $this->assertTokenization(
            '<!-- Comment -->',
            array( new HTMLPurifier_Token_Comment(' Comment ') )
        );
    }

    public function test_tokenizeHTML_malformedComment()
    {
        $this->assertTokenization(
            '<!-- not so well formed --->',
            array( new HTMLPurifier_Token_Comment(' not so well formed -') )
        );
    }

    public function test_tokenizeHTML_unterminatedTag()
    {
        $this->assertTokenization(
            '<a href=""',
            array( new HTMLPurifier_Token_Text('<a href=""') ),
            array(
                // I like our behavior better, but it's non-standard
                'DOMLex' => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
                'PH5P' => false, // total barfing, grabs scaffolding too
            )
        );
    }

    public function test_tokenizeHTML_specialEntities()
    {
        $this->assertTokenization(
            '&lt;b&gt;',
            array(
                new HTMLPurifier_Token_Text('<b>')
            ),
            array(
                // some parsers will separate entities out
                'PH5P' => array(
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('b'),
                    new HTMLPurifier_Token_Text('>'),
                ),
            )
        );
    }

    public function test_tokenizeHTML_earlyQuote()
    {
        $this->assertTokenization(
            '<a "=>',
            array( new HTMLPurifier_Token_Empty('a') ),
            array(
                // we barf on this input
                'DirectLex' => array(
                    new HTMLPurifier_Token_Start('a', array('"' => ''))
                ),
                'PH5P' => false, // behavior varies; handle this personally
            )
        );
    }

    public function test_tokenizeHTML_earlyQuote_PH5P()
    {
        if (!class_exists('DOMDocument')) {
            return;
        }
        $lexer = new HTMLPurifier_Lexer_PH5P();
        $result = $lexer->tokenizeHTML('<a "=>', $this->config, $this->context);
        // The expected token type depends on whether the context holds a
        // truthy 'PH5PError' entry.
        if ($this->context->get('PH5PError', true)) {
            $this->assertIdentical(array(
                new HTMLPurifier_Token_Start('a', array('"' => ''))
            ), $result);
        } else {
            $this->assertIdentical(array(
                new HTMLPurifier_Token_Empty('a', array('"' => ''))
            ), $result);
        }
    }

    public function test_tokenizeHTML_unescapedQuote()
    {
        $this->assertTokenization(
            '"',
            array( new HTMLPurifier_Token_Text('"') )
        );
    }

    public function test_tokenizeHTML_escapedQuote()
    {
        $this->assertTokenization(
            '&quot;',
            array( new HTMLPurifier_Token_Text('"') )
        );
    }

    public function test_tokenizeHTML_cdata()
    {
        $this->assertTokenization(
            '<![CDATA[You <b>can&#39;t</b> get me!]]>',
            array( new HTMLPurifier_Token_Text('You <b>can&#39;t</b> get me!') ),
            array(
                'PH5P' =>  array(
                    new HTMLPurifier_Token_Text('You '),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('b'),
                    new HTMLPurifier_Token_Text('>'),
                    new HTMLPurifier_Token_Text('can'),
                    new HTMLPurifier_Token_Text('&'),
                    new HTMLPurifier_Token_Text('#39;t'),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('/b'),
                    new HTMLPurifier_Token_Text('>'),
                    new HTMLPurifier_Token_Text(' get me!'),
                ),
            )
        );
    }

    public function test_tokenizeHTML_characterEntity()
    {
        $this->assertTokenization(
            '&theta;',
            array( new HTMLPurifier_Token_Text("\xCE\xB8") )
        );
    }

    public function test_tokenizeHTML_characterEntityInCDATA()
    {
        $this->assertTokenization(
            '<![CDATA[&rarr;]]>',
            array( new HTMLPurifier_Token_Text("&rarr;") ),
            array(
                'PH5P' => array(
                    new HTMLPurifier_Token_Text('&'),
                    new HTMLPurifier_Token_Text('rarr;'),
                ),
            )
        );
    }

    public function test_tokenizeHTML_entityInAttribute()
    {
        $this->assertTokenization(
            '<a href="index.php?title=foo&amp;id=bar">Link</a>',
            array(
                new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),
                new HTMLPurifier_Token_Text('Link'),
                new HTMLPurifier_Token_End('a'),
            )
        );
    }

    public function test_tokenizeHTML_preserveUTF8()
    {
        $this->assertTokenization(
            "\xCE\xB8",
            array( new HTMLPurifier_Token_Text("\xCE\xB8") )
        );
    }

    public function test_tokenizeHTML_specialEntityInAttribute()
    {
        $this->assertTokenization(
            '<br test="x &lt; 6" />',
            array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )
        );
    }

    public function test_tokenizeHTML_emoticonProtection()
    {
        $this->assertTokenization(
            '<b>Whoa! <3 That\'s not good >.></b>',
            array(
                new HTMLPurifier_Token_Start('b'),
                new HTMLPurifier_Token_Text('Whoa! '),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
                new HTMLPurifier_Token_End('b')
            ),
            array(
                // text is absorbed together
                'DOMLex' => array(
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
                    new HTMLPurifier_Token_End('b'),
                ),
                'PH5P' => array( // interesting grouping
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('Whoa! '),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
                    new HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    public function test_tokenizeHTML_commentWithFunkyChars()
    {
        $this->assertTokenization(
            '<!-- This >< comment --><br />',
            array(
                new HTMLPurifier_Token_Comment(' This >< comment '),
                new HTMLPurifier_Token_Empty('br'),
            )
        );
    }

    public function test_tokenizeHTML_unterminatedComment()
    {
        $this->assertTokenization(
            '<!-- This >< comment',
            array( new HTMLPurifier_Token_Comment(' This >< comment') ),
            array(
                'DOMLex' => false,
                'PH5P' => false,
            )
        );
    }

    public function test_tokenizeHTML_scriptCDATAContents()
    {
        $this->config->set('HTML.Trusted', true);
        $this->assertTokenization(
            'Foo: <script>alert("<foo>");</script>',
            array(
                new HTMLPurifier_Token_Text('Foo: '),
                new HTMLPurifier_Token_Start('script'),
                new HTMLPurifier_Token_Text('alert("<foo>");'),
                new HTMLPurifier_Token_End('script'),
            ),
            array(
                // PH5P, for some reason, bubbles the script to <head>
                'PH5P' => false,
            )
        );
    }

    public function test_tokenizeHTML_entitiesInComment()
    {
        $this->assertTokenization(
            '<!-- This comment < &lt; & -->',
            array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') )
        );
    }

    public function test_tokenizeHTML_attributeWithSpecialCharacters()
    {
        $this->assertTokenization(
            '<a href="><>">',
            array( new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),
            array(
                'DirectLex' => array(
                    new HTMLPurifier_Token_Start('a', array('href' => '')),
                    new HTMLPurifier_Token_Text('<'),
                    new HTMLPurifier_Token_Text('">'),
                )
            )
        );
    }

    public function test_tokenizeHTML_emptyTagWithSlashInAttribute()
    {
        $this->assertTokenization(
            '<param name="src" value="http://example.com/video.wmv" />',
            array( new HTMLPurifier_Token_Empty('param', array('name' => 'src', 'value' => 'http://example.com/video.wmv')) )
        );
    }

    public function test_tokenizeHTML_style()
    {
        $extra = array(
            // PH5P doesn't seem to like style tags
            'PH5P' => false,
            // DirectLex defers to RemoveForeignElements for textification
            'DirectLex' => array(
                new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                new HTMLPurifier_Token_Comment("\ndiv {}\n"),
                new HTMLPurifier_Token_End('style'),
            ),
        );
        if (!defined('LIBXML_VERSION')) {
            // LIBXML_VERSION is missing in early versions of PHP
            // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,
            // this translates to 5.0.x. In such cases, punt the test entirely.
            return;
        } elseif (LIBXML_VERSION < 20628) {
            // libxml's behavior is wrong prior to this version, so make
            // appropriate accommodations
            $extra['DOMLex'] = $extra['DirectLex'];
        }
        // The newlines are written as escapes so the input cannot pick up
        // stray indentation that the expected "\ndiv {}\n" does not contain.
        $this->assertTokenization(
            '<style type="text/css"><!--' . "\ndiv {}\n" . '--></style>',
            array(
                new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
                new HTMLPurifier_Token_Text("\ndiv {}\n"),
                new HTMLPurifier_Token_End('style'),
            ),
            $extra
        );
    }

    public function test_tokenizeHTML_tagWithAtSignAndExtraGt()
    {
        $alt_expect = array(
            // Technically this is invalid, but it won't be a
            // problem with invalid element removal; also, this
            // mimics Mozilla's parsing of the tag.
            new HTMLPurifier_Token_Start('a@'),
            new HTMLPurifier_Token_Text('>'),
        );
        $this->assertTokenization(
            '<a@>>',
            array(
                new HTMLPurifier_Token_Start('a'),
                new HTMLPurifier_Token_Text('>'),
                new HTMLPurifier_Token_End('a'),
            ),
            array(
                'DirectLex' => $alt_expect,
            )
        );
    }

    public function test_tokenizeHTML_emoticonHeart()
    {
        $this->assertTokenization(
            '<br /><3<br />',
            array(
                new HTMLPurifier_Token_Empty('br'),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text('3'),
                new HTMLPurifier_Token_Empty('br'),
            ),
            array(
                'DOMLex' => array(
                    new HTMLPurifier_Token_Empty('br'),
                    new HTMLPurifier_Token_Text('<3'),
                    new HTMLPurifier_Token_Empty('br'),
                ),
            )
        );
    }

    public function test_tokenizeHTML_emoticonShiftyEyes()
    {
        $this->assertTokenization(
            '<b><<</b>',
            array(
                new HTMLPurifier_Token_Start('b'),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_End('b'),
            ),
            array(
                'DOMLex' => array(
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('<<'),
                    new HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    public function test_tokenizeHTML_eon1996()
    {
        $this->assertTokenization(
            '< <b>test</b>',
            array(
                new HTMLPurifier_Token_Text('<'),
                new HTMLPurifier_Token_Text(' '),
                new HTMLPurifier_Token_Start('b'),
                new HTMLPurifier_Token_Text('test'),
                new HTMLPurifier_Token_End('b'),
            ),
            array(
                'DOMLex' => array(
                    new HTMLPurifier_Token_Text('< '),
                    new HTMLPurifier_Token_Start('b'),
                    new HTMLPurifier_Token_Text('test'),
                    new HTMLPurifier_Token_End('b'),
                ),
            )
        );
    }

    public function test_tokenizeHTML_bodyInCDATA()
    {
        $alt_tokens = array(
            new HTMLPurifier_Token_Text('<'),
            new HTMLPurifier_Token_Text('body'),
            new HTMLPurifier_Token_Text('>'),
            new HTMLPurifier_Token_Text('Foo'),
            new HTMLPurifier_Token_Text('<'),
            new HTMLPurifier_Token_Text('/body'),
            new HTMLPurifier_Token_Text('>'),
        );
        $this->assertTokenization(
            '<![CDATA[<body>Foo</body>]]>',
            array(
                new HTMLPurifier_Token_Text('<body>Foo</body>'),
            ),
            array(
                'PH5P' => $alt_tokens,
            )
        );
    }

    // Checks an empty element nested inside a start/end pair. The method name
    // has no suffix in the original and is kept as-is.
    public function test_tokenizeHTML_()
    {
        $this->assertTokenization(
            '<a><img /></a>',
            array(
                new HTMLPurifier_Token_Start('a'),
                new HTMLPurifier_Token_Empty('img'),
                new HTMLPurifier_Token_End('a'),
            )
        );
    }

    public function test_tokenizeHTML_ignoreIECondComment()
    {
        $this->assertTokenization(
            '<!--[if IE]>foo<a>bar<!-- baz --><![endif]-->',
            array()
        );
    }

    public function test_tokenizeHTML_removeProcessingInstruction()
    {
        $this->config->set('Core.RemoveProcessingInstructions', true);
        $this->assertTokenization(
            '<?xml blah blah ?>',
            array()
        );
    }

    public function test_tokenizeHTML_removeNewline()
    {
        $this->config->set('Core.NormalizeNewlines', true);
        $this->assertTokenization(
            "plain\rtext\r\n",
            array(
                new HTMLPurifier_Token_Text("plain\ntext\n")
            )
        );
    }

    public function test_tokenizeHTML_noRemoveNewline()
    {
        $this->config->set('Core.NormalizeNewlines', false);
        $this->assertTokenization(
            "plain\rtext\r\n",
            array(
                new HTMLPurifier_Token_Text("plain\rtext\r\n")
            )
        );
    }

    public function test_tokenizeHTML_conditionalCommentUngreedy()
    {
        $this->assertTokenization(
            '<!--[if gte mso 9]>a<![endif]-->b<!--[if gte mso 9]>c<![endif]-->',
            array(
                new HTMLPurifier_Token_Text("b")
            )
        );
    }

    public function test_tokenizeHTML_imgTag()
    {
        $start = array(
            new HTMLPurifier_Token_Start('img',
                array(
                    'src' => 'img_11775.jpg',
                    'alt' => '[Img #11775]',
                    'id' => 'EMBEDDED_IMG_11775',
                )
            )
        );
        $this->assertTokenization(
            '<img src="img_11775.jpg" alt="[Img #11775]" id="EMBEDDED_IMG_11775" >',
            array(
                new HTMLPurifier_Token_Empty('img',
                    array(
                        'src' => 'img_11775.jpg',
                        'alt' => '[Img #11775]',
                        'id' => 'EMBEDDED_IMG_11775',
                    )
                )
            ),
            array(
                'DirectLex' => $start,
            )
        );
    }


    /*

    Template for a new tokenizeHTML test:

    public function test_tokenizeHTML_()
    {
        $this->assertTokenization(
            ,
            array(

            )
        );
    }
    */

}

// vim: et sw=4 sts=4