|
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752 |
- <?php
-
-
/**
 * Compares two sequences (arrays of lines, or strings split into single
 * characters) and describes how to turn the first into the second as a list
 * of "opcodes" (equal / replace / delete / insert ranges).
 *
 * Typical use:
 *
 *     $matcher = new Diff_SequenceMatcher($oldLines, $newLines);
 *     $opCodes = $matcher->getOpCodes();
 */
class Diff_SequenceMatcher
{
    /**
     * @var callable|null Optional callback; receives one element of the second
     *                    sequence and returns true when that element is "junk".
     */
    private $junkCallback = null;

    /**
     * @var array|null The first sequence to compare.
     */
    private $a = null;

    /**
     * @var array|null The second sequence to compare.
     */
    private $b = null;

    /**
     * @var array Elements of the second sequence flagged as junk (element => 1).
     */
    private $junkDict = array();

    /**
     * @var array Maps each non-junk, non-popular element of the second
     *            sequence to the ascending list of indices where it occurs.
     */
    private $b2j = array();

    /**
     * @var array Effective options (defaults merged with user supplied values).
     */
    private $options = array();

    /**
     * @var array|null Cached result of getMatchingBlocks().
     */
    private $matchingBlocks = null;

    /**
     * @var array|null Cached result of getOpCodes().
     */
    private $opCodes = null;

    /**
     * @var array|null Cached element => occurrence count for the second sequence.
     */
    private $fullBCount = null;

    /**
     * @var array Default values for every supported option.
     */
    private $defaultOptions = array(
        'ignoreNewLines' => false,
        'ignoreWhitespace' => false,
        'ignoreCase' => false
    );

    /**
     * Set up the matcher with the two sequences to compare.
     *
     * @param string|array  $a            First sequence.
     * @param string|array  $b            Second sequence.
     * @param callable|null $junkCallback Optional callback deciding whether an element is junk.
     * @param array         $options      Overrides for the default options.
     */
    public function __construct($a, $b, $junkCallback = null, $options = array())
    {
        // $options now has a default: a required parameter after an optional
        // one is deprecated since PHP 8.0 and made both effectively required.
        $this->a = null;
        $this->b = null;
        $this->junkCallback = $junkCallback;
        $this->setOptions($options);
        $this->setSequences($a, $b);
    }

    /**
     * Merge the given options over the defaults.
     *
     * @param array $options Option name => value pairs; anything that is not
     *                       an array is treated as "no overrides".
     */
    public function setOptions($options)
    {
        if (!is_array($options)) {
            $options = array();
        }

        $this->options = array_merge($this->defaultOptions, $options);
    }

    /**
     * Set both sequences at once.
     *
     * @param string|array $a First sequence.
     * @param string|array $b Second sequence.
     */
    public function setSequences($a, $b)
    {
        $this->setSeq1($a);
        $this->setSeq2($b);
    }

    /**
     * Set the first sequence and drop any cached results that depend on it.
     *
     * @param string|array $a Sequence; a string is split into single characters.
     */
    public function setSeq1($a)
    {
        $a = $this->toSequence($a);

        // Strict comparison: with "==" an empty array equals the initial null
        // (so it was never stored) and array('1') equals array('01').
        if ($a === $this->a) {
            return;
        }

        $this->a = $a;
        $this->matchingBlocks = null;
        $this->opCodes = null;
    }

    /**
     * Set the second sequence, drop cached results and rebuild the index
     * used to look up its elements.
     *
     * @param string|array $b Sequence; a string is split into single characters.
     */
    public function setSeq2($b)
    {
        $b = $this->toSequence($b);

        // Strict comparison for the same reasons as in setSeq1().
        if ($b === $this->b) {
            return;
        }

        $this->b = $b;
        $this->matchingBlocks = null;
        $this->opCodes = null;
        $this->fullBCount = null;
        $this->chainB();
    }

    /**
     * Normalise constructor/setter input into an array sequence.
     *
     * @param string|array $sequence Raw input.
     * @return array The input itself when it is an array, otherwise its characters.
     */
    private function toSequence($sequence)
    {
        if (is_array($sequence)) {
            return $sequence;
        }

        $sequence = (string) $sequence;

        // str_split('') yields array('') before PHP 8.2 and array() from 8.2 on;
        // always treat the empty string as an empty sequence.
        if ($sequence === '') {
            return array();
        }

        return str_split($sequence);
    }

    /**
     * Build $b2j (element => indices in the second sequence) and $junkDict.
     *
     * For sequences of 200 or more elements, an element whose index list has
     * grown past 1% of the sequence length is considered "popular" and is
     * removed from the index. Elements accepted by the junk callback are
     * removed from the index as well and recorded in $junkDict.
     */
    private function chainB()
    {
        $length = count($this->b);
        $this->b2j = array();
        $popularDict = array();

        for ($i = 0; $i < $length; ++$i) {
            $char = $this->b[$i];

            // Already flagged as popular: it is dropped from the index anyway,
            // so there is no point in collecting further positions.
            if (isset($popularDict[$char])) {
                continue;
            }

            if (!isset($this->b2j[$char])) {
                $this->b2j[$char] = array($i);
                continue;
            }

            if ($length >= 200 && count($this->b2j[$char]) * 100 > $length) {
                $popularDict[$char] = 1;
                unset($this->b2j[$char]);
            } else {
                $this->b2j[$char][] = $i;
            }
        }

        $this->junkDict = array();
        if (!is_callable($this->junkCallback)) {
            return;
        }

        // Popular elements can still be junk; remember those that are.
        foreach (array_keys($popularDict) as $char) {
            if (call_user_func($this->junkCallback, $char)) {
                $this->junkDict[$char] = 1;
            }
        }

        // Junk elements that are still indexed are removed from the index.
        foreach (array_keys($this->b2j) as $char) {
            if (call_user_func($this->junkCallback, $char)) {
                $this->junkDict[$char] = 1;
                unset($this->b2j[$char]);
            }
        }
    }

    /**
     * Check whether an element of the second sequence was flagged as junk.
     *
     * @param string|int $b Element to look up.
     * @return bool True when the element is in the junk dictionary.
     */
    private function isBJunk($b)
    {
        return isset($this->junkDict[$b]);
    }

    /**
     * Find the longest matching block in a[alo:ahi] and b[blo:bhi].
     *
     * The block is first located through the element index (which excludes
     * junk and popular elements), then extended in both directions: first
     * over non-junk elements, then over junk elements, as long as
     * linesAreDifferent() reports the neighbouring elements as equal.
     *
     * @param int $alo Lower bound (inclusive) in the first sequence.
     * @param int $ahi Upper bound (exclusive) in the first sequence.
     * @param int $blo Lower bound (inclusive) in the second sequence.
     * @param int $bhi Upper bound (exclusive) in the second sequence.
     * @return array array(start in a, start in b, length); length 0 means no match.
     */
    public function findLongestMatch($alo, $ahi, $blo, $bhi)
    {
        $a = $this->a;
        $b = $this->b;

        $bestI = $alo;
        $bestJ = $blo;
        $bestSize = 0;

        // $j2Len[$j] = length of the longest match ending at a[$i - 1], b[$j].
        $j2Len = array();
        $nothing = array();

        for ($i = $alo; $i < $ahi; ++$i) {
            $newJ2Len = array();
            $jDict = $this->arrayGetDefault($this->b2j, $a[$i], $nothing);
            foreach ($jDict as $j) {
                if ($j < $blo) {
                    continue;
                }
                // Index lists are built in ascending order, so nothing
                // after this position can be inside the window either.
                if ($j >= $bhi) {
                    break;
                }

                $k = $this->arrayGetDefault($j2Len, $j - 1, 0) + 1;
                $newJ2Len[$j] = $k;
                if ($k > $bestSize) {
                    $bestI = $i - $k + 1;
                    $bestJ = $j - $k + 1;
                    $bestSize = $k;
                }
            }

            $j2Len = $newJ2Len;
        }

        // Extend backwards over matching non-junk elements.
        while ($bestI > $alo && $bestJ > $blo
            && !$this->isBJunk($b[$bestJ - 1])
            && !$this->linesAreDifferent($bestI - 1, $bestJ - 1)
        ) {
            --$bestI;
            --$bestJ;
            ++$bestSize;
        }

        // Extend forwards over matching non-junk elements.
        while ($bestI + $bestSize < $ahi && $bestJ + $bestSize < $bhi
            && !$this->isBJunk($b[$bestJ + $bestSize])
            && !$this->linesAreDifferent($bestI + $bestSize, $bestJ + $bestSize)
        ) {
            ++$bestSize;
        }

        // Extend backwards over matching junk elements.
        while ($bestI > $alo && $bestJ > $blo
            && $this->isBJunk($b[$bestJ - 1])
            && !$this->linesAreDifferent($bestI - 1, $bestJ - 1)
        ) {
            --$bestI;
            --$bestJ;
            ++$bestSize;
        }

        // Extend forwards over matching junk elements.
        while ($bestI + $bestSize < $ahi && $bestJ + $bestSize < $bhi
            && $this->isBJunk($b[$bestJ + $bestSize])
            && !$this->linesAreDifferent($bestI + $bestSize, $bestJ + $bestSize)
        ) {
            ++$bestSize;
        }

        return array(
            $bestI,
            $bestJ,
            $bestSize
        );
    }

    /**
     * Compare one element of each sequence, honouring the 'ignoreWhitespace'
     * (tabs and spaces are stripped) and 'ignoreCase' options.
     *
     * @param int $aIndex Index into the first sequence.
     * @param int $bIndex Index into the second sequence.
     * @return bool True when the two elements differ.
     */
    public function linesAreDifferent($aIndex, $bIndex)
    {
        $lineA = $this->a[$aIndex];
        $lineB = $this->b[$bIndex];

        if ($this->options['ignoreWhitespace']) {
            $replace = array("\t", ' ');
            $lineA = str_replace($replace, '', $lineA);
            $lineB = str_replace($replace, '', $lineB);
        }

        if ($this->options['ignoreCase']) {
            $lineA = strtolower($lineA);
            $lineB = strtolower($lineB);
        }

        // Compare as strings with a strict operator: "!=" compares numeric
        // strings by value, so '1e3' and '1000' would count as the same line.
        return (string) $lineA !== (string) $lineB;
    }

    /**
     * Return the list of matching blocks between the two sequences.
     *
     * Each entry is array(i, j, n) meaning a[i:i+n] matches b[j:j+n]. Entries
     * are sorted, adjacent blocks are merged, and the list always ends with
     * the sentinel array(count(a), count(b), 0). The result is cached.
     *
     * @return array List of array(i, j, n) triples.
     */
    public function getMatchingBlocks()
    {
        if ($this->matchingBlocks !== null) {
            return $this->matchingBlocks;
        }

        $aLength = count($this->a);
        $bLength = count($this->b);

        // Work list of windows (alo, ahi, blo, bhi) still to be searched.
        $queue = array(
            array(0, $aLength, 0, $bLength)
        );

        $matchingBlocks = array();
        while (!empty($queue)) {
            list($alo, $ahi, $blo, $bhi) = array_pop($queue);
            $x = $this->findLongestMatch($alo, $ahi, $blo, $bhi);
            list($i, $j, $k) = $x;
            if (!$k) {
                continue;
            }

            $matchingBlocks[] = $x;

            // Search what is left before the match ...
            if ($alo < $i && $blo < $j) {
                $queue[] = array($alo, $i, $blo, $j);
            }

            // ... and what is left after it.
            if ($i + $k < $ahi && $j + $k < $bhi) {
                $queue[] = array($i + $k, $ahi, $j + $k, $bhi);
            }
        }

        usort($matchingBlocks, array($this, 'tupleSort'));

        // Collapse blocks that directly follow each other in both sequences.
        $i1 = 0;
        $j1 = 0;
        $k1 = 0;
        $nonAdjacent = array();
        foreach ($matchingBlocks as $block) {
            list($i2, $j2, $k2) = $block;
            if ($i1 + $k1 == $i2 && $j1 + $k1 == $j2) {
                $k1 += $k2;
                continue;
            }

            if ($k1) {
                $nonAdjacent[] = array($i1, $j1, $k1);
            }

            $i1 = $i2;
            $j1 = $j2;
            $k1 = $k2;
        }

        if ($k1) {
            $nonAdjacent[] = array($i1, $j1, $k1);
        }

        // Zero-length sentinel marking the end of both sequences.
        $nonAdjacent[] = array($aLength, $bLength, 0);

        $this->matchingBlocks = $nonAdjacent;
        return $this->matchingBlocks;
    }

    /**
     * Return the list of opcodes describing how to turn the first sequence
     * into the second.
     *
     * Each opcode is array(tag, i1, i2, j1, j2) where tag is one of
     * 'replace', 'delete', 'insert' or 'equal', and a[i1:i2] / b[j1:j2] are
     * the ranges it applies to. The result is cached.
     *
     * @return array List of opcodes.
     */
    public function getOpCodes()
    {
        // "!== null" rather than "!empty": an empty opcode list (both
        // sequences empty) is a valid cached result too.
        if ($this->opCodes !== null) {
            return $this->opCodes;
        }

        $i = 0;
        $j = 0;
        $this->opCodes = array();

        $blocks = $this->getMatchingBlocks();
        foreach ($blocks as $block) {
            list($ai, $bj, $size) = $block;

            // Whatever lies between the previous block and this one differs.
            $tag = '';
            if ($i < $ai && $j < $bj) {
                $tag = 'replace';
            } elseif ($i < $ai) {
                $tag = 'delete';
            } elseif ($j < $bj) {
                $tag = 'insert';
            }

            if ($tag) {
                $this->opCodes[] = array($tag, $i, $ai, $j, $bj);
            }

            $i = $ai + $size;
            $j = $bj + $size;

            // The sentinel block has size 0 and produces no 'equal' opcode.
            if ($size) {
                $this->opCodes[] = array('equal', $ai, $i, $bj, $j);
            }
        }

        return $this->opCodes;
    }

    /**
     * Return the opcodes split into groups of changes, each surrounded by at
     * most $context equal elements on either side.
     *
     * Equal ranges longer than twice the context are cut, which starts a new
     * group. A trailing group consisting of a single 'equal' opcode is dropped.
     *
     * @param int $context Number of equal elements to keep around each change.
     * @return array List of groups; each group is a list of opcodes.
     */
    public function getGroupedOpcodes($context = 3)
    {
        $opCodes = $this->getOpCodes();
        if (empty($opCodes)) {
            $opCodes = array(
                array('equal', 0, 1, 0, 1)
            );
        }

        // Trim a leading 'equal' range down to the last $context elements.
        if ($opCodes[0][0] == 'equal') {
            $opCodes[0] = array(
                $opCodes[0][0],
                max($opCodes[0][1], $opCodes[0][2] - $context),
                $opCodes[0][2],
                max($opCodes[0][3], $opCodes[0][4] - $context),
                $opCodes[0][4]
            );
        }

        // Trim a trailing 'equal' range down to the first $context elements.
        $lastItem = count($opCodes) - 1;
        if ($opCodes[$lastItem][0] == 'equal') {
            list($tag, $i1, $i2, $j1, $j2) = $opCodes[$lastItem];
            $opCodes[$lastItem] = array(
                $tag,
                $i1,
                min($i2, $i1 + $context),
                $j1,
                min($j2, $j1 + $context)
            );
        }

        $maxRange = $context * 2;
        $groups = array();
        $group = array();
        foreach ($opCodes as $code) {
            list($tag, $i1, $i2, $j1, $j2) = $code;
            if ($tag == 'equal' && $i2 - $i1 > $maxRange) {
                // Close the current group with the head of the long equal
                // range and start the next group with its tail.
                $group[] = array(
                    $tag,
                    $i1,
                    min($i2, $i1 + $context),
                    $j1,
                    min($j2, $j1 + $context)
                );
                $groups[] = $group;
                $group = array();
                $i1 = max($i1, $i2 - $context);
                $j1 = max($j1, $j2 - $context);
            }

            $group[] = array($tag, $i1, $i2, $j1, $j2);
        }

        if (!empty($group) && !(count($group) == 1 && $group[0][0] == 'equal')) {
            $groups[] = $group;
        }

        return $groups;
    }

    /**
     * Return a similarity measure of the two sequences: twice the number of
     * matched elements divided by the total number of elements in both
     * sequences (1 when both are empty).
     *
     * @return float|int Similarity ratio.
     */
    public function Ratio()
    {
        $matches = array_reduce($this->getMatchingBlocks(), array($this, 'ratioReduce'), 0);
        return $this->calculateRatio($matches, count($this->a) + count($this->b));
    }

    /**
     * array_reduce() helper: add the last item of a matching-block triple
     * (its length) to the running sum.
     *
     * @param int   $sum    Running total.
     * @param array $triple Matching block.
     * @return int New running total.
     */
    private function ratioReduce($sum, $triple)
    {
        return $sum + ($triple[count($triple) - 1]);
    }

    /**
     * Compute a ratio from element counts alone, ignoring element order:
     * every element of the first sequence counts as matched while occurrences
     * of it remain available in the second sequence.
     *
     * @return float|int Ratio based on per-element occurrence counts.
     */
    private function quickRatio()
    {
        if ($this->fullBCount === null) {
            $this->fullBCount = array();
            $bLength = count($this->b);
            for ($i = 0; $i < $bLength; ++$i) {
                $char = $this->b[$i];
                $this->fullBCount[$char] = $this->arrayGetDefault($this->fullBCount, $char, 0) + 1;
            }
        }

        // $avail[$char] = occurrences of $char in b not yet paired with a.
        $avail = array();
        $matches = 0;
        $aLength = count($this->a);
        for ($i = 0; $i < $aLength; ++$i) {
            $char = $this->a[$i];
            if (isset($avail[$char])) {
                $numb = $avail[$char];
            } else {
                $numb = $this->arrayGetDefault($this->fullBCount, $char, 0);
            }
            $avail[$char] = $numb - 1;
            if ($numb > 0) {
                ++$matches;
            }
        }

        // The result used to be computed and then discarded; return it.
        return $this->calculateRatio($matches, count($this->a) + count($this->b));
    }

    /**
     * Compute a ratio from the sequence lengths alone, assuming every element
     * of the shorter sequence matches.
     *
     * @return float|int Ratio based on the two lengths.
     */
    private function realquickRatio()
    {
        $aLength = count($this->a);
        $bLength = count($this->b);

        return $this->calculateRatio(min($aLength, $bLength), $aLength + $bLength);
    }

    /**
     * Turn a match count and a total length into a ratio.
     *
     * @param int $matches Number of matched elements.
     * @param int $length  Combined length of both sequences.
     * @return float|int 2 * matches / length, or 1 when length is 0.
     */
    private function calculateRatio($matches, $length = 0)
    {
        if (!$length) {
            return 1;
        }

        return 2 * ($matches / $length);
    }

    /**
     * Fetch $array[$key], falling back to $default when the key is not set
     * (or holds null).
     *
     * @param array      $array   Array to read from.
     * @param string|int $key     Key to look up.
     * @param mixed      $default Fallback value.
     * @return mixed The stored value or the fallback.
     */
    private function arrayGetDefault($array, $key, $default)
    {
        return isset($array[$key]) ? $array[$key] : $default;
    }

    /**
     * usort() callback ordering two tuples item by item; when all shared
     * positions are equal the shorter tuple sorts first.
     *
     * @param array $a First tuple.
     * @param array $b Second tuple.
     * @return int -1, 0 or 1.
     */
    private function tupleSort($a, $b)
    {
        $countA = count($a);
        $countB = count($b);

        // Only positions present in both tuples can be compared; iterating
        // up to the larger count would read past the end of the shorter one.
        $shared = min($countA, $countB);
        for ($i = 0; $i < $shared; ++$i) {
            if ($a[$i] < $b[$i]) {
                return -1;
            }
            if ($a[$i] > $b[$i]) {
                return 1;
            }
        }

        if ($countA == $countB) {
            return 0;
        }

        return ($countA < $countB) ? -1 : 1;
    }
}
|