00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 include_once('TranslationWord.php');
00022
/**
 * A list of TranslationWord objects that can be walked with foreach.
 *
 * A list can be filled manually (AddWord() / AddID()) or built from a text
 * passed to the constructor. In the latter case the text is split into
 * lower-cased words and every word is looked up in - or inserted into - the
 * "WordIndex" table so that each TranslationWord carries an ID.
 *
 * GetDbConn(), GetTables(), fixstr() and the TranslationWord class are
 * defined outside this class.
 */
class TranslationWordList implements Iterator
{
    /** TranslationWord objects held by this list, in insertion order. */
    private $allWords = array();

    /** When non-zero, SumCount() returns this value instead of the real sum. */
    private $fakeLength = 0;

    /** Database connection as returned by GetDbConn(). */
    private $DB;

    /** Table name map as returned by GetTables(). */
    private $Tables;

    /** Current position used by the Iterator methods. */
    private $currIndex = 0;

    /** Map of word => ID, filled by InitWordsCache() and AddToWordIndex(). */
    public static $WordsCache;

    /**
     * Creates the list and, when a text is given, fills it from that text.
     *
     * @param string $text Text to split into words; nothing is split when it
     *                     compares equal to "".
     */
    public function __construct($text = "")
    {
        $this->DB = GetDbConn();
        $this->Tables = GetTables();

        $this->currIndex = 0;
        $this->fakeLength = 0;
        // Keep the property an array (instead of unsetting it) so count()
        // and foreach always receive a valid argument on an empty list.
        $this->allWords = array();

        if ($text != "") {
            $this->SplitToWords($text);
        }
    }

    // The attribute lines below are plain "#" comments before PHP 8; on
    // PHP 8.1+ they silence the deprecation notice about missing return
    // types on methods of the built-in Iterator interface.

    /**
     * Iterator: moves back to the first word.
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->currIndex = 0;
    }

    /**
     * Tells whether the current position still points at a word.
     *
     * @return bool Same result as valid().
     */
    public function hasMore()
    {
        return $this->valid();
    }

    /**
     * Iterator: returns the current position.
     *
     * @return int Zero-based index into the word list.
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->currIndex;
    }

    /**
     * Iterator: returns the word at the current position.
     *
     * @return TranslationWord
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->allWords[$this->currIndex];
    }

    /**
     * Iterator: advances to the next word.
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->currIndex++;
    }

    /**
     * Iterator: tells whether the current position is inside the list.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return count($this->allWords) > $this->currIndex;
    }

    /**
     * Loads every (Word, ID) row of the "WordIndex" table into $WordsCache.
     *
     * Terminates the script with die() when the query fails.
     */
    public static function InitWordsCache()
    {
        $Tables = GetTables();
        $DB = GetDbConn();

        $rs = $DB->Execute("SELECT Word, ID FROM " . $Tables["WordIndex"]);
        if (!$rs) {
            // Static context: there is no $this, report through the local
            // connection instead.
            die("DB Error:" . $DB->ErrorMsg());
        }

        while ($oldWord = $rs->FetchRow()) {
            self::$WordsCache[$oldWord["Word"]] = $oldWord["ID"];
        }
    }

    /**
     * Adds a word to the list, or raises its count when already present.
     *
     * @param string $word  The word to add.
     * @param int    $count Occurrences to add (default 1).
     */
    public function AddWord($word, $count = 1)
    {
        $existing = $this->FindByName($word);
        if ($existing !== null) {
            $existing->SetCount($existing->GetCount() + $count);
            return;
        }

        $fresh = new TranslationWord();
        $fresh->SetWord($word);
        $fresh->SetCount($count);
        $this->allWords[] = $fresh;
    }

    /**
     * Adds a word by its ID, or raises its count when the ID is already
     * present in the list.
     *
     * @param mixed $id    ID of the word.
     * @param int   $count Occurrences to add (default 1).
     */
    public function AddID($id, $count = 1)
    {
        $existing = $this->FindByID($id);
        if ($existing !== null) {
            $existing->SetCount($existing->GetCount() + $count);
            return;
        }

        $fresh = new TranslationWord();
        $fresh->SetID($id);
        $fresh->SetCount($count);
        $this->allWords[] = $fresh;
    }

    /**
     * Sets the ID of a word that is already in the list.
     *
     * Terminates the script with die() when the word is not in the list.
     *
     * @param string $word Word to look up.
     * @param mixed  $id   ID to assign.
     */
    public function SetIDByName($word, $id)
    {
        $target = $this->FindByName($word);
        if ($target === null) {
            die("WORDS: SetIDByName to a not existing word: $word, $id.\n");
        }

        $target->SetID($id);
    }

    /**
     * Overrides the value reported by SumCount().
     *
     * @param int $length Value to report; 0 switches the override off.
     */
    public function SetFakeLength($length)
    {
        $this->fakeLength = $length;
    }

    /**
     * Replaces the list content with the words found in $text and registers
     * them in the word index.
     *
     * Line breaks become spaces, a few inline HTML tags are removed, the
     * text is split on spaces and punctuation, and each piece is trimmed.
     * Empty and numeric pieces are skipped; the rest is added lower-cased.
     *
     * @param string $text Text to split.
     */
    private function SplitToWords($text)
    {
        $this->allWords = array();
        $this->SetFakeLength(0);

        // Line breaks count as word separators.
        $text = str_replace(array("\n", "\r"), " ", $text);

        // Drop inline formatting tags so they do not end up inside words.
        $text = str_replace(
            array("<strong>", "</strong>", "<b>", "</b>", "<i>", "</i>", "<em>", "</em>"),
            "",
            $text
        );

        // NOTE(review): this line used to replace a space with a space,
        // which does nothing. Given the tag stripping above it was
        // presumably meant to turn the &nbsp; entity into a space - confirm.
        $text = str_replace("&nbsp;", " ", $text);

        // split() no longer exists since PHP 7.0; preg_split() with the
        // same character class does the job. The backslash stays in the
        // class because the old POSIX bracket expression treated it as a
        // literal delimiter as well.
        $wordList = preg_split('/[ \\\\.,()!?:;=|*]/', $text);
        if ($wordList === false) {
            $wordList = array();
        }

        foreach ($wordList as $word) {
            $word = trim($word, " \t\n\r:[]#'\"$+-_<>/");

            if ($word !== "" && !is_numeric($word)) {
                $this->AddWord(strtolower($word));
            }
        }

        if ($this->SumCount() != 0) {
            $this->AddToWordIndex();
        }
    }

    /**
     * Finds the first word in the list whose text equals $word.
     *
     * @param string $word Text to look for (compared with ==).
     * @return TranslationWord|null The matching word object, or null.
     */
    public function &FindByName($word)
    {
        // Returned by reference, so null has to live in a variable.
        $null = null;

        foreach ($this->allWords as $w) {
            if ($w->GetWord() == $word) {
                return $w;
            }
        }

        return $null;
    }

    /**
     * Finds the first word in the list whose ID equals $id.
     *
     * @param mixed $id ID to look for (compared with ==).
     * @return TranslationWord|null The matching word object, or null.
     */
    public function &FindByID($id)
    {
        // Returned by reference, so null has to live in a variable.
        $null = null;

        foreach ($this->allWords as $w) {
            if ($w->GetID() == $id) {
                return $w;
            }
        }

        return $null;
    }

    /**
     * Returns the total number of word occurrences in the list.
     *
     * @return int The fake length when one is set (non-zero), otherwise the
     *             sum of GetCount() over all words (0 for an empty list).
     */
    public function SumCount()
    {
        if ($this->fakeLength != 0) {
            return $this->fakeLength;
        }

        $total = 0;
        foreach ($this->allWords as $w) {
            $total += $w->GetCount();
        }

        return $total;
    }

    /**
     * Scores how much this list has in common with another list, matching
     * words by ID.
     *
     * Every word of this list that also exists in $otherWords earns points:
     * twice its count when both counts are equal, otherwise twice the
     * absolute difference of the two counts. The result is
     * floor(100 / (SumCount() of both lists) * points).
     *
     * @param TranslationWordList $otherWords List to compare against.
     * @return float The score; 0 when both lists report a total of 0.
     */
    public function CompareByID($otherWords)
    {
        $max = $this->SumCount() + $otherWords->SumCount();
        if ($max == 0) {
            // Nothing to compare; also avoids dividing by zero below.
            return floor(0);
        }

        $points = 0;
        foreach ($this->allWords as $w) {
            $w2 = $otherWords->FindByID($w->GetID());
            if ($w2 === null) {
                continue;
            }

            if ($w2->GetCount() == $w->GetCount()) {
                $points += $w->GetCount() * 2;
            } else {
                // NOTE(review): a larger difference earns more points here,
                // which looks surprising for a similarity score - confirm
                // whether the smaller of the two counts was intended.
                $points += abs($w2->GetCount() - $w->GetCount()) * 2;
            }
        }

        return floor((100 / $max) * $points);
    }

    /**
     * Gives every word in the list an ID from the word index.
     *
     * Words found in $WordsCache get the cached ID. Other words are inserted
     * into the "WordIndex" table, receive the new insert ID and are added to
     * the cache. Terminates the script with die() when the insert fails or
     * no insert ID is available.
     */
    private function AddToWordIndex()
    {
        foreach ($this->allWords as $w) {
            $word = $w->GetWord();

            if (!empty(self::$WordsCache[$word])) {
                // $w is already the object for this word, so the ID can be
                // set directly without searching the list again.
                $w->SetID(self::$WordsCache[$word]);
                continue;
            }

            // NOTE(review): the value is escaped by fixstr(), defined
            // elsewhere - verify it is sufficient for this database.
            $ok = $this->DB->Execute("INSERT INTO " . $this->Tables["WordIndex"] . " (Word) VALUES ('" . fixstr($word) . "')");
            if (!$ok) {
                die("DB Error:" . $this->DB->ErrorMsg() . " Line: " . __LINE__);
            }

            $newId = $this->DB->Insert_ID();
            if (!$newId) {
                die("DB Error: Insert_ID not supported. Line: " . __LINE__);
            }

            $w->SetID($newId);
            self::$WordsCache[$word] = $w->GetID();
        }
    }
}
00487
00488
00489 ?>