MySQLの標準機能で日本語を全文検索する(3)

htmlfiles は HTMLファイルのパスが1行ずつ書かれたテキストリストで、コマンドラインから標準入力に流し込む。

$ php into.php < htmlfiles

into.php

<?php
// Use UTF-8 for the mbstring functions and for multibyte regular expressions.
mb_internal_encoding("UTF-8"); 
mb_regex_encoding("UTF-8");

// NOTE(review): db.php and htmlindex.php presumably define the DBManager and
// HtmlIndexExtractor classes used by Register below — confirm.
require_once("db.php");
require_once("htmlindex.php");

/**
 * Reads HTML files from disk, runs them through an HtmlIndexExtractor and
 * hands the extracted title/content to DBManager::insertFullTextIndexPrimary().
 *
 * Progress and errors are reported with echo, one line per file.
 */
class Register {
  /** DBManager instance created for "livedocs_ft". */
  public $dbm;
  /** Not used by this class; kept for backward compatibility. */
  public $uri;
  /** Not used by this class; kept for backward compatibility. */
  public $title;
  /** Not used by this class; kept for backward compatibility. */
  public $content;
  /** HtmlIndexExtractor instance used to extract title and content. */
  public $hie;
  /** Raw text of the file most recently loaded by readFile(). */
  public $htmltext = "";

  /**
   * Creates the collaborators.
   *
   * A real __construct() is required: a method named after the class is no
   * longer treated as a constructor on PHP 8, which would leave $dbm and
   * $hie unset.
   */
  public function __construct() {
    $this->dbm = new DBManager("livedocs_ft");
    $this->hie = new HtmlIndexExtractor();
  }

  /**
   * Registers a single HTML file.
   *
   * Prints "[ERR] ..." and returns if the file cannot be read; otherwise
   * prints "[OK] ..." or "[NG] ..." depending on the truthiness of the value
   * returned by insertFullTextIndexPrimary().
   *
   * @param string $htmlfile Path of the HTML file; also passed as the first
   *                         argument of insertFullTextIndexPrimary().
   * @return void
   */
  public function regist($htmlfile) {
    if (!$this->readFile($htmlfile)) {
      // Report the path that actually failed (the original interpolated an
      // undefined variable here, so the path was never shown).
      echo "[ERR] Can't open file. : $htmlfile\n";
      return;
    }

    $this->hie->extract($this->htmltext);

    $inserted = $this->dbm->insertFullTextIndexPrimary(
      $htmlfile,
      $this->hie->getTitle(),
      $this->hie->getContent()
    );

    if ($inserted) {
      echo "[OK]  Inserted into file : $htmlfile.\n";
    } else {
      echo "[NG]  Inserted into file : $htmlfile.\n";
    }
  }

  /**
   * Loads the whole file into $this->htmltext.
   *
   * @param string $file Path of the file to read.
   * @return bool true on success, false if the file could not be read
   *              ($this->htmltext is left untouched in that case).
   */
  private function readFile($file) {
    $ctn = file_get_contents($file);
    // Strict check: an empty file yields "" which must not count as failure.
    if ($ctn === false) {
      return false;
    }

    $this->htmltext = $ctn;
    return true;
  }
}


// Driver: read one HTML file path per line from STDIN and register each one.
$rg = new Register();

$stdin = fopen('php://stdin', 'r');
if ($stdin === false) {
  echo "No STDIN\n";
  // Signal the failure to the calling shell instead of exiting with status 0.
  exit(1);
}

// Loop on the fgets() result rather than on feof(): fgets() returns false both
// at end of input and on a read error, so the loop cannot spin forever.
while (($line = fgets($stdin)) !== false) {
  // trim() removes the trailing newline and any surrounding whitespace.
  $idxfile = trim($line);
  if ($idxfile !== '') {
    $rg->regist($idxfile);
  }
}
fclose($stdin);

echo "Registing Completed.\n";
?>