MySQLの標準機能で日本語を全文検索する(4)

検索部分の実装。markupで全文のうち最初にマッチする単語が出てくる部分を抜き出してハイライト化する。
ページ処理はしていない。検索時間は JavaScript で後から表示。
PHP にはあまり慣れていなかったものの、LAMP の手軽さを改めて実感した。

find.php

<?php
// Make the mbstring string and regex functions used by this page operate on UTF-8.
mb_internal_encoding('UTF-8');
mb_regex_encoding('UTF-8');

// Declare the response as UTF-8 HTML before any markup is sent.
header('Content-Type:text/html;charset=utf-8');

// Presumably defines the DBManager class instantiated further down - confirm in db.php.
require 'db.php';
?>
<html>
<head>
<title>Full Text Search Result</title>
<style type="text/css">
h1 {
  font-weight:normal; color:#2530e5; font-size:200%;
  font-family:'Arial'; margin-top:20px;
}
ol,li,p { margin:0; padding:0; }
li { list-style:none; margin:1.75em 0; }
a  { font-weight:bold; }
p  { margin-top:0.25em; font-size:80%; line-height:1.5; width:530px; }
em { font-weight:bold; font-style:normal; background-color:#ff0; }
#hitcount {
  font-size:80%;
  border-top:#3366cc 1px solid;
  background-color:#e5ecf9;
  padding:3px;
  text-align:right;
}
</style>
</head>
<body>
<h1>全文検索</h1>
<?php
// Search form. The previously submitted query is echoed back into the text
// box; it comes straight from the request, so it is HTML-escaped to keep it
// from breaking out of the value attribute. A missing or non-string "find"
// parameter is rendered as an empty box.
?>
<form method="get" action="find.php">
  <input type="text" name="find" value="<?php echo htmlspecialchars((isset($_GET['find']) && is_string($_GET['find'])) ? $_GET['find'] : '', ENT_QUOTES, 'UTF-8'); ?>" style="width:300px"/>
  <input type="submit" value=" 検索 "/><br/>
  <input type="checkbox" id="titleonly" name="titleonly" value="1"/><label for="titleonly">タイトルのみ</label>
</form>
<?php

/**
 * Builds an HTML snippet of $content with the search words highlighted.
 *
 * The first line of $content (everything up to and including the first "\n")
 * is never shown - presumably it holds the title, which the caller prints
 * separately. From the remaining body a window of at most 130 characters is
 * taken around the first occurrence of the first word that occurs at all
 * (case-insensitive), with up to 30 characters of leading context. "..." is
 * added on either side where text was cut off. If no word occurs, the start
 * of the body is shown.
 *
 * The snippet text is HTML-escaped and every occurrence of a search word is
 * wrapped in <em style="background-color:...">. Colors are assigned by the
 * word's position in $words, cycling through four colors. Words are matched
 * literally, not as regular expressions.
 *
 * @param string $content Plain-text document; the first line is skipped.
 * @param array  $words   Search words to locate and highlight.
 * @return string HTML fragment, safe to echo inside an element.
 */
function markup($content, $words) {
  $snippetLength = 130; // maximum number of body characters shown
  $leadContext   = 30;  // characters kept in front of the first hit
  $palette = array("#ffff00", "#00ffff", "#ff00ff", "#00ff00");

  $content = (string)$content;
  if (!is_array($words) && !($words instanceof Traversable)) {
    $words = array();
  }

  // Skip the first line. Without any newline the whole text is the body
  // (adding 1 to a failed mb_strpos() would silently drop one character).
  $newlinePos = mb_strpos($content, "\n");
  $ctnstart = ($newlinePos === false) ? 0 : $newlinePos + 1;
  $totalLength = mb_strlen($content);

  // Pair every usable word with its highlight color. The color index follows
  // the position in $words, so skipping empty words does not shift colors.
  $targets = array();
  $position = 0;
  foreach ($words as $word) {
    $word = (string)$word;
    if ($word !== '') {
      $targets[] = array($word, $palette[$position % count($palette)]);
    }
    $position++;
  }

  // Choose the window of text to display.
  $prefix = "";
  $suffix = "";
  if ($totalLength - $ctnstart <= $snippetLength) {
    // Short body: show all of it.
    $snippet = mb_substr($content, $ctnstart);
  } else {
    $start = $ctnstart;
    foreach ($targets as $target) {
      // Always search from the start of the body; a miss for one word must
      // not influence where the next word is looked up.
      $hit = mb_stripos($content, $target[0], $ctnstart);
      if ($hit !== false) {
        // Never reach back into the skipped first line.
        $start = max($ctnstart, $hit - $leadContext);
        break;
      }
    }
    $snippet = mb_substr($content, $start, $snippetLength);
    if ($start > $ctnstart) {
      $prefix = "...";
    }
    if ($start + $snippetLength < $totalLength) {
      $suffix = "...";
    }
  }

  if (count($targets) == 0) {
    return $prefix.htmlspecialchars($snippet, ENT_QUOTES, 'UTF-8').$suffix;
  }

  // Split the snippet on all words in a single pass. With one capture group
  // and PREG_SPLIT_DELIM_CAPTURE the result alternates between plain text
  // (even indexes) and matched words (odd indexes), so markup inserted for
  // one word can never be re-matched by another word.
  $alternatives = array();
  foreach ($targets as $target) {
    $alternatives[] = preg_quote($target[0], '/');
  }
  $pieces = preg_split('/('.implode('|', $alternatives).')/iu', $snippet, -1, PREG_SPLIT_DELIM_CAPTURE);
  if ($pieces === false) {
    // Matching failed (e.g. invalid UTF-8): fall back to the plain snippet.
    return $prefix.htmlspecialchars($snippet, ENT_QUOTES, 'UTF-8').$suffix;
  }

  $html = $prefix;
  foreach ($pieces as $index => $piece) {
    $escaped = htmlspecialchars($piece, ENT_QUOTES, 'UTF-8');
    if ($index % 2 == 0) {
      $html .= $escaped;
      continue;
    }
    // Find which word this match belongs to in order to pick its color.
    $color = $targets[0][1];
    foreach ($targets as $target) {
      if (preg_match('/^'.preg_quote($target[0], '/').'\z/iu', $piece)) {
        $color = $target[1];
        break;
      }
    }
    $html .= "<em style=\"background-color:".$color."\">".$escaped."</em>";
  }

  return $html.$suffix;
}

// Run the search and print the result list when a non-empty query string was
// submitted. A missing or non-string "find" parameter counts as "no query";
// the comparison with '' (rather than a truthiness test) lets "0" be searched.
// $starttime, $endtime and $hitcount are left in scope for the summary script
// printed after the list.
$query = (isset($_GET['find']) && is_string($_GET['find'])) ? $_GET['find'] : '';
$titleOnly = isset($_GET['titleonly']) && $_GET['titleonly'] == 1;
if ($query !== '') {
// microtime(true) returns seconds as a float. Casting the default string
// form ("msec sec") to float keeps only the sub-second part, which makes the
// elapsed time wrong whenever the search crosses a second boundary.
$starttime = microtime(true);
?>
<div id="hitcount"></div>
<ol>
<?php
  $dbm = new DBManager("livedocs_ft");
  if ($titleOnly) {
    $stmt = $dbm->findTitle($query);
  } else {
    $stmt = $dbm->find($query);
  }
  $hitcount = 0;
  if ($stmt->execute()) {
    while ($record = $stmt->fetch(PDO::FETCH_ASSOC)) {
        // Values read from the database are escaped for their HTML context.
        echo "<li><a href=\"".htmlspecialchars((string)$record['uri'], ENT_QUOTES, 'UTF-8')."\">"
            .htmlspecialchars((string)$record['title'], ENT_QUOTES, 'UTF-8')."</a>\n";
        if (!$titleOnly) {
          // Body snippet with the matched words highlighted.
          echo "<p>";
          echo markup($record['content'], $dbm->getMarkupWords());
          echo "</p>";
        }
        echo "<p>";
        echo htmlspecialchars((string)$record['score'], ENT_QUOTES, 'UTF-8');
        echo "</p></li>";
        $hitcount++;
    }
  }
  $endtime = microtime(true);
?>
</ol>
<?php
}
?>
<?php
// Fill the #hitcount bar with the result summary. This is done from a script
// at the end of the page because the hit count and elapsed time are only
// known after the result list has been printed.
//
// The script is emitted only when a search actually ran: $hitcount,
// $starttime and $endtime are set only in that case, and the #hitcount
// element exists only then as well.
if (isset($hitcount, $starttime, $endtime)) {
  $summaryQuery = (isset($_GET['find']) && is_string($_GET['find'])) ? $_GET['find'] : '';
  // The query is user input: escape it for HTML first (it is assigned to
  // innerHTML), then encode the whole string as a JavaScript literal.
  $summaryHtml = "<b>".htmlspecialchars($summaryQuery, ENT_QUOTES, 'UTF-8')."</b> で検索した結果 <b>"
    .(int)$hitcount."</b> 件 (<b>".sprintf("%.3f", $endtime - $starttime)."</b> 秒)";
  // The JSON_HEX_* flags keep "<", ">", "&" and quotes out of the inline
  // script so the literal cannot terminate the <script> element.
  $summaryJs = json_encode($summaryHtml, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
  if ($summaryJs === false) {
    $summaryJs = '""';
  }
?>
<script type="text/javascript">
  document.getElementById('hitcount').innerHTML = <?php echo $summaryJs; ?>;
</script>
<?php
}
?>
</body>
</html>