MeCab + Senna + Tritonn で MySQL 全文検索を試す(2)
MeCab + Senna + Tritonn で MySQL 全文検索を試す - Tosshi Note の続き
Google 風の検索 UI を実装してみた。
テーブルを作成 - SQL
-- Table holding the searchable documents: a URI, a title and the body text.
CREATE TABLE fastsearch (
    id      INTEGER AUTO_INCREMENT,
    uri     VARCHAR(512)  NOT NULL,
    title   VARCHAR(1024) NOT NULL,
    content MEDIUMTEXT,
    PRIMARY KEY (id),
    -- USING NGRAM, SECTIONALIZE are Tritonn (Senna) index options, not stock
    -- MySQL. Presumably: n-gram tokenization, with title and content indexed
    -- as separate sections -- confirm against the Tritonn manual.
    FULLTEXT INDEX USING NGRAM, SECTIONALIZE (title, content)
) DEFAULT CHARSET utf8 ENGINE = MyISAM;
データベース設定 - config.php
<?php
// ** MySQL settings ** //
// Each define() sits on its own line: a `//` comment runs to the end of the
// line, so keeping everything on one line would comment out every constant.
define('DB_NAME', 'データベース名');     // The name of the database
define('DB_USER', 'ユーザー');         // Your MySQL username
define('DB_PASSWORD', 'パスワード');    // ...and password
define('DB_HOST', 'ホスト名');         // MySQL server host name
define('TABLE_NAME', 'fastsearch');  // Table used for the full-text search
?>
データベース操作 - db.php
PDO を使って DB を操作する処理を実装する。
find では kwic 関数を使って検索語のマークアップをする。
<?php
require_once("config.php");

/**
 * PDO-based access to the full-text search table named by TABLE_NAME.
 *
 * Typical use: call setWords() with the raw query string, then hitcount()
 * to get the number of matches and find() to get a statement that yields
 * the matching rows.
 */
class DBManager
{
    /** @var string Name of the search table (from TABLE_NAME). */
    public $table;

    /** @var PDO Open database connection. */
    public $dbh;

    /** @var array Search terms as split by setWords(). */
    public $words = array();

    /** @var string Boolean-mode condition built by setWords(). */
    public $cond = '';

    /**
     * Opens the MySQL connection using the constants from config.php.
     *
     * Declared as __construct() because a method named after the class is
     * no longer treated as a constructor by PHP 8.
     */
    public function __construct()
    {
        $this->table = TABLE_NAME;
        $this->dbh = new PDO(
            "mysql:host=" . DB_HOST . ";dbname=" . DB_NAME,
            DB_USER,
            DB_PASSWORD
        );
    }

    /**
     * Inserts one document into the search table.
     *
     * @param string $uri     Document URI.
     * @param string $title   Document title.
     * @param string $content Document body text.
     * @return bool Result of PDOStatement::execute().
     */
    public function insertFullTextIndex($uri, $title, $content)
    {
        $stmt = $this->dbh->prepare(
            "INSERT INTO " . $this->table
            . " (uri, title, content) VALUES (:uri, :title, :content)"
        );
        $stmt->bindValue(':uri', $uri);
        $stmt->bindValue(':title', $title);
        $stmt->bindValue(':content', $content);
        return $stmt->execute();
    }

    /**
     * Splits a raw query string into terms and builds the boolean-mode
     * condition: terms starting with '-' are passed through as exclusions,
     * every other term is prefixed with '+' (required).
     *
     * Terms are stored unescaped; hitcount() and find() pass them to MySQL
     * as bound parameters, so no manual quoting is done here.
     *
     * @param string $sw Raw search string as typed by the user.
     * @return void
     */
    public function setWords($sw)
    {
        // Split on runs of whitespace, treating the full-width space
        // (U+3000) as a separator too. NO_EMPTY drops leading/trailing
        // separators, so an empty or blank query yields no terms at all.
        $terms = preg_split('/[\s\x{3000}]+/u', (string) $sw, -1, PREG_SPLIT_NO_EMPTY);
        if ($terms === false) {
            // preg_split() fails on input that is not valid UTF-8.
            $terms = array();
        }

        $this->words = $terms;
        $this->cond = "";
        foreach ($terms as $term) {
            if ('-' === substr($term, 0, 1)) {
                $this->cond .= " " . $term;
            } else {
                $this->cond .= " +" . $term;
            }
        }
    }

    /**
     * Counts the rows matching the current condition.
     *
     * @return int Number of matching rows; 0 when no terms are set or the
     *             query fails.
     */
    public function hitcount()
    {
        if (trim((string) $this->cond) === '') {
            return 0;
        }

        $stmt = $this->dbh->prepare(
            "SELECT COUNT(*) FROM " . $this->table
            . " WHERE MATCH(title, content) AGAINST (:cond IN BOOLEAN MODE)"
        );
        if ($stmt === false) {
            return 0;
        }
        $stmt->bindValue(':cond', $this->cond);
        if (!$stmt->execute()) {
            return 0;
        }
        return (int) $stmt->fetchColumn();
    }

    /**
     * Prepares the search query for the current terms.
     *
     * The summary column is produced by the kwic() function with each
     * non-excluded term wrapped in <em class='kwN'> (N cycles 0..4).
     *
     * NOTE(review): the fourth kwic() argument is left at 0 as before; if
     * that is the HTML-encoding switch, stored content reaches the page
     * unescaped -- confirm against the Tritonn manual.
     *
     * @param int $page      Zero-based page index, or -1 for no LIMIT.
     * @param int $page_unit Rows per page.
     * @return PDOStatement|false Prepared statement with all parameters
     *         already bound; the caller runs it with execute().
     */
    public function find($page = -1, $page_unit = 50)
    {
        $params = array(':cond' => $this->cond);

        // Build the kwic() call, one bound placeholder per highlighted term.
        $kwic = 'kwic(content, 320, 1, 0, "", "..." ';
        $kcount = 0;
        $n = 0;
        foreach ((array) $this->words as $str) {
            $str = (string) $str;
            if ($str === '' || '-' === substr($str, 0, 1)) {
                continue; // excluded terms are not highlighted
            }
            $name = ':kw' . $n++;
            $params[$name] = $str;
            $kwic .= ', ' . $name . ', "<em class=\'kw' . $kcount . '\'>", "</em>" ';
            if (++$kcount == 5) {
                $kcount = 0;
            }
        }
        $kwic .= ") ";

        // Force integers so nothing but numbers can end up in the LIMIT.
        $page = (int) $page;
        $page_unit = max(1, (int) $page_unit);
        if ($page === -1) {
            $pagesql = "";
        } else {
            $pagesql = " LIMIT " . (max(0, $page) * $page_unit) . "," . $page_unit;
        }

        $stmt = $this->dbh->prepare(
            "SELECT uri, title, " . $kwic . " as summary FROM " . $this->table
            . " WHERE MATCH(title, content) AGAINST (:cond IN BOOLEAN MODE)"
            . $pagesql
        );
        if ($stmt === false) {
            return $stmt;
        }
        foreach ($params as $name => $value) {
            $stmt->bindValue($name, $value);
        }
        return $stmt;
    }
}
?>
検索UI - find.php
db.php を呼び出して Google 風の検索 UI を実現する。
<?php
// Search page: shows the search form and, when a query is given, the
// paged results produced by DBManager (db.php).
mb_internal_encoding("UTF-8");
mb_regex_encoding("UTF-8");
header('Content-Type:text/html;charset=utf-8');
require("db.php");

// Number of results shown per page.
$pagecount = 10;

/**
 * Escapes a value for use in HTML text or a quoted attribute.
 *
 * @param mixed $s Value to escape.
 * @return string
 */
function h($s)
{
    return htmlspecialchars((string) $s, ENT_QUOTES, 'UTF-8');
}

/**
 * Returns the current Unix time in seconds, with microseconds, as a float.
 *
 * @return float
 */
function microtime_float()
{
    return microtime(true);
}

// Raw query string; anything that is not a plain string counts as empty.
$query = (isset($_GET['find']) && is_string($_GET['find'])) ? $_GET['find'] : '';

// The form and the paging links both target the running script, so the
// page works whichever file name it is deployed under.
$self = './' . basename($_SERVER['SCRIPT_NAME']);
?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<title>Fast Search</title>
<link href="style.css" type="text/css" rel="stylesheet" media="screen" />
</head>
<body>
<img src="logo.png" width="517" height="90" alt="Fast Search"/>
<div class="search-form">
<form id="mainForm" method="get" action="<?php echo h($self) ?>">
<input type="text" name="find" value="<?php echo h($query) ?>" style="width:400px"/>
<input type="submit" value=" 検索 "/><br/>
</form>
</div>
<?php
// 検索 (search)
if ($query !== '') {
    $starttime = microtime_float();
    $dbm = new DBManager();
    $dbm->setWords($query);
    $hitcount = (int) $dbm->hitcount();
    $result = "<b>" . h($query) . "</b> で検索した結果 <b>" . $hitcount . "</b> 件";

    // Index of the last page that actually has results.
    $lastpage = ($hitcount > 0) ? (int) floor(($hitcount - 1) / $pagecount) : 0;
    $currentpage = 0;

    if ($hitcount == 0) {
        $endtime = microtime_float();
        echo "<div id=\"hitcount\">";
        echo $result;
        echo " (<b>";
        printf("%.3f", ($endtime - $starttime));
        echo "</b> 秒)</div>";
    } else {
        if ($hitcount <= $pagecount) {
            // Everything fits on one page: no LIMIT needed.
            $stmt = $dbm->find();
        } else {
            if (isset($_GET['page']) && is_scalar($_GET['page'])) {
                // Keep the requested page inside the valid range.
                $currentpage = max(0, min($lastpage, (int) $_GET['page']));
            }
            $stmt = $dbm->find($currentpage, $pagecount);
        }
        $startnum = $currentpage * $pagecount + 1;
        $endnum = min($hitcount, $startnum + $pagecount - 1);

        echo "<div id=\"hitcount\">";
        $endtime = microtime_float();
        echo $result;
        echo "中 <b>$startnum</b> - <b>$endnum</b> 件目 (<b>";
        printf("%.3f", ($endtime - $starttime));
        echo "</b> 秒)</div>";

        if ($stmt && $stmt->execute()) {
            echo "<ol>";
            while ($record = $stmt->fetch(PDO::FETCH_ASSOC)) {
                echo "<li><a href=\"" . h($record['uri']) . "\">" . h($record['title']) . "</a>\n";
                // summary comes from kwic() in db.php and already carries
                // the <em> highlight markup, so it cannot be escaped here.
                // NOTE(review): confirm the stored content is trusted, or
                // have kwic() HTML-encode it.
                echo "<div class=\"summary\">";
                echo $record['summary'];
                echo "</div>";
                echo "</li>";
            }
            echo "</ol>";
        } else {
            echo "fail";
        }
    }

    if ($hitcount > $pagecount) {
        $maxlinks = 15; // at most this many page links are listed
        echo "<div class=\"paging-area\">";
        echo "検索結果ページ: <br/>";
        for ($pagen = 0; $pagen <= $lastpage; $pagen++) {
            if ($pagen >= $maxlinks) {
                // Only reached when further pages really exist.
                echo "...";
                break;
            }
            if ($currentpage != $pagen) {
                echo "<a href=\"" . h($self) . "?find=" . urlencode($query)
                    . "&amp;page=" . $pagen . "\">" . ($pagen + 1) . "</a> ";
            } else {
                echo "<b>" . ($pagen + 1) . "</b> ";
            }
        }
        echo "</div>";
    }
}
?>
<div class="copyright">Copyright © 2007 Toshimitsu Takahashi</div>
</body>
</html>