昔、「ハッカーと画家」を読んで作ったベイズ推定のPHP習作が見つかった。
ベイズ推定自体あまり使う機会はないのだけどメモ。
bayesian.php
<?php
// Set the multibyte internal encoding when mbstring is available. Nothing below
// calls an mb_* function, so the script can still run without the extension.
if (function_exists('mb_internal_encoding')) {
    mb_internal_encoding('UTF-8');
}

/**
 * Convert full-width (U+3000) spaces to ASCII spaces and HTML-escape the value.
 *
 * The result is stored pre-escaped because the page template echoes these
 * values straight into the HTML.
 *
 * @param mixed $raw Raw form value; anything that is not a string yields ''.
 * @return string Escaped text with half-width spaces only.
 */
function bayes_sanitize($raw)
{
    if (!is_string($raw)) {
        return '';
    }
    // "\xE3\x80\x80" is the UTF-8 byte sequence of the full-width space.
    $halfWidth = str_replace("\xE3\x80\x80", ' ', $raw);
    return htmlspecialchars($halfWidth, ENT_QUOTES, 'UTF-8');
}

/**
 * Turn a posted list of inputs into a compact list of non-empty, trimmed,
 * sanitised entries.
 *
 * @param mixed $rows Posted value; a non-array is treated as an empty list.
 * @return array List of cleaned entries, re-indexed from 0.
 */
function bayes_word_list($rows)
{
    $words = array();
    if (!is_array($rows)) {
        return $words;
    }
    foreach ($rows as $row) {
        $word = trim(bayes_sanitize($row));
        if ($word !== '') {
            $words[] = $word;
        }
    }
    return $words;
}

/**
 * Build the per-word table: occurrence counts in the Good and Bad lists, the
 * clamped per-class frequencies, and the resulting "good" probability.
 *
 * @param array $g Good entries.
 * @param array $b Bad entries.
 * @return array word => array('g', 'b', 'gp', 'bp', 'gpp').
 */
function bayes_probability_table(array $g, array $b)
{
    $tbl = array();
    foreach (array('g' => $g, 'b' => $b) as $class => $words) {
        foreach ($words as $word) {
            if (!isset($tbl[$word])) {
                $tbl[$word] = array('g' => 0, 'b' => 0);
            }
            $tbl[$word][$class]++;
        }
    }

    $totalG = count($g);
    $totalB = count($b);
    foreach ($tbl as $word => $counts) {
        // An empty class contributes frequency 0 instead of dividing by zero.
        $gRatio = $totalG > 0 ? $counts['g'] / $totalG : 0;
        $bRatio = $totalB > 0 ? $counts['b'] / $totalB : 0;
        // Clamp to [.01, .99] so a single word can never force a 0 or 1 result.
        $gp = max(.01, min(.99, $gRatio));
        $bp = max(.01, min(.99, $bRatio));
        $tbl[$word]['gp'] = $gp;
        $tbl[$word]['bp'] = $bp;
        $tbl[$word]['gpp'] = $gp / ($gp + $bp);
    }
    return $tbl;
}

/**
 * Combine the per-word probabilities of the given words into one probability
 * that the whole word set is "good". Words missing from the table are ignored.
 *
 * @param array $tbl   Table produced by bayes_probability_table().
 * @param array $words Words of the sentence being checked.
 * @return array array(combined probability, table rows of the words that matched).
 */
function bayes_score(array $tbl, array $words)
{
    $gp = $bp = 1;
    $res = array();
    foreach ($words as $word) {
        if (!isset($tbl[$word])) {
            continue;
        }
        $gp *= $tbl[$word]['gpp'];
        $bp *= (1 - $tbl[$word]['gpp']);
        $res[$word] = $tbl[$word];
    }
    $denominator = $gp + $bp;
    // Fall back to the neutral 0.5 if both products have underflowed to zero.
    $gpp = $denominator > 0 ? $gp / $denominator : 0.5;
    return array($gpp, $res);
}

// $gpp stays false until the form has been submitted; the template tests for that.
$gpp = false;
if (isset($_POST['e'])) {
    $g = bayes_word_list(isset($_POST['g']) ? $_POST['g'] : array());
    $b = bayes_word_list(isset($_POST['b']) ? $_POST['b'] : array());
    // Total number of entries per class
    $total = array('g' => count($g), 'b' => count($b));
    $tbl = bayes_probability_table($g, $b);
    // The check sentence is sanitised but not trimmed; it is split on single spaces.
    // explode() replaces split(), which no longer exists as of PHP 7.
    $c = bayes_sanitize(isset($_POST['c']) ? $_POST['c'] : '');
    $cw = array_unique(explode(' ', $c));
    list($gpp, $res) = bayes_score($tbl, $cw);
}
?><html>
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body>
<h1>bayes</h1>
<form method="POST" action="index.php">
<table border="2">
<tr><td>Good Sentence</td><td>Bad Sentence</td></tr>
<?php
// Render ten Good/Bad input rows, refilled with the previously submitted values.
// $g, $b and $c were HTML-escaped when they were read from $_POST, so they are
// echoed as-is here.
for ($i = 0; $i < 10; $i++):
    $gv = isset($g[$i]) ? $g[$i] : '';
    $bv = isset($b[$i]) ? $b[$i] : '';
?>
<tr>
<td><input type="text" name="g[]" value="<?php echo $gv; ?>" size="50"/></td>
<td><input type="text" name="b[]" value="<?php echo $bv; ?>" size="50"/></td>
</tr>
<?php
endfor;
// Sentence to classify; empty until the form has been submitted.
$cv = isset($c) ? $c : '';
?>
</table>
Check Sentence<br />
<input type="text" name="c" value="<?php echo $cv; ?>" size="100" /><br />
<input type="submit" name="e" />
</form>
<?php
// $gpp is false until a submission has been scored; only then show the result.
if ($gpp !== false):
?>
<hr />
<?php echo $cv; ?>
<br />
Good Probability:<?php echo $gpp; ?><br />
<pre>
total:
<?php print_r($total); ?>
words:
<?php print_r($res); ?>
</pre>
<?php
endif;
?>
</body></html>