必要なので書いた。C++からならば簡単に使えるかと思ったが、そんなことはなかったので、自分で書いた。「g++ -L$KENLM_INSTALL_DIR/lib -lkenlm」
などとしてコンパイルすれば使える。
build
# Build rule: fetch KenLM, build it, and produce a shared library lib/libkenlm.so.
# NOTE(review): this looks like a make rule whose recipe tabs were lost when the
# text was pasted — restore the leading tabs before using it in a Makefile.
# Steps, in order:
#   1. download kenlm.tar.gz with curl and unpack it (creates ./kenlm);
#   2. run bjam inside kenlm with a maximum n-gram order of 5, installing into
#      the kenlm directory itself (--prefix=`pwd`);
#   3. create kenlm/lib;
#   4. append one link command to kenlm/compile_query_only.sh so that the script
#      also links its objects into lib/libkenlm.so; `\$$CXX` is presumably
#      make-escaped (`$$` -> `$`) so the literal text `$CXX` lands in the script;
#   5. run that script with CXXFLAGS=-fpic so the objects are position-independent.
# Each recipe line repeats `cd kenlm &&`, presumably because make runs every
# recipe line in its own shell.
kenlm:
\curl http://kheafield.com/code/kenlm.tar.gz |tar xz
cd kenlm && ./bjam --max-kenlm-order=5 --prefix=`pwd`
mkdir -p kenlm/lib
echo "\$$CXX \$$CXXFLAGS \$$objects -shared -o lib/libkenlm.so" >>kenlm/compile_query_only.sh
cd kenlm && CXXFLAGS=-fpic ./compile_query_only.sh
use
void load_model(const char *model_file)
{
shared_ptr<lm::base::Model> rv = nullptr;
using namespace lm::ngram;
if (RecognizeBinary(model_file, model_type_)) {
switch(model_type_) {
case PROBING:
probing_model_ = std::move(shared_ptr<lm::ngram::ProbingModel>(new ProbingModel(model_file, config_)));
break;
case REST_PROBING:
rest_probing_model_ = std::move(shared_ptr<lm::ngram::RestProbingModel>(new RestProbingModel(model_file, config_)));
break;
/*
case TRIE:
rv = shared_ptr<lm::base::Model>(new TrieModel(model_file, config_));
break;
case QUANT_TRIE:
rv = shared_ptr<lm::base::Model>(new QuantTrieModel(model_file, config_));
break;
case ARRAY_TRIE:
rv = shared_ptr<lm::base::Model>(new ArrayTrieModel(model_file, config_));
break;
case QUANT_ARRAY_TRIE:
rv = shared_ptr<lm::base::Model>(new QuantArrayTrieModel(model_file, config_));
break;
*/
default:
std::cerr << "Unrecognized kenlm model type " << model_type_ << std::endl;
abort();
}
} else {
assert(0);
//Query<ProbingModel>(model_file, config, sentence_context, show_words);
}
}
/**
 * Score word `w` against the context [begin, end) with the loaded model.
 *
 * Dispatches on model_type_ to the probing or rest-probing model and returns
 * the `prob` field of the FullScoreReturn produced by FullScoreForgotState.
 * NOTE(review): KenLM documents FullScoreForgotState as taking the context in
 * reverse order (most recent word first) and `prob` as a log10 probability —
 * confirm against the KenLM headers in use.
 *
 * Any model type other than PROBING / REST_PROBING prints a diagnostic and
 * aborts the process.
 *
 * @param begin first word id of the context range.
 * @param end   one past the last word id of the context range.
 * @param w     word id to score.
 * @return the `prob` value reported by the model, widened to double.
 */
double get_cost(const lm::WordIndex *begin,
const lm::WordIndex *end, lm::WordIndex w) const
{
    // FullScoreForgotState requires an output state; it is not used afterwards.
    lm::ngram::State out_state;

    if (model_type_ == lm::ngram::PROBING) {
        return probing_model_->FullScoreForgotState(begin, end, w, out_state).prob;
    }
    if (model_type_ == lm::ngram::REST_PROBING) {
        return rest_probing_model_->FullScoreForgotState(begin, end, w, out_state).prob;
    }

    // Trie-based model types are not supported by this wrapper.
    std::cerr << "Unrecognized kenlm model type " << model_type_ << std::endl;
    abort();
}
lm::WordIndex state[N_GRAM-1];
size_t i;
for (i = 0; i < ctxt.size(); ++i)
state[i] = ctxt[ctxt.size()-i-1];
// Осторожно за range
-get_cost2(state, state+i, w);