LoginSignup
0
0

More than 5 years have passed since last update.

KenLMをC++で呼び出す。参考になればいいと思う

Last updated at Posted at 2015-11-19

必要なので書いた。C++からならば簡単に使えるかと思ったが、そんなことはなかったので、自分で書いた。`g++ -L$KENLM_INSTALL_DIR/lib -lkenlm` などとしてコンパイルすれば使える。

build

# Fetch KenLM and build it as a shared library (kenlm/lib/libkenlm.so).
# Steps, in order:
#   1. Download the kenlm tarball with curl and unpack it in place.
#   2. Run bjam inside kenlm/ with --max-kenlm-order=5, installing into
#      that same directory.
#   3. Create kenlm/lib, then append one extra line to
#      compile_query_only.sh that links the objects into lib/libkenlm.so.
#      `\$$` is `$` escaped for both make and the shell, so the literal
#      text "$CXX $CXXFLAGS $objects ..." is what gets written to the script.
#   4. Run the patched script with CXXFLAGS=-fpic.
kenlm:
        \curl http://kheafield.com/code/kenlm.tar.gz |tar xz
        cd kenlm && ./bjam --max-kenlm-order=5 --prefix=`pwd`
        mkdir -p kenlm/lib
        echo "\$$CXX \$$CXXFLAGS \$$objects -shared -o lib/libkenlm.so" >>kenlm/compile_query_only.sh
        cd kenlm && CXXFLAGS=-fpic ./compile_query_only.sh

use

  // Loads the KenLM binary model stored in `model_file`.
  //
  // RecognizeBinary() writes the detected model type into `model_type_`.
  // Only PROBING and REST_PROBING are handled: the matching member
  // (`probing_model_` / `rest_probing_model_`) is set to a model built from
  // `model_file` and `config_`.
  //
  // If the file is not recognized as a binary model, or its type is not one
  // of the two handled here, a diagnostic is written to std::cerr and the
  // process is aborted.
  void load_model(const char *model_file)
  {
    using namespace lm::ngram;
    if (!RecognizeBinary(model_file, model_type_)) {
      // This used to be `assert(0)`, which disappears under NDEBUG and would
      // let the function return with no model loaded. Fail loudly in every
      // build configuration instead.
      std::cerr << "Not a kenlm binary model file: " << model_file << std::endl;
      abort();
    }
    switch (model_type_) {
    case PROBING:
      probing_model_ = shared_ptr<lm::ngram::ProbingModel>(new ProbingModel(model_file, config_));
      break;
    case REST_PROBING:
      rest_probing_model_ = shared_ptr<lm::ngram::RestProbingModel>(new RestProbingModel(model_file, config_));
      break;
    // TRIE, QUANT_TRIE, ARRAY_TRIE and QUANT_ARRAY_TRIE models are not wired
    // up here; they fall through to the error path below.
    default:
      std::cerr << "Unrecognized kenlm model type " << model_type_ << std::endl;
      abort();
    }
  }
  // Scores word `w` given the context range [begin, end) using whichever
  // model `model_type_` selects, and returns the `prob` field of the result.
  //
  // NOTE(review): the caller snippet in this file fills the context with the
  // most recent word first - confirm that ordering against the KenLM docs.
  //
  // For any model type other than PROBING / REST_PROBING a diagnostic is
  // written to std::cerr and the process is aborted.
  double get_cost(const lm::WordIndex *begin,
      const lm::WordIndex *end, lm::WordIndex w) const
  {
    // Out-parameter required by the scoring call; its value is not used here.
    lm::ngram::State out_state;

    if (model_type_ == lm::ngram::PROBING) {
      const lm::FullScoreReturn scored =
          probing_model_->FullScoreForgotState(begin, end, w, out_state);
      return scored.prob;
    }
    if (model_type_ == lm::ngram::REST_PROBING) {
      const lm::FullScoreReturn scored =
          rest_probing_model_->FullScoreForgotState(begin, end, w, out_state);
      return scored.prob;
    }

    // Trie-based model types are not supported by this wrapper.
    std::cerr << "Unrecognized kenlm model type " << model_type_ << std::endl;
    abort();
  }
    lm::WordIndex state[N_GRAM-1];
    size_t i;
    for (i = 0; i < ctxt.size(); ++i)
      state[i] = ctxt[ctxt.size()-i-1];
    // Осторожно за range
    -get_cost2(state, state+i, w);
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0