
How to run mellotron without installing TensorFlow 1.x


Install Texar with the following steps:

git clone https://github.com/asyml/texar-pytorch.git 
cd texar-pytorch
pip install -e .
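
After the editable install finishes, you can sanity-check that texar.torch and its HParams class import correctly. This is a minimal sketch of my own (the file name and values are made up, not from the official docs); run it from outside the cloned directory:

# check_texar.py -- minimal sanity check that texar-pytorch installed correctly
from texar.torch import HParams

defaults = {"learning_rate": 1e-3, "batch_size": 32}
hp = HParams({"batch_size": 16}, defaults)   # given values override the defaults
print(hp.learning_rate, hp.batch_size)       # -> 0.001 16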

(The install steps above are quoted from the official documentation.)

Rewrite hparams.py as follows (set the file paths to match your own setup):

# hparams.py
from text.symbols import symbols
from texar.torch import HParams


def create_hparams(hparams_string=None, verbose=False):
    """Create model hyperparameters. Parse nondefault from given string."""

    hparams = {
        ################################
        # Experiment Parameters        #
        ################################
        "epochs": 50000,
        "iters_per_checkpoint": 500,
        "seed": 1234,
        "dynamic_loss_scaling": True,
        "fp16_run": False,
        "distributed_run": False,
        "dist_backend": "nccl",
        "dist_url": "tcp://localhost:54321",
        "cudnn_enabled": True,
        "cudnn_benchmark": False,
        "ignore_layers": ['speaker_embedding.weight'],

        ################################
        # Data Parameters             #
        ################################
        "training_files": $YOUR_PATH,
        "validation_files": $YOUR_PATH,
        "text_cleaners": ['basic_cleaners'],
        "p_arpabet": 1.0,
        "cmudict_path": "data/cmu_dictionary",

        ################################
        # Audio Parameters             #
        ################################
        "max_wav_value": 32768.0,
        "sampling_rate": 22050,
        "filter_length": 1024,
        "hop_length": 256,
        "win_length": 1024,
        "n_mel_channels": 80,
        "mel_fmin": 0.0,
        "mel_fmax": 8000.0,
        "f0_min": 80,
        "f0_max": 880,
        "harm_thresh": 0.25,

        ################################
        # Model Parameters             #
        ################################
        "n_symbols": len(symbols),
        "symbols_embedding_dim": 512,

        # Encoder parameters
        "encoder_kernel_size": 5,
        "encoder_n_convolutions": 3,
        "encoder_embedding_dim": 512,

        # Decoder parameters
        "n_frames_per_step": 1,  # currently only 1 is supported
        "decoder_rnn_dim": 1024,
        "prenet_dim": 256,
        "prenet_f0_n_layers": 1,
        "prenet_f0_dim": 1,
        "prenet_f0_kernel_size": 1,
        "prenet_rms_dim": 0,
        "prenet_rms_kernel_size": 1,
        "max_decoder_steps": 1000,
        "gate_threshold": 0.5,
        "p_attention_dropout": 0.1,
        "p_decoder_dropout": 0.1,
        "p_teacher_forcing": 1.0,

        # Attention parameters
        "attention_rnn_dim": 1024,
        "attention_dim": 128,

        # Location Layer parameters
        "attention_location_n_filters": 32,
        "attention_location_kernel_size": 31,

        # Mel-post processing network parameters
        "postnet_embedding_dim": 512,
        "postnet_kernel_size": 5,
        "postnet_n_convolutions": 5,

        # Speaker embedding
        "n_speakers": 123,
        "speaker_embedding_dim": 128,

        # Reference encoder
        "with_gst": True,
        "ref_enc_filters": [32, 32, 64, 64, 128, 128],
        "ref_enc_size": [3, 3],
        "ref_enc_strides": [2, 2],
        "ref_enc_pad": [1, 1],
        "ref_enc_gru_size": 128,

        # Style Token Layer
        "token_embedding_size": 256,
        "token_num": 10,
        "num_heads": 8,

        ################################
        # Optimization Hyperparameters #
        ################################
        "use_saved_learning_rate": False,
        "learning_rate": 1e-3,
        "learning_rate_min": 1e-5,
        "learning_rate_anneal": 50000,
        "weight_decay": 1e-6,
        "grad_clip_thresh": 1.0,
        "batch_size": 32,
        "mask_padding": True,  # set model's padded outputs to padded values
    }
    """
    if hparams_string:
        tf.compat.v1.logging.info(
            'Parsing command line hparams: %s', hparams_string)
        hparams.parse(hparams_string)

    if verbose:
        tf.compat.v1.logging.info('Final parsed hparams: %s', hparams.values())
    """
    hparams = HParams(hparams, hparams)
    return hparams
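
With this rewrite, the rest of mellotron can keep calling create_hparams() unchanged; the only difference is that the returned object is a texar.torch.HParams instead of tf.contrib.training.HParams, and values are still read as attributes. A minimal usage sketch (my own, assuming it is run from the mellotron root so that text.symbols resolves):

# hypothetical snippet showing how the rewritten hparams are consumed
from hparams import create_hparams

hparams = create_hparams()
print(hparams.epochs)           # 50000
print(hparams.sampling_rate)    # 22050
print(hparams.ignore_layers)    # ['speaker_embedding.weight']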
