UNPKG

aivmlib-web

Version:

Aivis Voice Model File (.aivm/.aivmx) Utility Library for Web

400 lines (399 loc) 17.1 kB
import { z } from 'zod'; /** Static type obtained by applying z.infer to StyleBertVITS2HyperParametersSchema. */ export type StyleBertVITS2HyperParameters = z.infer<typeof StyleBertVITS2HyperParametersSchema>; /** * Schema for Style-Bert-VITS2 hyperparameters. * Which detailed parameters are present varies with when a model was trained, so everything except the parameters the implementation requires is declared optional. * Required in this declaration: model_name, version, the train / data / model objects themselves, and data.n_speakers, data.spk2id, data.num_styles, data.style2id. * Every object level is declared with the "strip" unknown-keys mode (in zod this is expected to drop unrecognized keys on parse - confirm against the zod version in use). * Adapted for TypeScript from the hyperparameter schema definition of Style-Bert-VITS2 v2.4.1. * ref: https://github.com/litagin02/Style-Bert-VITS2/blob/2.4.1/style_bert_vits2/models/hyper_parameters.py */ export declare const StyleBertVITS2HyperParametersSchema: z.ZodObject<{ model_name: z.ZodString; version: z.ZodString; train: z.ZodObject<{ log_interval: z.ZodOptional<z.ZodNumber>; eval_interval: z.ZodOptional<z.ZodNumber>; seed: z.ZodOptional<z.ZodNumber>; epochs: z.ZodOptional<z.ZodNumber>; learning_rate: z.ZodOptional<z.ZodNumber>; betas: z.ZodOptional<z.ZodTuple<[z.ZodNumber, z.ZodNumber], null>>; eps: z.ZodOptional<z.ZodNumber>; batch_size: z.ZodOptional<z.ZodNumber>; bf16_run: z.ZodOptional<z.ZodBoolean>; fp16_run: z.ZodOptional<z.ZodBoolean>; lr_decay: z.ZodOptional<z.ZodNumber>; segment_size: z.ZodOptional<z.ZodNumber>; init_lr_ratio: z.ZodOptional<z.ZodNumber>; warmup_epochs: z.ZodOptional<z.ZodNumber>; c_mel: z.ZodOptional<z.ZodNumber>; c_kl: z.ZodOptional<z.ZodNumber>; c_commit: z.ZodOptional<z.ZodNumber>; skip_optimizer: z.ZodOptional<z.ZodBoolean>; freeze_ZH_bert: z.ZodOptional<z.ZodBoolean>; freeze_JP_bert: z.ZodOptional<z.ZodBoolean>; freeze_EN_bert: z.ZodOptional<z.ZodBoolean>; freeze_emo: z.ZodOptional<z.ZodBoolean>; freeze_style: z.ZodOptional<z.ZodBoolean>; freeze_decoder: z.ZodOptional<z.ZodBoolean>; }, "strip", z.ZodTypeAny, { log_interval?: number | undefined; eval_interval?: number | undefined; seed?: number | undefined; epochs?: number | undefined; learning_rate?: number | undefined; betas?: [number, number] | undefined; eps?: number | undefined; batch_size?: number | undefined; bf16_run?: boolean | undefined; fp16_run?: boolean | undefined; lr_decay?: number | undefined; segment_size?: number | undefined; init_lr_ratio?: number | undefined; 
warmup_epochs?: number | undefined; c_mel?: number | undefined; c_kl?: number | undefined; c_commit?: number | undefined; skip_optimizer?: boolean | undefined; freeze_ZH_bert?: boolean | undefined; freeze_JP_bert?: boolean | undefined; freeze_EN_bert?: boolean | undefined; freeze_emo?: boolean | undefined; freeze_style?: boolean | undefined; freeze_decoder?: boolean | undefined; }, { log_interval?: number | undefined; eval_interval?: number | undefined; seed?: number | undefined; epochs?: number | undefined; learning_rate?: number | undefined; betas?: [number, number] | undefined; eps?: number | undefined; batch_size?: number | undefined; bf16_run?: boolean | undefined; fp16_run?: boolean | undefined; lr_decay?: number | undefined; segment_size?: number | undefined; init_lr_ratio?: number | undefined; warmup_epochs?: number | undefined; c_mel?: number | undefined; c_kl?: number | undefined; c_commit?: number | undefined; skip_optimizer?: boolean | undefined; freeze_ZH_bert?: boolean | undefined; freeze_JP_bert?: boolean | undefined; freeze_EN_bert?: boolean | undefined; freeze_emo?: boolean | undefined; freeze_style?: boolean | undefined; freeze_decoder?: boolean | undefined; }>; data: z.ZodObject<{ use_jp_extra: z.ZodOptional<z.ZodBoolean>; training_files: z.ZodOptional<z.ZodString>; validation_files: z.ZodOptional<z.ZodString>; max_wav_value: z.ZodOptional<z.ZodNumber>; sampling_rate: z.ZodOptional<z.ZodNumber>; filter_length: z.ZodOptional<z.ZodNumber>; hop_length: z.ZodOptional<z.ZodNumber>; win_length: z.ZodOptional<z.ZodNumber>; n_mel_channels: z.ZodOptional<z.ZodNumber>; mel_fmin: z.ZodOptional<z.ZodNumber>; mel_fmax: z.ZodOptional<z.ZodNullable<z.ZodNumber>>; add_blank: z.ZodOptional<z.ZodBoolean>; n_speakers: z.ZodNumber; cleaned_text: z.ZodOptional<z.ZodBoolean>; spk2id: z.ZodRecord<z.ZodString, z.ZodNumber>; num_styles: z.ZodNumber; style2id: z.ZodRecord<z.ZodString, z.ZodNumber>; }, "strip", z.ZodTypeAny, { n_speakers: number; spk2id: Record<string, 
number>; num_styles: number; style2id: Record<string, number>; use_jp_extra?: boolean | undefined; training_files?: string | undefined; validation_files?: string | undefined; max_wav_value?: number | undefined; sampling_rate?: number | undefined; filter_length?: number | undefined; hop_length?: number | undefined; win_length?: number | undefined; n_mel_channels?: number | undefined; mel_fmin?: number | undefined; mel_fmax?: number | null | undefined; add_blank?: boolean | undefined; cleaned_text?: boolean | undefined; }, { n_speakers: number; spk2id: Record<string, number>; num_styles: number; style2id: Record<string, number>; use_jp_extra?: boolean | undefined; training_files?: string | undefined; validation_files?: string | undefined; max_wav_value?: number | undefined; sampling_rate?: number | undefined; filter_length?: number | undefined; hop_length?: number | undefined; win_length?: number | undefined; n_mel_channels?: number | undefined; mel_fmin?: number | undefined; mel_fmax?: number | null | undefined; add_blank?: boolean | undefined; cleaned_text?: boolean | undefined; }>; model: z.ZodObject<{ use_spk_conditioned_encoder: z.ZodOptional<z.ZodBoolean>; use_noise_scaled_mas: z.ZodOptional<z.ZodBoolean>; use_mel_posterior_encoder: z.ZodOptional<z.ZodBoolean>; use_duration_discriminator: z.ZodOptional<z.ZodBoolean>; use_wavlm_discriminator: z.ZodOptional<z.ZodBoolean>; inter_channels: z.ZodOptional<z.ZodNumber>; hidden_channels: z.ZodOptional<z.ZodNumber>; filter_channels: z.ZodOptional<z.ZodNumber>; n_heads: z.ZodOptional<z.ZodNumber>; n_layers: z.ZodOptional<z.ZodNumber>; kernel_size: z.ZodOptional<z.ZodNumber>; p_dropout: z.ZodOptional<z.ZodNumber>; resblock: z.ZodOptional<z.ZodString>; resblock_kernel_sizes: z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>; resblock_dilation_sizes: z.ZodOptional<z.ZodArray<z.ZodArray<z.ZodNumber, "many">, "many">>; upsample_rates: z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>; upsample_initial_channel: 
z.ZodOptional<z.ZodNumber>; upsample_kernel_sizes: z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>; n_layers_q: z.ZodOptional<z.ZodNumber>; use_spectral_norm: z.ZodOptional<z.ZodBoolean>; gin_channels: z.ZodOptional<z.ZodNumber>; slm: z.ZodOptional<z.ZodObject<{ model: z.ZodOptional<z.ZodString>; sr: z.ZodOptional<z.ZodNumber>; hidden: z.ZodOptional<z.ZodNumber>; nlayers: z.ZodOptional<z.ZodNumber>; initial_channel: z.ZodOptional<z.ZodNumber>; }, "strip", z.ZodTypeAny, { model?: string | undefined; sr?: number | undefined; hidden?: number | undefined; nlayers?: number | undefined; initial_channel?: number | undefined; }, { model?: string | undefined; sr?: number | undefined; hidden?: number | undefined; nlayers?: number | undefined; initial_channel?: number | undefined; }>>; }, "strip", z.ZodTypeAny, { use_spk_conditioned_encoder?: boolean | undefined; use_noise_scaled_mas?: boolean | undefined; use_mel_posterior_encoder?: boolean | undefined; use_duration_discriminator?: boolean | undefined; use_wavlm_discriminator?: boolean | undefined; inter_channels?: number | undefined; hidden_channels?: number | undefined; filter_channels?: number | undefined; n_heads?: number | undefined; n_layers?: number | undefined; kernel_size?: number | undefined; p_dropout?: number | undefined; resblock?: string | undefined; resblock_kernel_sizes?: number[] | undefined; resblock_dilation_sizes?: number[][] | undefined; upsample_rates?: number[] | undefined; upsample_initial_channel?: number | undefined; upsample_kernel_sizes?: number[] | undefined; n_layers_q?: number | undefined; use_spectral_norm?: boolean | undefined; gin_channels?: number | undefined; slm?: { model?: string | undefined; sr?: number | undefined; hidden?: number | undefined; nlayers?: number | undefined; initial_channel?: number | undefined; } | undefined; }, { use_spk_conditioned_encoder?: boolean | undefined; use_noise_scaled_mas?: boolean | undefined; use_mel_posterior_encoder?: boolean | undefined; 
use_duration_discriminator?: boolean | undefined; use_wavlm_discriminator?: boolean | undefined; inter_channels?: number | undefined; hidden_channels?: number | undefined; filter_channels?: number | undefined; n_heads?: number | undefined; n_layers?: number | undefined; kernel_size?: number | undefined; p_dropout?: number | undefined; resblock?: string | undefined; resblock_kernel_sizes?: number[] | undefined; resblock_dilation_sizes?: number[][] | undefined; upsample_rates?: number[] | undefined; upsample_initial_channel?: number | undefined; upsample_kernel_sizes?: number[] | undefined; n_layers_q?: number | undefined; use_spectral_norm?: boolean | undefined; gin_channels?: number | undefined; slm?: { model?: string | undefined; sr?: number | undefined; hidden?: number | undefined; nlayers?: number | undefined; initial_channel?: number | undefined; } | undefined; }>; }, "strip", z.ZodTypeAny, { model_name: string; version: string; train: { log_interval?: number | undefined; eval_interval?: number | undefined; seed?: number | undefined; epochs?: number | undefined; learning_rate?: number | undefined; betas?: [number, number] | undefined; eps?: number | undefined; batch_size?: number | undefined; bf16_run?: boolean | undefined; fp16_run?: boolean | undefined; lr_decay?: number | undefined; segment_size?: number | undefined; init_lr_ratio?: number | undefined; warmup_epochs?: number | undefined; c_mel?: number | undefined; c_kl?: number | undefined; c_commit?: number | undefined; skip_optimizer?: boolean | undefined; freeze_ZH_bert?: boolean | undefined; freeze_JP_bert?: boolean | undefined; freeze_EN_bert?: boolean | undefined; freeze_emo?: boolean | undefined; freeze_style?: boolean | undefined; freeze_decoder?: boolean | undefined; }; data: { n_speakers: number; spk2id: Record<string, number>; num_styles: number; style2id: Record<string, number>; use_jp_extra?: boolean | undefined; training_files?: string | undefined; validation_files?: string | undefined; 
max_wav_value?: number | undefined; sampling_rate?: number | undefined; filter_length?: number | undefined; hop_length?: number | undefined; win_length?: number | undefined; n_mel_channels?: number | undefined; mel_fmin?: number | undefined; mel_fmax?: number | null | undefined; add_blank?: boolean | undefined; cleaned_text?: boolean | undefined; }; model: { use_spk_conditioned_encoder?: boolean | undefined; use_noise_scaled_mas?: boolean | undefined; use_mel_posterior_encoder?: boolean | undefined; use_duration_discriminator?: boolean | undefined; use_wavlm_discriminator?: boolean | undefined; inter_channels?: number | undefined; hidden_channels?: number | undefined; filter_channels?: number | undefined; n_heads?: number | undefined; n_layers?: number | undefined; kernel_size?: number | undefined; p_dropout?: number | undefined; resblock?: string | undefined; resblock_kernel_sizes?: number[] | undefined; resblock_dilation_sizes?: number[][] | undefined; upsample_rates?: number[] | undefined; upsample_initial_channel?: number | undefined; upsample_kernel_sizes?: number[] | undefined; n_layers_q?: number | undefined; use_spectral_norm?: boolean | undefined; gin_channels?: number | undefined; slm?: { model?: string | undefined; sr?: number | undefined; hidden?: number | undefined; nlayers?: number | undefined; initial_channel?: number | undefined; } | undefined; }; }, { model_name: string; version: string; train: { log_interval?: number | undefined; eval_interval?: number | undefined; seed?: number | undefined; epochs?: number | undefined; learning_rate?: number | undefined; betas?: [number, number] | undefined; eps?: number | undefined; batch_size?: number | undefined; bf16_run?: boolean | undefined; fp16_run?: boolean | undefined; lr_decay?: number | undefined; segment_size?: number | undefined; init_lr_ratio?: number | undefined; warmup_epochs?: number | undefined; c_mel?: number | undefined; c_kl?: number | undefined; c_commit?: number | undefined; 
skip_optimizer?: boolean | undefined; freeze_ZH_bert?: boolean | undefined; freeze_JP_bert?: boolean | undefined; freeze_EN_bert?: boolean | undefined; freeze_emo?: boolean | undefined; freeze_style?: boolean | undefined; freeze_decoder?: boolean | undefined; }; data: { n_speakers: number; spk2id: Record<string, number>; num_styles: number; style2id: Record<string, number>; use_jp_extra?: boolean | undefined; training_files?: string | undefined; validation_files?: string | undefined; max_wav_value?: number | undefined; sampling_rate?: number | undefined; filter_length?: number | undefined; hop_length?: number | undefined; win_length?: number | undefined; n_mel_channels?: number | undefined; mel_fmin?: number | undefined; mel_fmax?: number | null | undefined; add_blank?: boolean | undefined; cleaned_text?: boolean | undefined; }; model: { use_spk_conditioned_encoder?: boolean | undefined; use_noise_scaled_mas?: boolean | undefined; use_mel_posterior_encoder?: boolean | undefined; use_duration_discriminator?: boolean | undefined; use_wavlm_discriminator?: boolean | undefined; inter_channels?: number | undefined; hidden_channels?: number | undefined; filter_channels?: number | undefined; n_heads?: number | undefined; n_layers?: number | undefined; kernel_size?: number | undefined; p_dropout?: number | undefined; resblock?: string | undefined; resblock_kernel_sizes?: number[] | undefined; resblock_dilation_sizes?: number[][] | undefined; upsample_rates?: number[] | undefined; upsample_initial_channel?: number | undefined; upsample_kernel_sizes?: number[] | undefined; n_layers_q?: number | undefined; use_spectral_norm?: boolean | undefined; gin_channels?: number | undefined; slm?: { model?: string | undefined; sr?: number | undefined; hidden?: number | undefined; nlayers?: number | undefined; initial_channel?: number | undefined; } | undefined; }; }>;