# Review notes (text before the first "diff --git" header is ignored by patch tools).
#
# This patch was rebuilt from a copy whose line breaks and indentation had been
# collapsed. The indentation of context (" ") and removed ("-") lines is a best
# guess; verify it against the target files before applying.
#
# The entrypoint.sh hunk header originally read "-3,24 +3,34", but only 23
# old-side lines could be recovered, so one whitespace-only context line is
# probably missing. The hunk headers below are recomputed to match the lines shown.
# The post-image blob ids on the "index" lines are carried over from the original
# patch and do not describe the edited content.
#
# Changes relative to the original patch (added lines only):
#   * Dockerfile: quote the pip version specifiers so "*" is not a shell glob.
#   * entrypoint.sh: quote ${CKPT_DIR}, $VOICE_NAME and $CKPT expansions.
#   * entrypoint.sh: fail with a clear message when no checkpoint exists to
#     export; "ls | head" returns head's status, so set -e does not catch it.
diff --git a/Dockerfile b/Dockerfile
index 3b4ae8d..a6b362d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,11 +12,9 @@ WORKDIR /piper
 RUN apt-get update \
     && apt-get install -y --no-install-recommends \
         build-essential \
-        cmake \
         espeak-ng \
         git \
         ninja-build \
-        pkg-config \
         python3 \
         python3-dev \
         python3-venv \
@@ -27,8 +25,18 @@ RUN git clone --depth 1 https://github.com/OHF-voice/piper1-gpl.git .
 RUN python3 -m venv ${VENV_PATH}
 
 RUN pip install --upgrade pip wheel setuptools \
-    && pip install --no-cache-dir cmake scikit-build \
-    && pip install --no-cache-dir -e '.[train]'
+    && pip install --no-cache-dir numpy cython cmake scikit-build onnx onnxruntime-gpu
+
+# Record the torch stack pins in a constraints file; the editable install below reuses it via -c.
+RUN echo "torch==2.4.*" > /tmp/constraints.txt \
+    && echo "torchvision==0.19.*" >> /tmp/constraints.txt \
+    && echo "torchaudio==2.4.*" >> /tmp/constraints.txt
+
+# Specifiers are quoted because an unquoted "*" is a glob in a shell-form RUN.
+RUN pip install --no-cache-dir 'torch==2.4.*' 'torchvision==0.19.*' 'torchaudio==2.4.*' \
+    --index-url https://download.pytorch.org/whl/cu121
+
+RUN pip install --no-cache-dir -e '.[train]' -c /tmp/constraints.txt
 
 RUN ./build_monotonic_align.sh \
     && python3 setup.py build_ext --inplace
diff --git a/entrypoint.sh b/entrypoint.sh
index 1f1dce4..6529c25 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -3,23 +3,40 @@
 set -e
 
 CKPT_DIR="/piper/lightning_logs"
-CKPT=""
+VOICE_NAME=${VOICE_NAME:-pt_BR-well}
+
+# Print the most recently modified checkpoint under CKPT_DIR (ls -t, first entry).
+# Prints nothing on stdout when no checkpoint matches; callers must check for that.
+latest_checkpoint() {
+  ls -t "${CKPT_DIR}"/version_*/checkpoints/*.ckpt | head -n1
+}
 
 if compgen -G "${CKPT_DIR}/version_*/checkpoints/*.ckpt" > /dev/null; then
-  CKPT=$(ls -t ${CKPT_DIR}/version_*/checkpoints/*.ckpt | head -n1)
+  CKPT=$(latest_checkpoint)
   RESUME_ARG="--ckpt_path $CKPT"
   echo "Resuming: $CKPT"
 fi
 
-exec python3 -m piper.train fit \
-  --data.voice_name pt_BR-well \
+python3 -m piper.train fit \
+  --data.voice_name "$VOICE_NAME" \
   --data.espeak_voice pt-br \
   --data.audio_dir /data/wav/ \
   --data.batch_size 16 \
   --data.cache_dir /data/.cache/ \
-  --data.config_path /data/config.json \
+  --data.config_path "/data/${VOICE_NAME}-medium.onnx.json" \
   --data.csv_path /data/metadata.csv \
   --model.sample_rate 22050 \
   --trainer.check_val_every_n_epoch 1 \
   --trainer.max_epochs 10000 \
   $RESUME_ARG
+
+# Re-resolve the newest checkpoint after training; refuse to export if there is none.
+CKPT=$(latest_checkpoint)
+if [[ -z "$CKPT" ]]; then
+  echo "No checkpoint found under ${CKPT_DIR}; cannot export ONNX model" >&2
+  exit 1
+fi
+
+python3 -m piper.train.export_onnx \
+  --checkpoint "$CKPT" \
+  --output-file "/data/${VOICE_NAME}-medium.onnx"