dotfiles: add coqui-read helper

2026-04-14 00:45:26 -07:00
parent 3813af4bd2
commit 18c8e0324f
1 changed files with 171 additions and 0 deletions
--- a/dotfiles/lib/functions/coqui-read
+++ b/dotfiles/lib/functions/coqui-read
@@ -0,0 +1,171 @@
 #!/usr/bin/env bash
 # coqui-read: write the embedded Python program below to a temp file and run
 # it with python3, forwarding the caller's arguments.
 function coqui-read {
    local script_file stdin_file
    # Temp file that receives the Python program from the heredoc below.
    script_file="$(mktemp)"
    # Stays empty unless stdin gets spooled to a temp file just below.
    stdin_file=""
    # No arguments and stdin is not a terminal: copy all of stdin to a temp
    # file and pass that file to the Python program via --stdin-file.
    if [[ "$#" -eq 0 && ! -t 0 ]]; then
        stdin_file="$(mktemp)"
        cat > "$stdin_file"
        set -- --stdin-file "$stdin_file"
    fi
    # The quoted 'PY' delimiter keeps the shell from expanding anything inside
    # the heredoc, so the Python source is written out verbatim.
    cat > "$script_file" <<'PY'
 import argparse
 import re
 import subprocess
 import sys
 import tempfile
 import urllib.parse
 import urllib.request
 from pathlib import Path
 # Base URL used as the default value of the --host option.
 DEFAULT_HOST = "http://[::1]:11115"
 def split_sentences(text: str) -> list[str]:
    """Break text apart at whitespace that directly follows '.', '!' or '?'.

    Returns the non-empty, whitespace-trimmed pieces in their original order.
    """
    sentences: list[str] = []
    for fragment in re.split(r"(?<=[.!?])\s+", text.strip()):
        trimmed = fragment.strip()
        if trimmed:
            sentences.append(trimmed)
    return sentences
 def split_text(text: str, mode: str, max_chars: int) -> list[str]:
    """Cut text into chunks for synthesis.

    Line endings are normalized to "\\n" first. In "sentences" mode the text
    is split with split_sentences; in any other mode it is split into
    paragraphs at blank lines. A unit longer than max_chars is re-split into
    sentences, which are packed greedily (joined by single spaces) into chunks
    of at most max_chars. A unit that yields at most one sentence is kept
    whole, and a single sentence longer than max_chars becomes its own chunk.

    Returns an empty list when the text is empty after stripping.
    """
    cleaned = re.sub(r"\r\n?", "\n", text).strip()
    if cleaned == "":
        return []
    if mode != "sentences":
        paragraphs = (block.strip() for block in re.split(r"\n\s*\n+", cleaned))
        units = [paragraph for paragraph in paragraphs if paragraph]
    else:
        units = split_sentences(cleaned)
    result: list[str] = []
    for unit in units:
        # Only oversized units are worth re-splitting into sentences.
        pieces = split_sentences(unit) if len(unit) > max_chars else []
        if len(pieces) < 2:
            # Fits already, or cannot be broken down further: keep it whole.
            result.append(unit)
            continue
        buffer = ""
        for piece in pieces:
            joined = f"{buffer} {piece}" if buffer else piece
            if len(joined) > max_chars:
                # Flush what has been packed so far and start a new chunk.
                if buffer:
                    result.append(buffer)
                buffer = piece
            else:
                buffer = joined
        if buffer:
            result.append(buffer)
    return result
 def build_url(base_url: str, text: str, speaker: str | None, language: str | None) -> str:
    params = {"text": text}
    if speaker:
        params["speaker_id"] = speaker
    if language:
        params["language_id"] = language
    query = urllib.parse.urlencode(params)
    return f"{base_url.rstrip('/')}/api/tts?{query}"
 def synthesize_chunk(base_url: str, text: str, speaker: str | None, language: str | None) -> Path:
    """Request synthesis of one chunk and save the response body to a temp file.

    Args:
        base_url: Server base URL, passed to build_url.
        text: Text to synthesize.
        speaker: Optional speaker_id value.
        language: Optional language_id value.

    Returns:
        Path of a new "coqui-read-*.wav" temp file holding the response body.
        The file is not deleted automatically; the caller owns it.

    Raises:
        Whatever urllib.request.urlopen raises for connection, HTTP or
        timeout failures (the request times out after 300 seconds).
    """
    request = urllib.request.Request(build_url(base_url, text, speaker, language))
    with urllib.request.urlopen(request, timeout=300) as response:
        wav_data = response.read()
    # delete=False because the file must outlive this function; the with block
    # still guarantees the handle is closed even when the write fails.
    with tempfile.NamedTemporaryFile(prefix="coqui-read-", suffix=".wav", delete=False) as temp_file:
        temp_file.write(wav_data)
    return Path(temp_file.name)
 def play_file(path: Path, player: str) -> None:
    """Run the playback command on one audio file and wait for it to finish.

    ffplay gets flags for no display window, exit at end of file, and
    warning-level logging; any other player is invoked with just the path.
    Raises subprocess.CalledProcessError when the player exits non-zero.
    """
    extra_flags = ["-nodisp", "-autoexit", "-loglevel", "warning"] if player == "ffplay" else []
    subprocess.run([player, *extra_flags, str(path)], check=True)
 def read_input(inputs: list[str]) -> str:
    """Resolve the positional arguments to the text that should be spoken.

    Args:
        inputs: Positional command-line arguments.

    Returns:
        The contents of the file when exactly one argument names an existing
        regular file; otherwise the arguments joined by single spaces; or all
        of stdin when no arguments were given.
    """
    if not inputs:
        return sys.stdin.read()
    if len(inputs) == 1:
        candidate = Path(inputs[0])
        # Only a regular file counts as "a text-file path". A directory (or
        # the empty string, which pathlib maps to ".") must not reach
        # read_text, and literal text that is too long or otherwise invalid
        # as a file name can make the filesystem check itself raise.
        try:
            is_text_file = candidate.is_file()
        except (OSError, ValueError):
            is_text_file = False
        if is_text_file:
            return candidate.read_text()
    return " ".join(inputs)
 def main() -> int:
    """Parse the command line, then synthesize and optionally play each chunk.

    Text comes from the hidden --stdin-file option when it is set, otherwise
    from the positional inputs (or stdin) via read_input. Each chunk is
    synthesized, its wav path is printed to stdout, and it is played unless
    --player is 'none'. Progress messages go to stderr.

    Generated files are removed on exit, including when synthesis or playback
    raises, unless --keep is given or --player is 'none'.

    Returns:
        0 on success, 1 when there is no text to synthesize.
    """
    parser = argparse.ArgumentParser(description="Read text incrementally through the local Coqui TTS service.")
    parser.add_argument("--stdin-file", default=None, help=argparse.SUPPRESS)
    parser.add_argument("inputs", nargs="*", help="Text to speak, or a single text-file path. Reads stdin when omitted.")
    parser.add_argument("--host", default=DEFAULT_HOST, help=f"Coqui server base URL. Default: {DEFAULT_HOST}")
    parser.add_argument("--speaker", default=None, help="Optional speaker_id value.")
    parser.add_argument("--language", default=None, help="Optional language_id value.")
    parser.add_argument(
        "--chunk-mode",
        choices=["paragraphs", "sentences"],
        default="paragraphs",
        help="Chunking strategy before synthesis.",
    )
    parser.add_argument("--max-chars", type=int, default=700, help="Maximum characters per synthesized chunk.")
    parser.add_argument(
        "--player",
        default="ffplay",
        help="Playback command. Use 'none' to only synthesize and print wav paths.",
    )
    parser.add_argument(
        "--keep",
        action="store_true",
        help="Keep generated wav files on disk instead of deleting them after playback.",
    )
    args = parser.parse_args()
    if args.stdin_file:
        text = Path(args.stdin_file).read_text()
    else:
        text = read_input(args.inputs)
    chunks = split_text(text, args.chunk_mode, args.max_chars)
    if not chunks:
        print("No text to synthesize.", file=sys.stderr)
        return 1
    playback_enabled = args.player != "none"
    # Without playback the printed paths are the only result of the run, so
    # deleting the files would hand the caller paths that no longer exist.
    keep_files = args.keep or not playback_enabled
    created_files: list[Path] = []
    try:
        for index, chunk in enumerate(chunks, start=1):
            print(f"[{index}/{len(chunks)}] Synthesizing {len(chunk)} chars...", file=sys.stderr)
            wav_path = synthesize_chunk(args.host, chunk, args.speaker, args.language)
            created_files.append(wav_path)
            print(wav_path)
            if playback_enabled:
                play_file(wav_path, args.player)
    finally:
        # Runs on errors and interrupts too, so temp files are not left behind.
        if not keep_files:
            for wav_path in created_files:
                wav_path.unlink(missing_ok=True)
    return 0
 # Script entry point: main()'s return value becomes the process exit status.
 if __name__ == "__main__":
    sys.exit(main())
 PY
    # Run the generated program with the (possibly rewritten) argument list.
    python3 "$script_file" "$@"
    # Capture python's exit status before the cleanup commands overwrite $?.
    local exit_code=$?
    rm -f "$script_file"
    # The stdin spool file exists only when piped input was captured earlier.
    if [[ -n "$stdin_file" ]]; then
        rm -f "$stdin_file"
    fi
    # Report the Python program's status, not the status of the cleanup.
    return "$exit_code"
 }
 # Invoke the function with this script's own arguments.
 coqui-read "$@"