diff --git a/api/espeak.js b/api/espeak.js index 621dada..db0f940 100644 --- a/api/espeak.js +++ b/api/espeak.js @@ -1,8 +1,23 @@ +import axios from "axios"; +import {getTag} from "@sozialhelden/ietf-language-tags"; + const bodyParser = require('body-parser') const app = require('express')() const fs = require('fs') const tmp = require('tmp') const exec = require('child_process').exec; +const _ = require('lodash'); + +const builder = require('xmlbuilder'); + + + +require("dotenv").config() + +const azureSpeech = require("microsoft-cognitiveservices-speech-sdk") +const {SpeechSynthesisOutputFormat} = require("microsoft-cognitiveservices-speech-sdk"); +const speechConfig = azureSpeech.SpeechConfig.fromSubscription(process.env.AZURE_KEY, process.env.AZURE_REGION) +speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Ogg24Khz16BitMonoOpus const eSpeakExecutable = process.platform === "win32" ? `"C:\\Program Files\\eSpeak NG\\espeak-ng.exe"` : `/usr/bin/espeak-ng` @@ -13,12 +28,65 @@ app.post('/get', (req, res, next) => { fs.writeFile(path, req.body.sourceText.replace(/\n/gi,",\n"), {encoding: "utf-8"}, ()=> { exec(`${eSpeakExecutable} -v"${req.body.selectedLanguage}" -q -f "${path}" --ipa`, ((error, stdout, stderr) => { - res.json({out: stdout, stderr}) + res.json({out: stdout.trim(), stderr: stderr.trim()}) cleanupCallback() })) }) }); }) +app.get('/getLanguages', async (req, res, next) => { + try { + + const languages = (await axios.get(`https://${process.env.AZURE_REGION}.tts.speech.microsoft.com/cognitiveservices/voices/list`, { + headers: { + "Ocp-Apim-Subscription-Key": process.env.AZURE_KEY + } + })).data + const grouped = _.groupBy(languages, x=>x.Locale) + let obj = [] + for (const [key, value] of Object.entries(grouped)){ + const tag = getTag(key) + obj.push({ + label: `${tag.language.Description[0]} (${tag.region.Description[0]})`, + options: value.map(x=>{ + return { + text: `${x.DisplayName} (${x.LocalName}, ${x.Gender}, ${x.VoiceType})`, + value: x.ShortName + } + }).sort((a, b) => a.text > b.text ? 1 : -1) + }) + } + res.json(obj.sort((a, b) => a.label > b.label ? 1 : -1)) + } catch(e) { + next(e) + } +}) + +app.post('/speak', async (req, res, next) => { + const synthesizer = new azureSpeech.SpeechSynthesizer(speechConfig) + + const splitLines = req.body.targetText.split("\n") + const language = req.body.selectedSpeechLanguage.split("-").slice(0,2).join("-") + + let root = builder.create("speak") + root.att("version","1.0") + root.att("xmlns", "http://www.w3.org/2001/10/synthesis") + root.att("xml:lang", language) + let voice = root.ele("voice").att("name", req.body.selectedSpeechLanguage) + + for (const line of splitLines) { + voice.ele("phoneme").att("alphabet","ipa").att("ph",line).t(line) + } + + const xml = root.toString() + + synthesizer.speakSsmlAsync(xml, result => { + const audioData = result.audioData; + res.contentType("audio/ogg; codecs=opus").send(Buffer.from(audioData)) + }) + +}) + module.exports = app diff --git a/nuxt.config.js b/nuxt.config.js index be70632..e393d4e 100644 --- a/nuxt.config.js +++ b/nuxt.config.js @@ -47,6 +47,7 @@ export default { height: '15px' }, + ssr: false, // Build Configuration (https://go.nuxtjs.dev/config-build) build: { diff --git a/package.json b/package.json index c2f7305..6c06908 100644 --- a/package.json +++ b/package.json @@ -10,14 +10,20 @@ }, "dependencies": { "@nuxtjs/axios": "^5.12.2", + "@sozialhelden/ietf-language-tags": "^3.2.4", "axios": "^0.21.1", "body-parser": "^1.19.0", "bootstrap": "^4.5.2", "bootstrap-vue": "^2.17.3", "core-js": "^3.6.5", + "dotenv": "^8.2.0", "express": "^4.17.1", + "lodash": "^4.17.20", + "microsoft-cognitiveservices-speech-sdk": "^1.14.1", "nuxt": "^2.14.6", - "tmp": "^0.2.1" + "tmp": "^0.2.1", + "vue-multiselect": "^2.1.6", + "xmlbuilder": "^15.1.1" }, "devDependencies": { "@nuxt/types": "~2.14.0" diff --git a/pages/index.vue b/pages/index.vue index e2303fd..07fc73b 100644 --- a/pages/index.vue +++ b/pages/index.vue @@ -19,6 +19,16 @@
{{targetText.out}}
+

Speak IPA

+ + Speak
+ @@ -26,7 +36,10 @@