93 lines
3.0 KiB
JavaScript
93 lines
3.0 KiB
JavaScript
import axios from "axios";
|
|
import {getTag} from "@sozialhelden/ietf-language-tags";
|
|
|
|
const bodyParser = require('body-parser')
|
|
const app = require('express')()
|
|
const fs = require('fs')
|
|
const tmp = require('tmp')
|
|
const exec = require('child_process').exec;
|
|
const _ = require('lodash');
|
|
|
|
const builder = require('xmlbuilder');
|
|
|
|
|
|
|
|
require("dotenv").config()
|
|
|
|
const azureSpeech = require("microsoft-cognitiveservices-speech-sdk")
|
|
const {SpeechSynthesisOutputFormat} = require("microsoft-cognitiveservices-speech-sdk");
|
|
// Azure Speech configuration shared by every synthesis request. Credentials
// and region are read from the environment (populated by dotenv).
const speechConfig = azureSpeech.SpeechConfig.fromSubscription(
    process.env.AZURE_KEY,
    process.env.AZURE_REGION
);
// Ask Azure for Ogg/Opus audio (24 kHz, 16 bit, mono).
speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Ogg24Khz16BitMonoOpus;

// Location of the eSpeak NG binary. The Windows path contains spaces and is
// therefore stored already wrapped in double quotes.
const eSpeakExecutable = process.platform === "win32"
    ? '"C:\\Program Files\\eSpeak NG\\espeak-ng.exe"'
    : "/usr/bin/espeak-ng";

// Decode JSON request bodies into req.body for all routes registered below.
app.use(bodyParser.json());
|
|
// execFile launches the program directly (no shell), so request data can
// never be interpreted as shell syntax.
const { execFile } = require('child_process');

/**
 * POST /get — transcribe text to IPA with eSpeak NG.
 *
 * Request body (JSON):
 *   - sourceText {string}        text to transcribe
 *   - selectedLanguage {string}  eSpeak voice name, passed to `-v`
 *
 * Responds with `{out, stderr}` holding the trimmed stdout / stderr of the
 * eSpeak process. Responds 400 when a field is missing or not a string;
 * file-system and process-launch failures are forwarded to `next`.
 */
app.post('/get', (req, res, next) => {
    const body = req.body || {};
    const sourceText = body.sourceText;
    const selectedLanguage = body.selectedLanguage;

    if (typeof sourceText !== "string" || typeof selectedLanguage !== "string") {
        res.status(400).json({error: "sourceText and selectedLanguage must be strings"});
        return;
    }

    // eSpeakExecutable may be wrapped in double quotes for shell use (Windows);
    // execFile needs the bare path.
    const executablePath = eSpeakExecutable.replace(/^"(.*)"$/, "$1");

    tmp.file(function _tempFileCreated(err, path, fd, cleanupCallback) {
        if (err) {
            // Throwing here would be an uncaught exception and kill the server.
            next(err);
            return;
        }

        // A comma is inserted before every newline before the text is handed to eSpeak.
        const eSpeakInput = sourceText.replace(/\n/g, ",\n");

        fs.writeFile(path, eSpeakInput, {encoding: "utf-8"}, (writeErr) => {
            if (writeErr) {
                cleanupCallback();
                next(writeErr);
                return;
            }

            const args = [`-v${selectedLanguage}`, "-q", "-f", path, "--ipa"];
            execFile(executablePath, args, (error, stdout, stderr) => {
                // Remove the temp file whatever the outcome.
                cleanupCallback();

                // No output at all plus an error means the process could not
                // run (e.g. binary missing); otherwise keep reporting eSpeak's
                // own stderr to the client as before.
                if (error && !stdout && !stderr) {
                    next(error);
                    return;
                }

                res.json({out: stdout.trim(), stderr: stderr.trim()});
            });
        });
    });
});
|
|
|
|
/**
 * GET /getLanguages — list the Azure text-to-speech voices grouped by locale.
 *
 * Responds with an array of `{label, options}` groups sorted by label, where
 * each option is `{text, value}` (value is the voice's ShortName) and the
 * options of a group are sorted by text. Any failure is forwarded to `next`.
 */
app.get('/getLanguages', async (req, res, next) => {
    // Ascending comparator on a string property; like the original inline
    // comparators it never reports equality.
    const ascendingBy = (prop) => (left, right) => (left[prop] > right[prop] ? 1 : -1);

    try {
        const voicesUrl = `https://${process.env.AZURE_REGION}.tts.speech.microsoft.com/cognitiveservices/voices/list`;
        const response = await axios.get(voicesUrl, {
            headers: {"Ocp-Apim-Subscription-Key": process.env.AZURE_KEY}
        });
        const voicesByLocale = _.groupBy(response.data, (voice) => voice.Locale);

        const groups = Object.entries(voicesByLocale).map(([locale, voices]) => {
            const tag = getTag(locale);
            const label = `${tag.language.Description[0]} (${tag.region.Description[0]})`;
            const options = voices
                .map((voice) => ({
                    text: `${voice.DisplayName} (${voice.LocalName}, ${voice.Gender}, ${voice.VoiceType})`,
                    value: voice.ShortName
                }))
                .sort(ascendingBy("text"));
            return {label, options};
        });

        res.json(groups.sort(ascendingBy("label")));
    } catch (e) {
        next(e);
    }
});
|
|
|
|
/**
 * POST /speak — synthesize IPA text to audio with Azure Speech.
 *
 * Request body (JSON):
 *   - targetText {string}              one IPA string per line
 *   - selectedSpeechLanguage {string}  Azure voice name; its first two
 *                                      dash-separated parts are used as xml:lang
 *
 * Responds with the synthesized audio as `audio/ogg; codecs=opus`. Responds
 * 400 when a field is missing or not a string; synthesis failures are
 * forwarded to `next`.
 */
app.post('/speak', async (req, res, next) => {
    let synthesizer;
    try {
        const body = req.body || {};
        const targetText = body.targetText;
        const selectedSpeechLanguage = body.selectedSpeechLanguage;

        if (typeof targetText !== "string" || typeof selectedSpeechLanguage !== "string") {
            res.status(400).json({error: "targetText and selectedSpeechLanguage must be strings"});
            return;
        }

        const language = selectedSpeechLanguage.split("-").slice(0, 2).join("-");

        // Build the SSML document: one <phoneme> element per input line.
        const root = builder.create("speak");
        root.att("version", "1.0");
        root.att("xmlns", "http://www.w3.org/2001/10/synthesis");
        root.att("xml:lang", language);
        const voice = root.ele("voice").att("name", selectedSpeechLanguage);

        for (const line of targetText.split("\n")) {
            voice.ele("phoneme").att("alphabet", "ipa").att("ph", line).t(line);
        }

        const xml = root.toString();

        synthesizer = new azureSpeech.SpeechSynthesizer(speechConfig);
        synthesizer.speakSsmlAsync(xml, result => {
            // Release the synthesizer's resources once this request is done.
            synthesizer.close();

            // A canceled synthesis carries error details instead of audio.
            if (result.reason !== azureSpeech.ResultReason.SynthesizingAudioCompleted) {
                next(new Error(result.errorDetails || "Speech synthesis failed"));
                return;
            }

            res.contentType("audio/ogg; codecs=opus").send(Buffer.from(result.audioData));
        }, error => {
            synthesizer.close();
            next(error instanceof Error ? error : new Error(String(error)));
        });
    } catch (e) {
        // Synchronous failures (SSML building, SDK setup) would otherwise be
        // an unhandled rejection of this async handler and leave the request
        // hanging.
        if (synthesizer) {
            synthesizer.close();
        }
        next(e);
    }
});
|
|
|
|
// Expose the configured Express app as this module's CommonJS export.
module.exports = app
|
|
|