#!/usr/bin/env python
# coding=utf-8
# aeneas is a Python/C library and a set of tools
# to automagically synchronize audio and text (aka forced alignment)
#
# Copyright (C) 2012-2013, Alberto Pettarin (www.albertopettarin.it)
# Copyright (C) 2013-2015, ReadBeyond Srl (www.readbeyond.it)
# Copyright (C) 2015-2017, Alberto Pettarin (www.albertopettarin.it)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
This module contains the following classes:
* :class:`aeneas.cewsubprocess.CEWSubprocess` which is a
helper class that executes the :mod:`aeneas.cew` C extension
in a separate process via ``subprocess``.
This module works around a problem with the ``eSpeak`` library,
which seems to generate different audio data for the same
input parameters/text, when run multiple times in the same process.
See the following discussions for details:
#. https://groups.google.com/d/msg/aeneas-forced-alignment/NLbtSRf2_vg/mMHuTQiFEgAJ
#. https://sourceforge.net/p/espeak/mailman/message/34861696/
.. warning:: This module might be removed in a future version.
.. versionadded:: 1.5.0
"""
from __future__ import absolute_import
from __future__ import print_function
import io
import subprocess
import sys
from aeneas.exacttiming import TimeValue
from aeneas.logger import Loggable
from aeneas.runtimeconfiguration import RuntimeConfiguration
import aeneas.globalfunctions as gf
class CEWSubprocess(Loggable):
    """
    This helper class executes the ``aeneas.cew`` C extension
    in a separate process by running
    the :func:`aeneas.cewsubprocess.main` function
    via ``subprocess``.

    :param rconf: a runtime configuration
    :type  rconf: :class:`~aeneas.runtimeconfiguration.RuntimeConfiguration`
    :param logger: the logger object
    :type  logger: :class:`~aeneas.logger.Logger`
    """

    TAG = u"CEWSubprocess"

    def synthesize_multiple(self, audio_file_path, c_quit_after, c_backwards, u_text):
        """
        Synthesize the text contained in the given fragment list
        into a ``wav`` file, delegating the actual synthesis to
        a child process running ``-m aeneas.cewsubprocess``.

        The ``(voice_code, text)`` pairs are written to a temporary
        text file, one ``voice_code text`` line per pair, and the child
        process writes its results to a temporary data file,
        which is parsed here. Both temporary files are always deleted
        before returning, even if an error occurs.

        :param string audio_file_path: the path to the output audio file
        :param float c_quit_after: stop synthesizing as soon as
                                   reaching this many seconds
        :param bool c_backwards: synthesizing from the end of the text file
        :param object u_text: a list of ``(voice_code, text)`` tuples
        :rtype: tuple ``(sample_rate, synthesized, intervals)``
        :raises IndexError: if the data file contains fewer than two lines
                            (e.g., because the child process failed)
        :raises ValueError: if the first two lines of the data file
                            are not integers
        """
        self.log([u"Audio file path: '%s'", audio_file_path])
        self.log([u"c_quit_after: '%.3f'", c_quit_after])
        self.log([u"c_backwards: '%d'", c_backwards])

        text_file_handler, text_file_path = gf.tmp_file()
        data_file_handler, data_file_path = gf.tmp_file()
        try:
            self.log([u"Temporary text file path: '%s'", text_file_path])
            self.log([u"Temporary data file path: '%s'", data_file_path])

            self.log(u"Populating the text file...")
            with io.open(text_file_path, "w", encoding="utf-8") as tmp_text_file:
                for f_voice_code, f_text in u_text:
                    tmp_text_file.write(u"%s %s\n" % (f_voice_code, f_text))
            self.log(u"Populating the text file... done")

            # the argument order must match the one expected by main()
            arguments = [
                self.rconf[RuntimeConfiguration.CEW_SUBPROCESS_PATH],
                "-m",
                "aeneas.cewsubprocess",
                "%.3f" % c_quit_after,
                "%d" % c_backwards,
                text_file_path,
                audio_file_path,
                data_file_path
            ]
            self.log([u"Calling with arguments '%s'", u" ".join(arguments)])
            proc = subprocess.Popen(
                arguments,
                stdout=subprocess.PIPE,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE,
                universal_newlines=True)
            # NOTE: the child's stdout/stderr are captured and discarded,
            #       and its return code is not checked: a failure of the child
            #       surfaces below, when parsing the (empty) data file
            proc.communicate()

            self.log(u"Reading output data...")
            with io.open(data_file_path, "r", encoding="utf-8") as data_file:
                lines = data_file.read().splitlines()
            # first line: sample rate; second line: synthesized count;
            # remaining lines: "begin end" interval pairs
            sr = int(lines[0])
            sf = int(lines[1])
            intervals = []
            for line in lines[2:]:
                values = line.split(u" ")
                # silently skip lines that are not exactly a "begin end" pair
                if len(values) == 2:
                    intervals.append((TimeValue(values[0]), TimeValue(values[1])))
            self.log(u"Reading output data... done")
        finally:
            # always remove the temporary files, also when something
            # above raised, so that failed runs do not leak them
            self.log(u"Deleting text and data files...")
            gf.delete_file(text_file_handler, text_file_path)
            gf.delete_file(data_file_handler, data_file_path)
            self.log(u"Deleting text and data files... done")

        return (sr, sf, intervals)
def main():
    """
    Run ``aeneas.cew``, reading input text from file
    and writing audio and interval data to file.

    The five required command line arguments, read from ``sys.argv``, are
    ``QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH``.

    The text file must contain one ``voice_code text`` line per fragment.
    The data file will contain the sample rate on the first line,
    the synthesized count on the second line, and then
    one ``begin end`` interval per line.

    Errors while parsing the numeric arguments or reading the text file
    are not caught and propagate to the caller.

    :rtype: int
    :returns: ``0`` on success, ``1`` if fewer than five arguments were given
              or if synthesizing or writing the data file failed
    """
    # make sure we have enough parameters
    if len(sys.argv) < 6:
        print("You must pass five arguments: QUIT_AFTER BACKWARDS TEXT_FILE_PATH AUDIO_FILE_PATH DATA_FILE_PATH")
        return 1

    # read parameters
    c_quit_after = float(sys.argv[1])  # NOTE: cew needs float, not TimeValue
    c_backwards = int(sys.argv[2])
    text_file_path = sys.argv[3]
    audio_file_path = sys.argv[4]
    data_file_path = sys.argv[5]

    # read (voice_code, text) from file
    s_text = []
    with io.open(text_file_path, "r", encoding="utf-8") as text:
        for line in text:
            # NOTE: not using strip() to avoid removing trailing blank characters
            line = line.replace(u"\n", u"").replace(u"\r", u"")
            # the voice code is everything before the first space;
            # lines without a space, or starting with one, are skipped
            idx = line.find(u" ")
            if idx > 0:
                s_text.append((line[:idx], line[(idx + 1):]))

    # convert to bytes (Python 2) or unicode (Python 3)
    # before passing the text to the aeneas.cew call below
    convert = gf.safe_bytes if gf.PY2 else gf.safe_unicode
    c_text = [(convert(f_voice_code), convert(f_text)) for f_voice_code, f_text in s_text]

    try:
        # imported here so that a missing C extension is reported
        # like any other synthesis error
        import aeneas.cew.cew
        sr, sf, intervals = aeneas.cew.cew.synthesize_multiple(
            audio_file_path,
            c_quit_after,
            c_backwards,
            c_text
        )
        with io.open(data_file_path, "w", encoding="utf-8") as data:
            data.write(u"%d\n" % (sr))
            data.write(u"%d\n" % (sf))
            data.write(u"\n".join([u"%.3f %.3f" % (i[0], i[1]) for i in intervals]))
    except Exception as exc:
        print(u"Unexpected error: %s" % str(exc))
        # report the failure, consistently with the usage error above
        return 1

    return 0
if __name__ == "__main__":
    # use the value returned by main() as the process exit status,
    # so that a non-zero return is visible to the parent process
    sys.exit(main())