#!/usr/bin/env python
# coding=utf-8
# aeneas is a Python/C library and a set of tools
# to automagically synchronize audio and text (aka forced alignment)
#
# Copyright (C) 2012-2013, Alberto Pettarin (www.albertopettarin.it)
# Copyright (C) 2013-2015, ReadBeyond Srl (www.readbeyond.it)
# Copyright (C) 2015-2017, Alberto Pettarin (www.albertopettarin.it)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Global common functions.
"""
from __future__ import absolute_import
from __future__ import print_function
import datetime
import io
import math
import os
import re
import shutil
import sys
import tempfile
import uuid
from aeneas.exacttiming import TimeValue
import aeneas.globalconstants as gc
# RUNTIME CONSTANTS

# ANSI escape sequences used to color terminal output:
# a colored message is emitted as <color code> + text + ANSI_END
ANSI_END = u"\033[0m"
ANSI_ERROR = u"\033[91m"
ANSI_OK = u"\033[92m"
ANSI_WARNING = u"\033[93m"

# compiled patterns matching HH:MM:SS.mmm and HH:MM:SS,mmm timings;
# the four groups capture hours, minutes, seconds and the fractional part
HHMMSS_MMM_PATTERN = re.compile(r"([0-9]*):([0-9]*):([0-9]*)\.([0-9]*)")
HHMMSS_MMM_PATTERN_COMMA = re.compile(r"([0-9]*):([0-9]*):([0-9]*),([0-9]*)")

# truthy when running from a frozen binary (e.g., compiled with pyinstaller),
# that is, when the ``sys`` module exposes a ``frozen`` attribute
FROZEN = getattr(sys, "frozen", False)

# True when the major version of the running interpreter is 2
PY2 = sys.version_info[0] == 2
# COMMON FUNCTIONS
def safe_print(msg):
    """
    Safely print a given Unicode string to stdout,
    possibly replacing characters that are not printable
    in the current stdout encoding.

    If the encoding of stdout is unknown
    (``sys.stdout.encoding`` is missing or ``None``),
    ASCII is assumed when replacing non-printable characters.

    :param string msg: the message
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        # NOTE the encoding can be None (e.g., when stdout is not a terminal);
        #      passing None to encode()/decode() would raise a TypeError
        #      that nothing below catches, so fall back to ASCII
        encoding = getattr(sys.stdout, "encoding", None) or "ascii"
        try:
            # NOTE encoding and decoding so that in Python 3 no b"..." is printed
            encoded = msg.encode(encoding, "replace")
            decoded = encoded.decode(encoding, "replace")
            print(decoded)
        except (UnicodeDecodeError, UnicodeEncodeError):
            print(u"[ERRO] An unexpected error happened while printing to stdout.")
            print(u"[ERRO] Please check that your file/string encoding matches the shell encoding.")
            print(u"[ERRO] If possible, set your shell encoding to UTF-8 and convert any files with legacy encodings.")
def print_error(msg, color=True):
    """
    Print an error message, prefixed by ``[ERRO]``.

    The message is wrapped in the ANSI error color codes
    only if ``color`` is true and the OS is POSIX.

    :param string msg: the message
    :param bool color: if ``True``, print with POSIX color
    """
    use_color = color and is_posix()
    if not use_color:
        safe_print(u"[ERRO] %s" % (msg))
        return
    safe_print(u"%s[ERRO] %s%s" % (ANSI_ERROR, msg, ANSI_END))
def print_info(msg, color=True):
    """
    Print an info message, prefixed by ``[INFO]``.

    :param string msg: the message
    :param bool color: accepted but not used by this function:
                       info messages are always printed without ANSI colors
    """
    line = u"[INFO] %s" % (msg)
    safe_print(line)
def print_success(msg, color=True):
    """
    Print a success message, prefixed by ``[INFO]``.

    The message is wrapped in the ANSI success color codes
    only if ``color`` is true and the OS is POSIX.

    :param string msg: the message
    :param bool color: if ``True``, print with POSIX color
    """
    use_color = color and is_posix()
    if not use_color:
        safe_print(u"[INFO] %s" % (msg))
        return
    safe_print(u"%s[INFO] %s%s" % (ANSI_OK, msg, ANSI_END))
def print_warning(msg, color=True):
    """
    Print a warning message, prefixed by ``[WARN]``.

    The message is wrapped in the ANSI warning color codes
    only if ``color`` is true and the OS is POSIX.

    :param string msg: the message
    :param bool color: if ``True``, print with POSIX color
    """
    use_color = color and is_posix()
    if not use_color:
        safe_print(u"[WARN] %s" % (msg))
        return
    safe_print(u"%s[WARN] %s%s" % (ANSI_WARNING, msg, ANSI_END))
def uuid_string():
    """
    Generate a new uuid4 and return it as a lowercase Unicode string.

    :rtype: string
    """
    raw = str(uuid.uuid4())
    return safe_unicode(raw).lower()
def custom_tmp_dir():
    """
    Return the path of the temporary directory to use.

    On POSIX OSes (Linux and OS X), return the value of
    :data:`~aeneas.globalconstants.TMP_PATH_DEFAULT_POSIX`
    (e.g., ``/tmp/``).

    On non-POSIX OSes, return the value of
    :data:`~aeneas.globalconstants.TMP_PATH_DEFAULT_NONPOSIX`
    (i.e., ``None``), so that ``tempfile``
    will use the directory specified by the
    environment/user ``TMP`` or ``TEMP`` variable.

    :rtype: string
    """
    return gc.TMP_PATH_DEFAULT_POSIX if is_posix() else gc.TMP_PATH_DEFAULT_NONPOSIX
def tmp_directory(root=None):
    """
    Create a temporary directory with ``tempfile.mkdtemp``
    and return its path.

    :param string root: path to the root temporary directory;
                        if ``None``, the directory returned by
                        :func:`custom_tmp_dir` will be used instead
    :rtype: string
    """
    parent = custom_tmp_dir() if root is None else root
    return tempfile.mkdtemp(dir=parent)
def tmp_file(suffix=u"", root=None):
    """
    Create a temporary file with ``tempfile.mkstemp``
    and return the resulting ``(handler, path)`` tuple.

    :param string suffix: the suffix (e.g., the extension) of the file
    :param string root: path to the root temporary directory;
                        if ``None``, the directory returned by
                        :func:`custom_tmp_dir` will be used instead
    :rtype: tuple
    """
    parent = custom_tmp_dir() if root is None else root
    return tempfile.mkstemp(suffix=suffix, dir=parent)
def file_extension(path):
    """
    Return the file extension, without the leading dot.

    Examples: ::

        /foo/bar.baz => baz
        /foo/bar     => (empty string)
        None         => None

    :param string path: the file path
    :rtype: string
    """
    if path is None:
        return None
    _, extension = os.path.splitext(os.path.basename(path))
    # drop the leading dot, if any
    return extension[1:] if extension.startswith(".") else extension
def mimetype_from_path(path):
    """
    Return the mimetype associated with the (lowercased)
    extension of the given file path in
    :data:`~aeneas.globalconstants.MIMETYPE_MAP`,
    or ``None`` if the path is ``None`` or the extension is not mapped.

    :param string path: the file path
    :rtype: string
    """
    extension = file_extension(path)
    if extension is None:
        return None
    key = extension.lower()
    if key not in gc.MIMETYPE_MAP:
        return None
    return gc.MIMETYPE_MAP[key]
def file_name_without_extension(path):
    """
    Return the base name of the file, stripped of its extension.

    Examples: ::

        /foo/bar.baz => bar
        /foo/bar     => bar
        None         => None

    :param string path: the file path
    :rtype: string
    """
    if path is None:
        return None
    base_name = os.path.basename(path)
    stem, _ = os.path.splitext(base_name)
    return stem
def datetime_string(time_zone=False):
    """
    Return a string representing the current local date and time,
    in ``YYYY-MM-DDThh:mm:ss`` or ``YYYY-MM-DDThh:mm:ss+hh:mm`` format.

    When ``time_zone`` is ``True``, the appended offset is the actual
    offset of the local time from UTC at the current instant
    (``+hh:mm`` or ``-hh:mm``), so that the resulting string
    denotes an unambiguous point in time.

    :param boolean time_zone: if ``True``, add the time zone offset.
    :rtype: string
    """
    # NOTE local import: the ``time`` module is not imported at module level
    import time as _time

    # read the clock once, so that the local and UTC
    # representations below refer to the same instant
    timestamp = _time.time()
    local = datetime.datetime.fromtimestamp(timestamp).replace(microsecond=0)
    stamp = u"%04d-%02d-%02dT%02d:%02d:%02d" % (
        local.year,
        local.month,
        local.day,
        local.hour,
        local.minute,
        local.second
    )
    if not time_zone:
        return stamp
    utc = datetime.datetime(*_time.gmtime(timestamp)[:6])
    delta = local - utc
    offset_minutes = (delta.days * 86400 + delta.seconds) // 60
    sign = u"+" if offset_minutes >= 0 else u"-"
    hours, minutes = divmod(abs(offset_minutes), 60)
    return u"%s%s%02d:%02d" % (stamp, sign, hours, minutes)
def safe_float(string, default=None):
    """
    Safely parse a string into a float.

    If ``float()`` raises a ``TypeError`` or a ``ValueError``,
    return the ``default`` value instead.

    :param string string: string value to be converted
    :param float default: default value to be used in case of failure
    :rtype: float
    """
    try:
        return float(string)
    except (TypeError, ValueError):
        return default
def safe_int(string, default=None):
    """
    Safely parse a string into an int.

    The string is first parsed as a float and then truncated,
    so that e.g. ``"12.9"`` yields ``12``.
    On error, including values without an integer representation
    (``"inf"``, ``"nan"``), return the ``default`` value,
    converted to int unless it is ``None``.

    :param string string: string value to be converted
    :param int default: default value to be used in case of failure
    :rtype: int
    """
    try:
        return int(float(string))
    except (TypeError, ValueError, OverflowError):
        # TypeError/ValueError: the string cannot be parsed (or it is NaN);
        # OverflowError: the string denotes an infinity
        pass
    if default is None:
        return None
    return int(default)
def safe_get(dictionary, key, default_value, can_return_none=True):
    """
    Safely perform a dictionary get,
    returning the default value if the key is not found.

    :param dict dictionary: the dictionary
    :param string key: the key
    :param variant default_value: the default value to be returned
    :param bool can_return_none: if ``True``, the function can return ``None``;
                                 otherwise, return ``default_value`` even if the
                                 dictionary lookup succeeded
    :rtype: variant
    """
    try:
        found = dictionary[key]
    except (KeyError, TypeError):
        # KeyError if key is not present in dictionary
        # TypeError if dictionary is None
        return default_value
    if (found is None) and (not can_return_none):
        return default_value
    return found
def norm_join(prefix, suffix):
    """
    Join ``prefix`` and ``suffix`` paths
    and return the resulting path, normalized.

    If one of the two is ``None``, the other one is normalized and returned;
    if both are ``None``, return ``"."``.

    :param string prefix: the prefix path
    :param string suffix: the suffix path
    :rtype: string
    """
    parts = [part for part in (prefix, suffix) if part is not None]
    if len(parts) == 0:
        return "."
    # NOTE os.path.join() with a single argument returns it unchanged
    return os.path.normpath(os.path.join(*parts))
def config_txt_to_string(string):
    """
    Convert the contents of a TXT config file
    into the corresponding configuration string ::

        key_1=value_1|key_2=value_2|...|key_n=value_n

    Leading and trailing blank characters will be stripped
    and empty lines (after stripping) will be ignored.
    If ``string`` is ``None``, return ``None``.

    :param string string: the contents of a TXT config file
    :rtype: string
    """
    if string is None:
        return None
    stripped_lines = (line.strip() for line in string.splitlines())
    non_empty = [line for line in stripped_lines if len(line) > 0]
    return gc.CONFIG_STRING_SEPARATOR_SYMBOL.join(non_empty)
def config_string_to_dict(string, result=None):
    """
    Convert a given configuration string ::

        key_1=value_1|key_2=value_2|...|key_n=value_n

    into the corresponding dictionary ::

        dictionary[key_1] = value_1
        dictionary[key_2] = value_2
        ...
        dictionary[key_n] = value_n

    If ``string`` is ``None``, return an empty dictionary.

    :param string string: the configuration string
    :param result: optional object, passed on to :func:`pairs_to_dict`
    :rtype: dict
    """
    if string is None:
        return {}
    return pairs_to_dict(string.split(gc.CONFIG_STRING_SEPARATOR_SYMBOL), result)
def config_xml_to_dict(contents, result, parse_job=True):
    """
    Convert the contents of a XML config file
    into the corresponding dictionary ::

        dictionary[key_1] = value_1
        dictionary[key_2] = value_2
        ...
        dictionary[key_n] = value_n

    If an error occurs while parsing, an empty dictionary
    (``parse_job=True``) or an empty list (``parse_job=False``)
    is returned, and the error is recorded in ``result``, if given.

    :param bytes contents: the XML configuration contents
    :param result: if not ``None``, on error its ``passed`` attribute
                   is set to ``False`` and its ``add_error()`` method is called
    :param bool parse_job: if ``True``, parse the job properties;
                           if ``False``, parse the tasks properties
    :rtype: dict (``parse_job=True``) or list of dict (``parse_job=False``)
    """
    from lxml import etree

    def to_pair(elem):
        """ Format the given element as a ``tag=text`` string """
        return u"%s%s%s" % (
            safe_unicode(elem.tag),
            gc.CONFIG_STRING_ASSIGNMENT_SYMBOL,
            safe_unicode(elem.text.strip())
        )

    try:
        root = etree.fromstring(contents)
        if parse_job:
            # parse job: all the children of the root with some text,
            # except the container of the tasks
            pairs = [
                to_pair(elem) for elem in root
                if (elem.tag != gc.CONFIG_XML_TASKS_TAG) and (elem.text is not None)
            ]
            return pairs_to_dict(pairs)
        # parse tasks: one dictionary per task element
        output_list = []
        for task in root.find(gc.CONFIG_XML_TASKS_TAG):
            if task.tag == gc.CONFIG_XML_TASK_TAG:
                pairs = [to_pair(elem) for elem in task if elem.text is not None]
                output_list.append(pairs_to_dict(pairs))
        return output_list
    except Exception:
        # NOTE deliberately broad (malformed XML, missing tasks element, ...),
        #      but KeyboardInterrupt and SystemExit are no longer swallowed
        if result is not None:
            result.passed = False
            result.add_error("An error occurred while parsing XML file")
        if parse_job:
            return {}
        return []
def config_dict_to_string(dictionary):
    """
    Convert a given config dictionary ::

        dictionary[key_1] = value_1
        dictionary[key_2] = value_2
        ...
        dictionary[key_n] = value_n

    into the corresponding string ::

        key_1=value_1|key_2=value_2|...|key_n=value_n

    The pairs appear in the iteration order of the dictionary.

    :param dict dictionary: the config dictionary
    :rtype: string
    """
    assignments = [
        u"%s%s%s" % (key, gc.CONFIG_STRING_ASSIGNMENT_SYMBOL, dictionary[key])
        for key in dictionary
    ]
    return gc.CONFIG_STRING_SEPARATOR_SYMBOL.join(assignments)
def pairs_to_dict(pairs, result=None):
    """
    Convert a given list of ``key=value`` strings ::

        ["key_1=value_1", "key_2=value_2", ..., "key_n=value_n"]

    into the corresponding dictionary ::

        dictionary[key_1] = value_1
        dictionary[key_2] = value_2
        ...
        dictionary[key_n] = value_n

    Empty strings are silently skipped.
    A non-empty string is valid only if splitting it on the assignment
    symbol yields exactly two non-empty tokens; invalid strings are
    skipped, after calling ``result.add_warning()`` if ``result`` is given.

    :param list pairs: the list of key=value strings
    :param result: optional object exposing an ``add_warning()`` method
    :rtype: dict
    """
    dictionary = {}
    for pair in pairs:
        if len(pair) == 0:
            continue
        tokens = pair.split(gc.CONFIG_STRING_ASSIGNMENT_SYMBOL)
        is_valid = (
            (len(tokens) == 2) and
            (len(tokens[0]) > 0) and
            (len(tokens[1]) > 0)
        )
        if is_valid:
            key, value = tokens
            dictionary[key] = value
        elif result is not None:
            result.add_warning("Invalid key=value string: '%s'" % pair)
    return dictionary
def copytree(source_directory, destination_directory, ignore=None):
    """
    Recursively copy the contents of a source directory
    into a destination directory.

    The destination directory is created if it does not exist.
    This function does not copy the root directory ``source_directory``
    into ``destination_directory``.

    If ``source_directory`` is not a directory, it is copied as a file
    onto the ``destination_directory`` path with ``shutil.copyfile``.

    Since ``shutil.copytree(src, dst)`` requires ``dst`` not to exist,
    we cannot use for our purposes.

    Code adapted from http://stackoverflow.com/a/12686557

    :param string source_directory: the source directory, already existing
    :param string destination_directory: the destination directory
    :param function ignore: if not ``None``, a callable invoked as
                            ``ignore(directory, names)`` and returning
                            the names that must not be copied
    """
    if not os.path.isdir(source_directory):
        # base case of the recursion: a single file
        shutil.copyfile(source_directory, destination_directory)
        return
    if not os.path.isdir(destination_directory):
        os.makedirs(destination_directory)
    entries = os.listdir(source_directory)
    skipped = set() if ignore is None else ignore(source_directory, entries)
    for entry in entries:
        if entry in skipped:
            continue
        copytree(
            os.path.join(source_directory, entry),
            os.path.join(destination_directory, entry),
            ignore
        )
def ensure_parent_directory(path, ensure_parent=True):
    """
    Ensure that the parent directory of ``path`` (or ``path`` itself)
    exists, creating it, along with any missing ancestor, if needed.

    :param string path: the path of the file
    :param bool ensure_parent: if ``True``, ensure the parent directory of ``path`` exists;
                               if ``False``, ensure ``path`` exists
    :raises: OSError: if the path cannot be created
    """
    target = os.path.abspath(path)
    if ensure_parent:
        target = os.path.dirname(target)
    if os.path.exists(target):
        return
    try:
        os.makedirs(target)
    except (IOError, OSError):
        raise OSError(u"Directory '%s' cannot be created" % target)
def time_from_ttml(string):
    """
    Parse the given ``SS.mmms`` string
    (TTML values have an "s" suffix, e.g. ``1.234s``)
    and return a time value.

    If ``string`` is ``None`` or shorter than two characters,
    return a zero time value.

    :param string string: the string to be parsed
    :rtype: :class:`~aeneas.exacttiming.TimeValue`
    """
    if (string is None) or (len(string) < 2):
        # NOTE return a TimeValue, as documented and as done by
        #      time_from_ssmmm(), instead of a plain int
        return TimeValue("0.000")
    # strips "s" at the end
    return time_from_ssmmm(string[:-1])
def time_to_ttml(time_value):
    """
    Format the given time value into a ``SS.mmms`` string
    (TTML values have an "s" suffix, e.g. ``1.234s``).

    The numeric part is produced by :func:`time_to_ssmmm`;
    ``None`` is formatted as zero.

    Examples: ::

        12        => 12.000s
        12.345    => 12.345s
        12.345432 => 12.345s
        12.345678 => 12.346s

    :param float time_value: a time value, in seconds
    :rtype: string
    """
    seconds = 0 if time_value is None else time_value
    return "%ss" % time_to_ssmmm(seconds)
def time_from_ssmmm(string):
    """
    Parse the given ``SS.mmm`` string and return a time value.

    If ``string`` is ``None`` or empty, return a zero time value.

    :param string string: the string to be parsed
    :rtype: :class:`~aeneas.exacttiming.TimeValue`
    """
    is_empty = (string is None) or (len(string) < 1)
    return TimeValue("0.000") if is_empty else TimeValue(string)
def time_to_ssmmm(time_value):
    """
    Format the given time value into a ``SS.mmm`` string,
    that is, with exactly three decimal digits.
    ``None`` is formatted as zero.

    Examples: ::

        12        => 12.000
        12.345    => 12.345
        12.345432 => 12.345
        12.345678 => 12.346

    :param float time_value: a time value, in seconds
    :rtype: string
    """
    seconds = 0 if time_value is None else time_value
    return "%.3f" % (seconds)
def time_from_hhmmssmmm(string, decimal_separator="."):
    """
    Parse the given ``HH:MM:SS.mmm`` string and return a time value.

    If the string does not match the expected pattern,
    or if any error occurs while converting its fields,
    return a zero time value.

    :param string string: the string to be parsed
    :param string decimal_separator: the decimal separator to be used;
                                     if ``,`` the comma pattern is used,
                                     otherwise the dot pattern is used
    :rtype: :class:`~aeneas.exacttiming.TimeValue`
    """
    if decimal_separator == ",":
        pattern = HHMMSS_MMM_PATTERN_COMMA
    else:
        pattern = HHMMSS_MMM_PATTERN
    v_length = TimeValue("0.000")
    try:
        match = pattern.search(string)
        if match is not None:
            v_h = int(match.group(1))
            v_m = int(match.group(2))
            v_s = int(match.group(3))
            v_f = TimeValue("0." + match.group(4))
            v_length = v_h * 3600 + v_m * 60 + v_s + v_f
    except Exception:
        # NOTE best effort: e.g. ``string`` is None, or a field is empty
        #      (the patterns accept empty groups); keep the zero value,
        #      but do not swallow KeyboardInterrupt and SystemExit
        pass
    return v_length
def time_to_hhmmssmmm(time_value, decimal_separator="."):
    """
    Format the given time value into a ``HH:MM:SS.mmm`` string.

    The value is rounded to the nearest millisecond,
    consistently with :func:`time_to_ssmmm`,
    and then split into hours, minutes, seconds and milliseconds.
    ``None`` is formatted as zero.

    Examples: ::

        12        => 00:00:12.000
        12.345    => 00:00:12.345
        12.345432 => 00:00:12.345
        12.345678 => 00:00:12.346
        83        => 00:01:23.000
        83.456    => 00:01:23.456
        83.456789 => 00:01:23.457
        3600      => 01:00:00.000
        3612.345  => 01:00:12.345

    :param float time_value: a time value, in seconds
    :param string decimal_separator: the decimal separator, default ``.``
    :rtype: string
    """
    if time_value is None:
        time_value = 0
    # NOTE work on an integer number of milliseconds: repeatedly subtracting
    #      and flooring a float is subject to representation errors
    #      (e.g., 1.001 would be formatted as 00:00:01.000)
    total_milliseconds = int(round(time_value * 1000))
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return "%02d:%02d:%02d%s%03d" % (
        hours,
        minutes,
        seconds,
        decimal_separator,
        milliseconds
    )
def time_from_srt(string):
    """
    Parse the given ``HH:MM:SS,mmm`` string, as used in the SRT format,
    and return a time value.

    This is :func:`time_from_hhmmssmmm` with a comma as decimal separator.

    :param string string: the string to be parsed
    :rtype: :class:`~aeneas.exacttiming.TimeValue`
    """
    srt_separator = ","
    return time_from_hhmmssmmm(string, decimal_separator=srt_separator)
def time_to_srt(time_value):
    """
    Format the given time value into a ``HH:MM:SS,mmm`` string,
    as used in the SRT format.

    This is :func:`time_to_hhmmssmmm` with a comma as decimal separator.

    Examples: ::

        12        => 00:00:12,000
        83        => 00:01:23,000
        3600      => 01:00:00,000

    :param float time_value: a time value, in seconds
    :rtype: string
    """
    srt_separator = ","
    return time_to_hhmmssmmm(time_value, decimal_separator=srt_separator)
def split_url(url):
    """
    Split the given URL ``base#anchor`` into ``(base, anchor)``,
    or ``(base, None)`` if no anchor is present.

    In case there are two or more ``#`` characters,
    return only the first two tokens: ``a#b#c => (a, b)``.

    If ``url`` is ``None``, return ``(None, None)``.

    :param string url: the url
    :rtype: tuple of str
    """
    if url is None:
        return (None, None)
    tokens = url.split("#")
    base = tokens[0]
    anchor = tokens[1] if len(tokens) > 1 else None
    return (base, anchor)
def is_posix():
    """
    Return ``True`` if running on a POSIX OS,
    that is, if ``os.name`` is ``posix``.

    :rtype: bool
    """
    # from https://docs.python.org/2/library/os.html#os.name
    # the registered values of os.name are:
    # "posix", "nt", "os2", "ce", "java", "riscos"
    current_os = os.name
    return current_os == "posix"
def is_linux():
    """
    Return ``True`` if running on Linux, that is, on a POSIX OS
    whose ``os.uname()`` system name is ``Linux``.

    :rtype: bool
    """
    if not is_posix():
        return False
    system_name = os.uname()[0]
    return system_name == "Linux"
def is_osx():
    """
    Return ``True`` if running on Mac OS X (Darwin), that is,
    on a POSIX OS whose ``os.uname()`` system name is ``Darwin``.

    :rtype: bool
    """
    if not is_posix():
        return False
    system_name = os.uname()[0]
    return system_name == "Darwin"
def is_windows():
    """
    Return ``True`` if running on Windows,
    that is, if ``os.name`` is ``nt``.

    :rtype: bool
    """
    current_os = os.name
    return current_os == "nt"
def is_py2_narrow_build():
    """
    Return ``True`` if running on a Python 2 narrow build,
    that is, under Python 2 with ``sys.maxunicode`` equal to 65535.

    :rtype: bool
    """
    if not PY2:
        return False
    return sys.maxunicode == 65535
def fix_slash(path):
    """
    On non-POSIX OSes, change the slashes in ``path``
    for loading in the browser.
    On POSIX OSes, return ``path`` unchanged.

    Example: ::

        c:\\abc\\def => c:/abc/def

    :param string path: the path
    :rtype: string
    """
    if is_posix():
        return path
    return path.replace("\\", "/")
def can_run_c_extension(name=None):
    """
    Determine whether the given Python C extension loads correctly,
    by trying to import it.

    If ``name`` is ``None`` (or it is not one of the known names:
    ``cdtw``, ``cmfcc``, ``cew``, ``cfw``), test the
    ``cdtw``, ``cmfcc``, and ``cew`` Python C extensions,
    and return ``True`` if and only if all of them load correctly.

    :param string name: the name of the Python C extension to test
    :rtype: bool
    """
    def can_run_cdtw():
        """ Python C extension for computing DTW """
        try:
            import aeneas.cdtw.cdtw
        except ImportError:
            return False
        return True

    def can_run_cmfcc():
        """ Python C extension for computing MFCC """
        try:
            import aeneas.cmfcc.cmfcc
        except ImportError:
            return False
        return True

    def can_run_cew():
        """ Python C extension for synthesizing with eSpeak """
        try:
            import aeneas.cew.cew
        except ImportError:
            return False
        return True

    def can_run_cfw():
        """ Python C extension for synthesizing with Festival """
        try:
            import aeneas.cfw.cfw
        except ImportError:
            return False
        return True

    probes = (
        ("cdtw", can_run_cdtw),
        ("cmfcc", can_run_cmfcc),
        ("cew", can_run_cew),
        ("cfw", can_run_cfw),
    )
    for known_name, probe in probes:
        if name == known_name:
            return probe()
    # NOTE cfw is still experimental!
    return can_run_cdtw() and can_run_cmfcc() and can_run_cew()
def run_c_extension_with_fallback(
        log_function,
        extension,
        c_function,
        py_function,
        args,
        rconf
):
    """
    Run a function calling a C extension, falling back
    to a pure Python function if the former does not succeed.

    Both functions are called as ``function(*args)``
    and must return a ``(computed, result)`` pair,
    where ``computed`` tells whether ``result`` is valid.

    :param function log_function: a logger function
    :param string extension: the name of the extension
    :param function c_function: the (Python) function calling the C extension
    :param function py_function: the (Python) function providing the fallback
    :param args: the positional arguments for ``c_function`` and ``py_function``
    :param rconf: the runtime configuration
    :type rconf: :class:`aeneas.runtimeconfiguration.RuntimeConfiguration`
    :rtype: depends on the extension being called
    :raises: RuntimeError: if both the C extension and
                           the pure Python code did not succeed.

    .. versionadded:: 1.4.0
    """
    def attempt_c_extension():
        """ Try the C code path, returning a ``(computed, result)`` pair """
        if not rconf[u"c_extensions"]:
            log_function(u"C extensions disabled")
            return (False, None)
        if extension not in rconf:
            log_function([u"C extension '%s' not recognized", extension])
            return (False, None)
        if not rconf[extension]:
            log_function([u"C extension '%s' disabled", extension])
            return (False, None)
        log_function([u"C extension '%s' enabled", extension])
        if c_function is None:
            log_function(u"C function is None")
            return (False, None)
        if not can_run_c_extension(extension):
            log_function([u"C extension '%s' enabled but it cannot be loaded", extension])
            return (False, None)
        log_function([u"C extension '%s' enabled and it can be loaded", extension])
        return c_function(*args)

    computed, result = attempt_c_extension()
    if not computed:
        # fall back to the pure Python code, if available
        if py_function is None:
            log_function(u"Python function is None")
        else:
            log_function(u"Running the pure Python code")
            computed, result = py_function(*args)
    if not computed:
        raise RuntimeError(u"Both the C extension and the pure Python code failed. (Wrong arguments? Input too big?)")
    return result
def file_can_be_read(path):
    """
    Return ``True`` if the file at the given ``path`` can be read,
    that is, if it can be opened in binary read mode.

    :param string path: the file path
    :rtype: bool

    .. versionadded:: 1.4.0
    """
    readable = False
    if path is not None:
        try:
            with io.open(path, "rb"):
                pass
        except (IOError, OSError):
            pass
        else:
            readable = True
    return readable
def file_can_be_written(path):
    """
    Return ``True`` if a file can be written at the given ``path``,
    that is, if it can be opened in binary write mode.
    The file created by the test is deleted afterwards.

    :param string path: the file path
    :rtype: bool

    .. warning:: This function will attempt to open the given ``path``
                 in write mode, possibly destroying the file previously existing there.

    .. versionadded:: 1.4.0
    """
    if path is None:
        return False
    try:
        with io.open(path, "wb"):
            pass
        # remove the (empty) file just created
        delete_file(None, path)
    except (IOError, OSError):
        return False
    return True
def directory_exists(path):
    """
    Return ``True`` if the given ``path`` string
    points to an existing directory.
    Return ``False`` if ``path`` is ``None``.

    :param string path: the file path
    :rtype: bool
    """
    return (path is not None) and os.path.isdir(path)
def file_exists(path):
    """
    Return ``True`` if the given ``path`` string
    points to an existing file.
    Return ``False`` if ``path`` is ``None``.

    :param string path: the file path
    :rtype: bool
    """
    return (path is not None) and os.path.isfile(path)
def file_size(path):
    """
    Return the size, in bytes, of the file at the given ``path``.
    Return ``-1`` if ``path`` is ``None``,
    or if the file does not exist or cannot be read.

    :param string path: the file path
    :rtype: int
    """
    if path is None:
        # NOTE os.path.getsize(None) would raise a TypeError,
        #      which is not an OSError
        return -1
    try:
        return os.path.getsize(path)
    except OSError:
        return -1
def delete_directory(path):
    """
    Safely delete a directory and all its contents.

    This is a best-effort operation: if ``path`` is ``None``
    nothing is done, and any error raised while deleting is ignored.

    :param string path: the file path
    """
    if path is None:
        return
    try:
        shutil.rmtree(path)
    except Exception:
        # NOTE deliberately ignore failures (missing directory,
        #      permission errors, ...), but do not swallow
        #      KeyboardInterrupt and SystemExit
        pass
def close_file_handler(handler):
    """
    Safely close the given file handler.

    This is a best-effort operation: if ``handler`` is ``None``
    nothing is done, and any error raised by ``os.close()`` is ignored.

    :param object handler: the file handler (as returned by tempfile)
    """
    if handler is None:
        return
    try:
        os.close(handler)
    except Exception:
        # NOTE deliberately ignore failures (e.g., handler already closed),
        #      but do not swallow KeyboardInterrupt and SystemExit
        pass
def delete_file(handler, path):
    """
    Safely delete file.

    The handler, if any, is closed first with :func:`close_file_handler`;
    then the file at ``path`` is removed, unless ``path`` is ``None``.
    This is a best-effort operation: any error raised
    while removing the file is ignored.

    :param object handler: the file handler (as returned by tempfile)
    :param string path: the file path
    """
    close_file_handler(handler)
    if path is None:
        return
    try:
        os.remove(path)
    except Exception:
        # NOTE deliberately ignore failures (missing file,
        #      permission errors, ...), but do not swallow
        #      KeyboardInterrupt and SystemExit
        pass
def relative_path(path, from_file):
    """
    Return the relative path of a file or directory, specified
    as ``path`` relative to (the parent directory of) ``from_file``.

    This method is intended to be called with ``__file__``
    as second argument.

    The returned path is relative to the current working directory.

    If ``path`` is ``None``, return ``None``.

    Example: ::

        path="res/foo.bar"
        from_file="/root/abc/def/ghi.py"
        cwd="/root"
        => "abc/def/res/foo.bar"

    :param string path: the file path
    :param string from_file: the reference file
    :rtype: string
    """
    if path is None:
        return None
    target = absolute_path(path, from_file)
    cwd = os.getcwd()
    if is_windows():
        # NOTE on Windows, if the two paths are on different drives,
        #      the notion of relative path is not defined:
        #      return the absolute path of the target instead.
        target_drive = os.path.splitdrive(target)[0]
        cwd_drive = os.path.splitdrive(cwd)[0]
        if target_drive != cwd_drive:
            return target
    return os.path.relpath(target, start=cwd)
def absolute_path(path, from_file):
    """
    Return the absolute path of a file or directory, specified
    as ``path`` relative to (the parent directory of) ``from_file``.

    This method is intended to be called with ``__file__``
    as second argument.

    If ``path`` is ``None``, return ``None``.

    Example: ::

        path="res/foo.bar"
        from_file="/abc/def/ghi.py"
        => "/abc/def/res/foo.bar"

    :param string path: the file path
    :param string from_file: the reference file
    :rtype: string
    """
    if path is None:
        return None
    reference_directory = os.path.dirname(from_file)
    return os.path.abspath(os.path.join(reference_directory, path))
def read_file_bytes(input_file_path):
    """
    Read the file at the given file path
    and return its contents as a byte string,
    or ``None`` if an error occurred.

    :param string input_file_path: the file path
    :rtype: bytes
    """
    try:
        with io.open(input_file_path, "rb") as input_file:
            return input_file.read()
    except Exception:
        # NOTE deliberately broad (missing file, permission errors,
        #      None path, ...), but do not swallow
        #      KeyboardInterrupt and SystemExit
        return None
def human_readable_number(number, suffix=""):
    """
    Format the given number into a human-readable string:
    the number is repeatedly divided by 1024 until its absolute value
    is smaller than 1024, and it is then printed with one decimal digit,
    followed by the corresponding unit prefix and by ``suffix``.

    Code adapted from http://stackoverflow.com/a/1094933

    :param variant number: the number (int or float)
    :param string suffix: the unit of the number
    :rtype: string
    """
    value = number
    for prefix in ("", "K", "M", "G", "T", "P", "E", "Z"):
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        value /= 1024.0
    # anything bigger is expressed with the largest prefix
    return "%.1f%s%s" % (value, "Y", suffix)
def is_unicode(string):
    """
    Return ``True`` if the given string is a sequence of Unicode code points,
    that is, an instance of ``unicode`` in Python 2
    or of ``str`` in Python 3.

    :param variant string: the string to test
    :rtype: bool
    """
    # NOTE the name ``unicode`` is evaluated only under Python 2
    text_type = unicode if PY2 else str
    return isinstance(string, text_type)
def is_bytes(string):
    """
    Return ``True`` if the given string is a sequence of bytes,
    that is, an instance of ``str`` in Python 2
    or of ``bytes`` in Python 3.

    :param variant string: the string to test
    :rtype: bool
    """
    binary_type = str if PY2 else bytes
    return isinstance(string, binary_type)
def is_utf8_encoded(bstring):
    """
    Return ``True`` if the given byte string can be decoded
    into a Unicode string using the UTF-8 decoder.

    :param bytes bstring: the string to test
    :rtype: bool
    """
    try:
        bstring.decode("utf-8")
    except UnicodeDecodeError:
        return False
    return True
def safe_str(string):
    """
    Safely return the given Unicode string
    from a ``__str__`` function: as a (UTF-8 encoded) byte string
    in Python 2, or as a Unicode string in Python 3.

    If ``string`` is ``None``, return ``None``.

    :param string string: the string to return
    :rtype: bytes or string
    """
    if string is None:
        return None
    return string.encode("utf-8") if PY2 else string
def safe_unichr(codepoint):
    """
    Safely return a Unicode string of length one,
    containing the Unicode character with given codepoint.

    :param int codepoint: the codepoint
    :rtype: string
    """
    if not PY2:
        return chr(codepoint)
    if is_py2_narrow_build():
        # on narrow builds, build the character
        # by decoding its ``\UXXXXXXXX`` escape sequence
        return ("\\U%08x" % codepoint).decode("unicode-escape")
    return unichr(codepoint)
def safe_unicode(string):
    """
    Safely convert the given string to a Unicode string:
    byte strings are decoded as UTF-8,
    any other value (including ``None``) is returned unchanged.

    :param variant string: the byte string or Unicode string to convert
    :rtype: string
    """
    if (string is not None) and is_bytes(string):
        return string.decode("utf-8")
    return string
def safe_bytes(string):
    """
    Safely convert the given string to a bytes string:
    Unicode strings are encoded as UTF-8,
    any other value (including ``None``) is returned unchanged.

    :param variant string: the byte string or Unicode string to convert
    :rtype: bytes
    """
    if (string is not None) and is_unicode(string):
        return string.encode("utf-8")
    return string
def safe_unicode_stdin(string):
    """
    Safely convert the given string to a Unicode string,
    decoding using ``sys.stdin.encoding`` if needed.

    If running from a frozen binary, ``utf-8`` encoding is assumed.

    Values that are not byte strings (including ``None``)
    are returned unchanged.

    :param variant string: the byte string or Unicode string to convert
    :rtype: string
    """
    if string is None:
        return None
    if not is_bytes(string):
        return string
    if FROZEN:
        return string.decode("utf-8")
    try:
        return string.decode(sys.stdin.encoding)
    except UnicodeDecodeError:
        # some bytes are not valid in the stdin encoding: replace them
        return string.decode(sys.stdin.encoding, "replace")
    except Exception:
        # NOTE the stdin encoding is not usable (e.g., it is None
        #      or unknown): assume UTF-8, without swallowing
        #      KeyboardInterrupt and SystemExit
        return string.decode("utf-8")
def object_to_unicode(obj):
    """
    Return a sequence of Unicode code points from the given object,
    by calling ``unicode()`` (Python 2) or ``str()`` (Python 3) on it.

    :param object obj: the object
    :rtype: string
    """
    # NOTE the name ``unicode`` is evaluated only under Python 2
    to_text = unicode if PY2 else str
    return to_text(obj)
def object_to_bytes(obj):
    """
    Return a sequence of bytes from the given object.

    In Python 2, this is ``str(obj)``.
    In Python 3, a ``bytes`` object is returned unchanged,
    while any other object is converted with ``str()``
    and the result is encoded as UTF-8.

    :param object obj: the object
    :rtype: bytes
    """
    if PY2:
        return str(obj)
    if isinstance(obj, bytes):
        return obj
    # NOTE bytes(obj, encoding="utf-8") accepts only str objects,
    #      raising a TypeError for anything else (e.g., an int):
    #      convert to str first, which is a no-op for str objects
    return str(obj).encode("utf-8")
def bundle_directory():
    """
    Return the absolute path of the bundle directory
    (the value of ``sys._MEIPASS``)
    if running from a frozen binary; otherwise return ``None``.

    :rtype: string
    """
    return sys._MEIPASS if FROZEN else None