andre@linuxdnn:~/DeepSpeech$ git pull && runipy DeepSpeech.ipynb
Already up-to-date.
10/15/2016 06:00:04 PM INFO: Reading notebook DeepSpeech.ipynb
10/15/2016 06:00:05 PM INFO: Running cell:
import os
import time
import json
import datetime
import tempfile
import subprocess
import numpy as np
from math import ceil
import tensorflow as tf
from util.log import merge_logs
from util.gpu import get_available_gpus
from util.importers.ted_lium import read_data_sets
from util.text import sparse_tensor_value_to_texts, wers
from tensorflow.python.ops import ctc_ops
I tensorflow/stream_executor/dso_loader.cc:116] successfully opened CUDA library libcublas.so.8.0 locally
I tensorflow/stream_executor/dso_loader.cc:116] successfully opened CUDA library libcudnn.so.5 locally
I tensorflow/stream_executor/dso_loader.cc:116] successfully opened CUDA library libcufft.so.8.0 locally
I tensorflow/stream_executor/dso_loader.cc:116] successfully opened CUDA library libcuda.so.1 locally
I tensorflow/stream_executor/dso_loader.cc:116] successfully opened CUDA library libcurand.so.8.0 locally
10/15/2016 06:00:05 PM INFO: Cell returned
10/15/2016 06:00:05 PM INFO: Running cell:
# Hyper-parameters for the Adam optimizer and the training loop.
learning_rate = 0.001 # TODO: Determine a reasonable value for this
beta1 = 0.9 # TODO: Determine a reasonable value for this
beta2 = 0.999 # TODO: Determine a reasonable value for this
epsilon = 1e-8 # TODO: Determine a reasonable value for this
training_iters = 15 # Number of training epochs - TODO: Determine a reasonable value for this
batch_size = 64 # TODO: Determine a reasonable value for this
display_step = 1 # Epochs between progress reports - TODO: Determine a reasonable value for this
validation_step = 1 # Epochs between validation WER reports - TODO: Determine a reasonable value for this
checkpoint_step = 5 # Epochs between model checkpoints - TODO: Determine a reasonable value for this
checkpoint_dir = tempfile.gettempdir() # Directory receiving model checkpoints - TODO: Determine a reasonable value for this
10/15/2016 06:00:05 PM INFO: Cell returned
10/15/2016 06:00:05 PM INFO: Running cell:
dropout_rate = 0.05 # TODO: Validate this is a reasonable value

# This global placeholder will be used for all dropout definitions
dropout_rate_placeholder = tf.placeholder(tf.float32)

# The feed_dict used for training employs the given dropout_rate
feed_dict_train = { dropout_rate_placeholder: dropout_rate }

# While the feed_dict used for validation, test and train progress reporting employs zero dropout
feed_dict = { dropout_rate_placeholder: 0.0 }
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# Ceiling applied to every clipped-ReLU activation in BiRNN
relu_clip = 20 # TODO: Validate this is a reasonable value
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# Number of input features per audio frame (presumably MFCC coefficients - TODO confirm)
n_input = 26 # TODO: Determine this programmatically from the sample rate
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# Number of context frames included on each side of the current frame
n_context = 9 # TODO: Determine the optimal value using a validation data set
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# Widths of the dense layers; the input width (current frame plus context
# frames on both sides) is reused. The transcript corrupted the original
# expression "2*n_input*n_context" (operators confirmed by the h1 shape below).
n_hidden_1 = n_input + 2*n_input*n_context # Note: This value was not specified in the original paper
n_hidden_2 = n_input + 2*n_input*n_context # Note: This value was not specified in the original paper
n_hidden_5 = n_input + 2*n_input*n_context # Note: This value was not specified in the original paper
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# LSTM cell dimension; restored "2*n_input*n_context" from the corrupted transcript
n_cell_dim = n_input + 2*n_input*n_context # TODO: Is this a reasonable value
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# Layer 3 feeds the BiRNN: forward and backward cell inputs concatenated
n_hidden_3 = 2 * n_cell_dim
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# Size of the output alphabet (presumably 26 letters plus space, apostrophe
# and the CTC blank - TODO confirm)
n_character = 29 # TODO: Determine if this should be extended with other punctuation
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
# The logits layer is as wide as the output alphabet
n_hidden_6 = n_character
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
def variable_on_cpu(name, shape, initializer):
    """Create (or fetch via variable reuse) a variable pinned to the CPU.

    Keeping parameters on /cpu:0 lets every GPU tower share a single copy
    of the weights instead of each device holding its own.

    name: variable name passed to tf.get_variable
    shape: variable shape
    initializer: initializer passed to tf.get_variable
    Returns the created/retrieved variable.
    """
    # Use the /cpu:0 device for scoped operations
    with tf.device('/cpu:0'):
        # Create or get apropos variable
        var = tf.get_variable(name=name, shape=shape, initializer=initializer)
    return var
10/15/2016 06:00:06 PM INFO: Cell returned
10/15/2016 06:00:06 PM INFO: Running cell:
def BiRNN(batch_x, n_steps):
    """Compute logits for a batch with the DeepSpeech-style BiRNN model.

    Three clipped-ReLU dense layers (with dropout) feed one bidirectional
    LSTM layer, followed by another clipped-ReLU dense layer and a linear
    logits layer.

    batch_x: tensor of shape [batch_size, n_steps, n_input + 2*n_input*n_context]
    n_steps: number of time steps in this batch
    Returns a tensor of shape [batch_size, n_steps, n_hidden_6].
    """
    # Input shape: [batch_size, n_steps, n_input + 2*n_input*n_context]
    batch_x = tf.transpose(batch_x, [1, 0, 2])  # Permute n_steps and batch_size
    # Reshape to prepare input for first layer
    batch_x = tf.reshape(batch_x, [-1, n_input + 2*n_input*n_context])  # (n_steps*batch_size, n_input + 2*n_input*n_context)

    # Hidden layer with clipped RELU activation and dropout
    b1 = variable_on_cpu('b1', [n_hidden_1], tf.random_normal_initializer())
    h1 = variable_on_cpu('h1', [n_input + 2*n_input*n_context, n_hidden_1], tf.random_normal_initializer())
    layer_1 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(batch_x, h1), b1)), relu_clip)
    layer_1 = tf.nn.dropout(layer_1, (1.0 - dropout_rate_placeholder))

    # Hidden layer with clipped RELU activation and dropout
    b2 = variable_on_cpu('b2', [n_hidden_2], tf.random_normal_initializer())
    h2 = variable_on_cpu('h2', [n_hidden_1, n_hidden_2], tf.random_normal_initializer())
    layer_2 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_1, h2), b2)), relu_clip)
    layer_2 = tf.nn.dropout(layer_2, (1.0 - dropout_rate_placeholder))

    # Hidden layer with clipped RELU activation and dropout
    b3 = variable_on_cpu('b3', [n_hidden_3], tf.random_normal_initializer())
    h3 = variable_on_cpu('h3', [n_hidden_2, n_hidden_3], tf.random_normal_initializer())
    layer_3 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(layer_2, h3), b3)), relu_clip)
    layer_3 = tf.nn.dropout(layer_3, (1.0 - dropout_rate_placeholder))

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_cell_dim, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_cell_dim, forget_bias=1.0)

    # Split data because rnn cell needs a list of inputs for the BRNN inner loop
    layer_3 = tf.split(0, n_steps, layer_3)

    # Get lstm cell output
    outputs, output_state_fw, output_state_bw = tf.nn.bidirectional_rnn(cell_fw=lstm_fw_cell,
                                                                        cell_bw=lstm_bw_cell,
                                                                        inputs=layer_3,
                                                                        dtype=tf.float32)

    # Reshape outputs from a list of n_steps tensors each of shape [batch_size, 2*n_cell_dim]
    # to a single tensor of shape [n_steps*batch_size, 2*n_cell_dim]
    outputs = tf.pack(outputs)
    outputs = tf.reshape(outputs, [-1, 2*n_cell_dim])

    # Hidden layer with clipped RELU activation and dropout
    b5 = variable_on_cpu('b5', [n_hidden_5], tf.random_normal_initializer())
    h5 = variable_on_cpu('h5', [(2 * n_cell_dim), n_hidden_5], tf.random_normal_initializer())
    layer_5 = tf.minimum(tf.nn.relu(tf.add(tf.matmul(outputs, h5), b5)), relu_clip)
    layer_5 = tf.nn.dropout(layer_5, (1.0 - dropout_rate_placeholder))

    # Hidden layer of logits (no activation; CTC loss consumes raw logits)
    b6 = variable_on_cpu('b6', [n_hidden_6], tf.random_normal_initializer())
    h6 = variable_on_cpu('h6', [n_hidden_5, n_hidden_6], tf.random_normal_initializer())
    layer_6 = tf.add(tf.matmul(layer_5, h6), b6)

    # Reshape layer_6 from a tensor of shape [n_steps*batch_size, n_hidden_6]
    # to a tensor of shape [batch_size, n_steps, n_hidden_6]
    layer_6 = tf.reshape(layer_6, [n_steps, batch_size, n_hidden_6])
    layer_6 = tf.transpose(layer_6, [1, 0, 2])  # Permute n_steps and batch_size

    # Return layer_6
    return layer_6
10/15/2016 06:00:07 PM INFO: Cell returned
10/15/2016 06:00:07 PM INFO: Running cell:
def calculate_accuracy_and_loss(batch_set):
    """Build the loss and accuracy ops for the next batch of batch_set.

    batch_set: data set object providing next_batch() -> (X, Y, n_steps)
    Returns (avg_loss, accuracy, decoded, labels) where accuracy is the
    mean edit distance between the beam-search decoding and the labels
    (an error measure: lower is better).
    """
    # Obtain the next batch of data
    batch_x, batch_y, n_steps = batch_set.next_batch()
    # Set batch_seq_len for the batch (every example padded to n_steps)
    batch_seq_len = batch_x.shape[0] * [n_steps]
    # Calculate the logits of the batch using BiRNN
    logits = BiRNN(batch_x, n_steps)
    # CTC loss requires the logits be time major
    logits = tf.transpose(logits, [1, 0, 2])
    # Compute the CTC loss
    total_loss = ctc_ops.ctc_loss(logits, batch_y, batch_seq_len)
    # Calculate the average loss across the batch
    avg_loss = tf.reduce_mean(total_loss)
    # Beam search decode the batch
    decoded, _ = ctc_ops.ctc_beam_search_decoder(logits, batch_seq_len)
    # Compute the edit (Levenshtein) distance against the labels
    distance = tf.edit_distance(tf.cast(decoded[0], tf.int32), batch_y)
    # Compute the accuracy
    accuracy = tf.reduce_mean(distance)
    # Return results to the caller
    return avg_loss, accuracy, decoded, batch_y
10/15/2016 06:00:07 PM INFO: Cell returned
10/15/2016 06:00:07 PM INFO: Running cell:
def create_optimizer():
    """Return an Adam optimizer configured from the global hyper-parameters
    (learning_rate, beta1, beta2, epsilon)."""
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=beta1,
                                       beta2=beta2,
                                       epsilon=epsilon)
    return optimizer
10/15/2016 06:00:07 PM INFO: Cell returned
10/15/2016 06:00:07 PM INFO: Running cell:
# Get a list of the available gpu's ['/gpu:0', '/gpu:1'...]
available_devices = get_available_gpus()

# If there are no GPU's use the CPU
if 0 == len(available_devices):
    available_devices = ['/cpu:0']
I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
I tensorflow/core/common_runtime/gpu/gpu_device.cc:951] Found device 0 with properties:
name: GeForce GTX 950
major: 5 minor: 2 memoryClockRate (GHz) 1.405
pciBusID 0000:01:00.0
Total memory: 1.95GiB
Free memory: 1.92GiB
I tensorflow/core/common_runtime/gpu/gpu_device.cc:972] DMA: 0
I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] 0: Y
I tensorflow/core/common_runtime/gpu/gpu_device.cc:1041] Creating TensorFlow device (/gpu:0) -> (device: 0, name: GeForce GTX 950, pci bus id: 0000:01:00.0)
10/15/2016 06:00:07 PM INFO: Cell returned
10/15/2016 06:00:07 PM INFO: Running cell:
def get_tower_results(batch_set, optimizer=None):
    """Build one model replica ("tower") per available device.

    batch_set: data set each tower draws its mini-batch from
    optimizer: when given, gradients are computed per tower (training mode);
               when None, only decodings/labels are built (evaluation mode)
    Returns (tower_decodings, tower_labels, tower_gradients, avg_loss,
    accuracy); avg_loss and accuracy are those of the LAST tower built.
    """
    # Tower decodings to return
    tower_decodings = []
    # Tower labels to return
    tower_labels = []
    # Tower gradients to return
    tower_gradients = []

    # Loop over available_devices
    for i in xrange(len(available_devices)):
        # Execute operations of tower i on device i
        with tf.device(available_devices[i]):
            # Create a scope for all operations of tower i
            with tf.name_scope('tower_%d' % i) as scope:
                # Calculate the avg_loss and accuracy and retrieve the decoded
                # batch along with the original batch's labels (Y) of this tower
                avg_loss, accuracy, decoded, labels = calculate_accuracy_and_loss(batch_set)
                # Allow for variables to be re-used by the next tower
                tf.get_variable_scope().reuse_variables()
                # Retain tower's decoded batch
                tower_decodings.append(decoded)
                # Retain tower's labels (Y)
                tower_labels.append(labels)
                # If we are in training, there will be an optimizer given and
                # only then we will compute and retain gradients on base of the loss
                if optimizer is not None:
                    # Compute gradients for model parameters using tower's mini-batch
                    gradients = optimizer.compute_gradients(avg_loss)
                    # Retain tower's gradients
                    tower_gradients.append(gradients)

    # Return results to caller
    return tower_decodings, tower_labels, tower_gradients, avg_loss, accuracy
10/15/2016 06:00:07 PM INFO: Cell returned
10/15/2016 06:00:07 PM INFO: Running cell:
def average_gradients(tower_gradients):
    """Average the per-tower gradients variable by variable.

    tower_gradients: list (one entry per tower) of lists of
                     (gradient, variable) pairs as produced by
                     Optimizer.compute_gradients
    Returns a single list of (averaged_gradient, variable) pairs suitable
    for Optimizer.apply_gradients.
    """
    # List of average gradients to return to the caller
    average_grads = []

    # Loop over gradient/variable pairs from all towers
    for grad_and_vars in zip(*tower_gradients):
        # Introduce grads to store the gradients for the current variable
        grads = []
        # Loop over the gradients for the current variable
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)
            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)
        # Average over the 'tower' dimension
        grad = tf.concat(0, grads)
        grad = tf.reduce_mean(grad, 0)
        # Create a gradient/variable tuple for the current variable with its average gradient;
        # the variable object is shared across towers, so the first tower's is used
        grad_and_var = (grad, grad_and_vars[0][1])
        # Add the current tuple to average_grads
        average_grads.append(grad_and_var)

    # Return result to caller
    return average_grads
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
def apply_gradients(optimizer, average_grads):
    """Return the op that applies the averaged gradients to the model."""
    apply_gradient_op = optimizer.apply_gradients(average_grads)
    return apply_gradient_op
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
def log_variable(variable, gradient=None):
    """Attach TensorBoard summaries (mean/stddev/max/min/histogram) to a
    variable, plus a histogram of its gradient when one is supplied.
    """
    name = variable.name
    mean = tf.reduce_mean(variable)
    tf.scalar_summary(name + '/mean', mean)
    tf.scalar_summary(name + '/sttdev', tf.sqrt(tf.reduce_mean(tf.square(variable - mean))))
    tf.scalar_summary(name + '/max', tf.reduce_max(variable))
    tf.scalar_summary(name + '/min', tf.reduce_min(variable))
    tf.histogram_summary(name, variable)
    if gradient is not None:
        # IndexedSlices (sparse gradients) carry the actual numbers in .values
        if isinstance(gradient, tf.IndexedSlices):
            grad_values = gradient.values
        else:
            grad_values = gradient
        if grad_values is not None:
            tf.histogram_summary(name + "/gradients", grad_values)
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
def log_grads_and_vars(grads_and_vars):
    """Log summaries for every (gradient, variable) pair via log_variable."""
    for gradient, variable in grads_and_vars:
        log_variable(variable, gradient=gradient)
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
# Time-stamped directory for this run's TensorBoard logs
logs_dir = "logs"
log_dir = '%s/%s' % (logs_dir, time.strftime("%Y%m%d-%H%M%S"))

def get_git_revision_hash():
    """Return the current git commit hash of the working directory."""
    return subprocess.check_output(['git', 'rev-parse', 'HEAD']).strip()

def get_git_branch():
    """Return the name of the currently checked-out git branch."""
    return subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
def decode_batch(data_set):
    """Build evaluation-only towers for data_set and return their
    decodings and labels (no optimizer, so no gradients are created)."""
    # Get decodings for each tower (runs across all GPU's)
    tower_decodings, tower_labels, _, _, _ = get_tower_results(data_set)
    return tower_decodings, tower_labels
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
def calculate_wer(session, tower_decodings, tower_labels):
    """Run the decoding/label ops and compute word error rates.

    session: TF session used to evaluate the ops (with zero dropout)
    Returns (items, mean) where items is a list of
    (original, result, rate) triples and mean is the mean WER.
    """
    originals = []
    results = []

    # Normalization: flatten the per-tower lists of decodings
    tower_decodings = [j for i in tower_decodings for j in i]

    # Iterating over the towers
    for i in range(len(tower_decodings)):
        decoded, labels = session.run([tower_decodings[i], tower_labels[i]], feed_dict)
        originals.extend(sparse_tensor_value_to_texts(labels))
        results.extend(sparse_tensor_value_to_texts(decoded))

    # Pairwise calculation of all rates
    rates, mean = wers(originals, results)
    return zip(originals, results, rates), mean
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
def print_wer_report(session, caption, tower_decodings, tower_labels, show_example=True):
items, mean = calculate_wer(session, tower_decodings, tower_labels)
print "%s WER: %f09" % (caption, mean)
if len(items) > 0 and show_example:
print "Example (WER = %f09)" % items[0][2]
print " - source: "%s"" % items[0][0]
print " - result: "%s"" % items[0][1]
return items, mean
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
def train(session, data_sets):
# Calculate the total number of batches
total_batches = data_sets.train.total_batches
# Create optimizer
optimizer = create_optimizer()
# Get gradients for each tower (Runs across all GPU's)
tower_decodings, tower_labels, tower_gradients, tower_loss, accuracy = \
get_tower_results(data_sets.train, optimizer)
# Validation step preparation
validation_tower_decodings, validation_tower_labels = decode_batch(data_sets.dev)
# Average tower gradients
avg_tower_gradients = average_gradients(tower_gradients)
# Add logging of averaged gradients
log_grads_and_vars(avg_tower_gradients)
# Apply gradients to modify the model
apply_gradient_op = apply_gradients(optimizer, avg_tower_gradients)
# Create a saver to checkpoint the model
saver = tf.train.Saver(tf.all_variables())
# Prepare tensor board logging
merged = tf.merge_all_summaries()
writer = tf.train.SummaryWriter(log_dir, session.graph)
# Init all variables in session
session.run(tf.initialize_all_variables())
# Init recent word error rate levels
last_train_wer = 0.0
last_validation_wer = 0.0
# Loop over the data set for training_epochs epochs
for epoch in range(training_iters):
# Define total accuracy for the epoch
total_accuracy = 0
# Validation step
if epoch % validation_step == 0:
_, last_validation_wer = print_wer_report(session, "Validation", validation_tower_decodings, validation_tower_labels)
print
# Loop over the batches
for batch in range(int(ceil(float(total_batches)/len(available_devices)))):
# Compute the average loss for the last batch
_, batch_avg_loss = session.run([apply_gradient_op, tower_loss], feed_dict_train)
# Add batch to total_accuracy
total_accuracy += session.run(accuracy, feed_dict_train)
# Log all variable states in current step
step = epoch * total_batches + batch * len(available_devices)
summary_str = session.run(merged, feed_dict_train)
writer.add_summary(summary_str, step)
writer.flush()
# Print progress message
if epoch % display_step == 0:
print "Epoch:", '%04d' % (epoch+1), "avg_cer=", "{:.9f}".format((total_accuracy / total_batches))
_, last_train_wer = print_wer_report(session, "Training", tower_decodings, tower_labels)
print
# Checkpoint the model
if (epoch % checkpoint_step == 0) or (epoch == training_iters - 1):
checkpoint_path = os.path.join(checkpoint_dir, 'model.ckpt')
print "Checkpointing in directory", "%s" % checkpoint_dir
saver.save(session, checkpoint_path, global_step=epoch)
print
# Indicate optimization has concluded
print "Optimization Finished!"
return last_train_wer, last_validation_wer
10/15/2016 06:00:08 PM INFO: Cell returned
10/15/2016 06:00:08 PM INFO: Running cell:
# Define CPU as device on which the multi-gpu training is orchestrated
# NOTE(review): the comment says CPU but the device below is '/gpu:0' — confirm intent
with tf.device('/gpu:0'):
    # Obtain ted lium data
    ted_lium = read_data_sets(tf.get_default_graph(), './data/ted', batch_size, n_input, n_context)

    # Create session in which to execute
    session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True))

    # Take start time for time measurement
    time_started = datetime.datetime.utcnow()

    # Train the network
    last_train_wer, last_validation_wer = train(session, ted_lium)

    # Take final time for time measurement
    time_finished = datetime.datetime.utcnow()

    # Calculate duration in seconds
    duration = time_finished - time_started
    duration = duration.days * 86400 + duration.seconds
10/15/2016 11:52:33 PM INFO: Cell raised uncaught exception:
InvalidArgumentError Traceback (most recent call last)
in ()
2 with tf.device('/gpu:0'):
3 # Obtain ted lium data
----> 4 ted_lium = read_data_sets(tf.get_default_graph(), './data/ted', batch_size, n_input, n_context)
5
6 # Create session in which to execute
/home/andre/DeepSpeech/util/importers/ted_lium.pyc in read_data_sets(graph, data_dir, batch_size, numcep, numcontext, thread_count)
114 TED_DATA = "TEDLIUM_release2.tar.gz"
115 TED_DATA_URL = "http://www.openslr.org/resources/19/TEDLIUM_release2.tar.gz"
--> 116 local_file = base.maybe_download(TED_DATA, data_dir, TED_DATA_URL)
117
118 # Conditionally extract TED data
/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/learn/python/learn/datasets/base.pyc in maybe_download(filename, work_directory, source_url)
140 temp_file_name = tmpfile.name
141 urllib.request.urlretrieve(source_url, temp_file_name)
--> 142 gfile.Copy(temp_file_name, filepath)
143 with gfile.GFile(filepath) as f:
144 size = f.size()
/usr/local/lib/python2.7/dist-packages/tensorflow/python/lib/io/file_io.pyc in copy(oldpath, newpath, overwrite)
299 with errors.raise_exception_on_not_ok_status() as status:
300 pywrap_tensorflow.CopyFile(
--> 301 compat.as_bytes(oldpath), compat.as_bytes(newpath), overwrite, status)
302
303
/usr/lib/python2.7/contextlib.pyc in __exit__(self, type, value, traceback)
22 if type is None:
23 try:
---> 24 self.gen.next()
25 except StopIteration:
26 return
/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/errors.pyc in raise_exception_on_not_ok_status()
461 None, None,
462 compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 463 pywrap_tensorflow.TF_GetCode(status))
464 finally:
465 pywrap_tensorflow.TF_DeleteStatus(status)
InvalidArgumentError: /tmp/tmpnlUgej
10/15/2016 11:52:33 PM INFO: Shutdown kernel
10/15/2016 11:52:36 PM WARNING: Exiting with nonzero exit status
andre@linuxdnn:~/DeepSpeech$
andre@linuxdnn:~/DeepSpeech$ git pull && runipy DeepSpeech.ipynb