Comments (3)
I can't tell why without seeing the code.
Below is the multi-GPU code:
# data parallelism
import numpy as np
import tensorflow as tf
import time

start_time = time.time()

def make_parallel(fn, num_gpus, **kwargs):
    # Split each input along the batch dimension, one shard per GPU.
    in_splits = {}
    for k, v in kwargs.items():
        in_splits[k] = tf.split(v, num_gpus)

    # Build one replica of the model on each GPU, reusing variables.
    out_split = []
    for i in range(num_gpus):
        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
                out_split.append(fn(**{k: v[i] for k, v in in_splits.items()}))

    return tf.concat(out_split, axis=0)

def model(x, y):
    w = tf.get_variable("W", shape=[3, 1])
    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
    yhat = tf.squeeze(tf.matmul(f, w), 1)
    loss = tf.square(yhat - y)
    return loss

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

#vvvvvvvvvvvvvv!!!parallel!!!vvvvvvvvvvvvvvvvvvvvvv#
loss = make_parallel(model, 4, x=x, y=y)
#^^^^^^^^^^^^^^!!!parallel!!!^^^^^^^^^^^^^^^^^^^^^^#

#vvvvvvvvvvvvvv!!!parallel!!!vvvvvvvvvvvvvvvvvvvvvv#
train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss),
    colocate_gradients_with_ops=True)
#^^^^^^^^^^^^^^!!!parallel!!!^^^^^^^^^^^^^^^^^^^^^^#

def generate_data():
    x_val = np.random.uniform(-10.0, 10.0, size=1024 * 1024 * 100)
    y_val = 5 * np.square(x_val) + 3
    return x_val, y_val

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(10):
    print("%d %.4f" % (i, time.time() - start_time))
    x_val, y_val = generate_data()
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})

print(sess.run(tf.contrib.framework.get_variables_by_name("W")))
print(loss_val)

end_time = time.time()
print("time: %.4f" % (end_time - start_time))
Below is the single-GPU code:
# before data parallelism
import numpy as np
import tensorflow as tf
import time

start_time = time.time()

def model(x, y):
    w = tf.get_variable("W", shape=[3, 1])
    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
    yhat = tf.squeeze(tf.matmul(f, w), 1)
    loss = tf.square(yhat - y)
    return loss

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

loss = model(x, y)
train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss))

def generate_data():
    x_val = np.random.uniform(-10.0, 10.0, size=1024 * 1024 * 100)
    y_val = 5 * np.square(x_val) + 3
    return x_val, y_val

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for i in range(10):
    print("%d %.4f" % (i, time.time() - start_time))
    x_val, y_val = generate_data()
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})

print(sess.run(tf.contrib.framework.get_variables_by_name("W")))
print(loss_val)

end_time = time.time()
print("time: %.4f" % (end_time - start_time))
This is not the correct way to evaluate runtime performance: start_time is taken before graph construction, so each printed number also includes one-time setup and the (expensive) NumPy data generation. I fixed it by timing each training step on its own and reporting the median:
# before data parallelism
import numpy as np
import tensorflow as tf
import time

tf.reset_default_graph()

def make_parallel(fn, num_gpus, **kwargs):
    in_splits = {}
    for k, v in kwargs.items():
        in_splits[k] = tf.split(v, num_gpus)

    out_split = []
    for i in range(num_gpus):
        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
                out_split.append(fn(**{k: v[i] for k, v in in_splits.items()}))

    return tf.concat(out_split, axis=0)

def model(x, y):
    w = tf.get_variable("W", shape=[3, 1])
    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
    yhat = tf.squeeze(tf.matmul(f, w), 1)
    loss = tf.square(yhat - y)
    return loss

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

loss = model(x, y)
# loss = make_parallel(model, 2, x=x, y=y)
train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss),
    colocate_gradients_with_ops=True)

def generate_data():
    x_val = np.random.uniform(-10.0, 10.0, size=1024 * 1024 * 100)
    y_val = 5 * np.square(x_val) + 3
    return x_val, y_val

sess = tf.Session()
sess.run(tf.global_variables_initializer())

diffs = []
for i in range(10):
    x_val, y_val = generate_data()
    # time only the training step, not the data generation
    start_time = time.time()
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
    diff = time.time() - start_time
    diffs.append(diff)
    print("%d %.4f" % (i, diff))
    # second, untimed training step
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})

print(sess.run(tf.contrib.framework.get_variables_by_name("W")))
print(loss_val)
print("time: %.4f" % np.median(diffs))
This is the result (median step time in seconds):
1 GPU: 0.7662
2 GPUs: 0.6438
Furthermore, this model is too simple; you'd see much larger gains if your model did more computation. The current bottleneck is mostly data transfer between the CPU and the GPUs.
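One way to check that claim (a minimal sketch of my own, not from the thread): generate the synthetic batch in-graph with tf.random_uniform instead of feeding NumPy arrays through feed_dict, so a timed step no longer includes the host-to-device copy of roughly 100M floats. It reuses the model() and make_parallel() definitions from above.

# Hypothetical variant: draw the batch on the fly inside the graph,
# removing the feed_dict host-to-device transfer from the timed step.
tf.reset_default_graph()

# Same batch size as generate_data() above.
x_gen = tf.random_uniform([1024 * 1024 * 100], -10.0, 10.0)
y_gen = 5 * tf.square(x_gen) + 3

loss = make_parallel(model, 2, x=x_gen, y=y_gen)
train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss),
    colocate_gradients_with_ops=True)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
sess.run(train_op)  # no feed_dict: the batch is generated in-graph each step

If the step time drops sharply under this variant, that supports the transfer-bound explanation; whatever remains is the compute, which is where extra GPUs actually help.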