
Comments (3)

vahidk commented on May 5, 2024

I can't tell why without seeing the code.


realbns2008 commented on May 5, 2024

Below is the multi-GPU code:

# data parallelism

import numpy as np
import tensorflow as tf
import time
start_time=time.time()

def make_parallel(fn, num_gpus, **kwargs):
    # split every input tensor along the batch dimension, one shard per GPU
    in_splits = {}
    for k, v in kwargs.items():
        in_splits[k] = tf.split(v, num_gpus)

    out_split = []
    for i in range(num_gpus):
        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
            # reuse the variables created on GPU 0 so every tower shares W
            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
                out_split.append(fn(**{k: v[i] for k, v in in_splits.items()}))

    # concatenate the per-GPU outputs back into a single batch
    return tf.concat(out_split, axis=0)

def model(x, y):
    # fit y = w0 * x^2 + w1 * x + w2 with a per-example squared loss
    w = tf.get_variable("W", shape=[3, 1])

    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
    yhat = tf.squeeze(tf.matmul(f, w), 1)
    loss = tf.square(yhat - y)
    return loss

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

#vvvvvvvvvvvvvv!!!parallel!!!vvvvvvvvvvvvvvvvvvvvvv#
loss = make_parallel(model, 4, x=x, y=y)
#^^^^^^^^^^^^^^!!!parallel!!!^^^^^^^^^^^^^^^^^^^^^^#

#vvvvvvvvvvvvvv!!!parallel!!!vvvvvvvvvvvvvvvvvvvvvv#
train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss),
    # place each gradient op on the same device as its forward op
    colocate_gradients_with_ops=True)
#^^^^^^^^^^^^^^!!!parallel!!!^^^^^^^^^^^^^^^^^^^^^^#

def generate_data():
    x_val = np.random.uniform(-10.0, 10.0, size=1024 * 1024 * 100)
    y_val = 5 * np.square(x_val) + 3
    return x_val, y_val

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(10):
    print("%d %.4f" % (i, time.time() - start_time))
    x_val, y_val = generate_data()
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})

_, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
print(sess.run(tf.contrib.framework.get_variables_by_name("W")))
print(loss_val)
end_time = time.time()
print("time: %.4f" % (end_time - start_time))

Below is the single-GPU code:

# before data parallelism

import numpy as np
import tensorflow as tf
import time
start_time=time.time()

def model(x, y):
    w = tf.get_variable("W", shape=[3, 1])

    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
    yhat = tf.squeeze(tf.matmul(f, w), 1)
    loss = tf.square(yhat - y)
    return loss

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

loss = model(x, y)

train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss))

def generate_data():
    x_val = np.random.uniform(-10.0, 10.0, size=1024 * 1024 * 100)
    y_val = 5 * np.square(x_val) + 3
    return x_val, y_val

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(10):
    print("%d %.4f" % (i, time.time() - start_time))
    x_val, y_val = generate_data()
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})

_, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
print(sess.run(tf.contrib.framework.get_variables_by_name("W")))
print(loss_val)
end_time = time.time()
print("time: %.4f" % (end_time - start_time))


vahidk commented on May 5, 2024

This is not the correct way to evaluate runtime performance: the original script measures everything from process start, so graph construction, session startup, numpy data generation, and the first-step warm-up all end up in the total. I fixed it to time each training step individually and report the median:

# before data parallelism

import numpy as np
import tensorflow as tf
import time

tf.reset_default_graph()

def make_parallel(fn, num_gpus, **kwargs):
    in_splits = {}
    for k, v in kwargs.items():
        in_splits[k] = tf.split(v, num_gpus)

    out_split = []
    for i in range(num_gpus):
        with tf.device(tf.DeviceSpec(device_type="GPU", device_index=i)):
            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
                out_split.append(fn(**{k : v[i] for k, v in in_splits.items()}))

    return tf.concat(out_split, axis=0)

def model(x, y):
    w = tf.get_variable("W", shape=[3, 1])

    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
    yhat = tf.squeeze(tf.matmul(f, w), 1)
    loss = tf.square(yhat - y)
    return loss

x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)

loss = model(x, y)
# loss = make_parallel(model, 2, x=x, y=y)  # swap in to run on 2 GPUs

train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss),
    colocate_gradients_with_ops=True)

def generate_data():
    x_val = np.random.uniform(-10.0, 10.0, size=1024 * 1024 * 100)
    y_val = 5 * np.square(x_val) + 3
    return x_val, y_val

sess = tf.Session()
sess.run(tf.global_variables_initializer())

diffs = []
for i in range(10):
    x_val, y_val = generate_data()
    # time only the training step itself, not the data generation
    start_time = time.time()
    _, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
    diff = time.time() - start_time
    diffs.append(diff)
    print("%d %.4f" % (i, diff))

_, loss_val = sess.run([train_op, loss], {x: x_val, y: y_val})
print(sess.run(tf.contrib.framework.get_variables_by_name("W")))
print(loss_val)
print("time: %.4f" % np.median(diffs))

Here are the results:

1 GPU:  0.7662 s per step (median)
2 GPUs: 0.6438 s per step (median), roughly a 1.19x speedup

Furthermore, this model is too simple; you'd see much larger gains if the model did more computation per step. The current bottleneck is mostly data transfer between the CPU and the GPUs.
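To make that point concrete, here is a minimal sketch (my own illustration, not code from this thread) of one way to take the host-to-device copy out of the timed step: generate the batch inside the graph with tf.random_uniform instead of feeding large numpy arrays through feed_dict.

# Hypothetical variant (not from the thread): the batch is produced on the
# device by the graph itself, so no feed_dict copy from host memory happens
# inside the timed step.
import numpy as np
import tensorflow as tf
import time

tf.reset_default_graph()

def model(x, y):
    w = tf.get_variable("W", shape=[3, 1])
    f = tf.stack([tf.square(x), x, tf.ones_like(x)], 1)
    yhat = tf.squeeze(tf.matmul(f, w), 1)
    return tf.square(yhat - y)

# generate x and y in-graph; a smaller batch than above, to fit one device
x = tf.random_uniform([1024 * 1024 * 10], -10.0, 10.0)
y = 5 * tf.square(x) + 3

loss = model(x, y)
train_op = tf.train.AdamOptimizer(0.1).minimize(
    tf.reduce_mean(loss), colocate_gradients_with_ops=True)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

diffs = []
for i in range(10):
    start_time = time.time()
    sess.run(train_op)
    diffs.append(time.time() - start_time)
print("median step time: %.4f" % np.median(diffs))

Comparing this variant's median step time against the feed_dict version at the same batch size separates the transfer cost from the compute cost.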
