wut-worker-mas, sorta
parent c6aa78fa0e
commit ac33fbe4ff
@@ -0,0 +1,24 @@
+#!/bin/bash
+# wut-worker-mas
+#
+# Starts worker client.
+#
+# Usage:
+# wut-worker-mas
+# Example:
+# wut-worker-mas
+#
+# Note:
+# Each node needs a unique index number.
+#
+# NOTE!
+# This generates the node number based off the hostname.
+# The hosts are ml0 through ml5.
+
+HOSTNUM=`hostname | sed -e 's/ml//g'`
+
+export TF_CONFIG='{"cluster": {"worker": [ "ml0-int:2222", "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
+
+echo $TF_CONFIG
+
+python3 wut-worker-mas.py
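The script maps each hostname to a unique task index (ml0 -> 0 through ml5 -> 5) and advertises the whole six-node cluster to TensorFlow through TF_CONFIG. A minimal sanity-check sketch, not part of the commit, showing how a node could verify the exported JSON before launching the worker; the assertion and variable names are illustrative assumptions:

import json
import os
import socket

# Parse the TF_CONFIG that wut-worker-mas exports.
tf_config = json.loads(os.environ["TF_CONFIG"])
workers = tf_config["cluster"]["worker"]   # ["ml0-int:2222", ..., "ml5-int:2222"]
index = tf_config["task"]["index"]         # $HOSTNUM, e.g. 3 on host ml3

# The index must match this host's number and address a real worker entry.
hostnum = int(socket.gethostname().replace("ml", ""))
assert index == hostnum and 0 <= index < len(workers)
print(f"worker {index}/{len(workers)} -> {workers[index]}")

One small shell note: echo $TF_CONFIG is unquoted, so the shell may reflow the JSON's whitespace; echo "$TF_CONFIG" would print it verbatim. TensorFlow itself reads the environment variable, not the echoed copy, so either form works for training.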
@@ -43,8 +43,13 @@ BUFFER_SIZE = 10000
 NUM_WORKERS = 6
 GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
+
+# XXX
 POSITIVE_DIRECTORY = '/home/jebba/devel/spacecruft/satnogs-wut/data/pos'
 pos_dir = '/home/jebba/devel/spacecruft/satnogs-wut/data/posdir'
+
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+    tf.distribute.experimental.CollectiveCommunication.RING)
 
 def get_bytes_and_label(filepath):
     raw_bytes = tf.io.read_file(filepath)
     label = tf.strings.regex_full_match(
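The strategy added here pins collective communication to RING, so gradient all-reduces circulate around the six workers in a ring instead of letting TensorFlow pick the collective automatically. The hunk also cuts off mid-call, so the regex_full_match() pattern is not visible; below is a hedged sketch of the complete labeling function, assuming the match tests whether the path sits under the positive-sample directory (the pattern and the final cast are assumptions, not the commit's code):

import tensorflow as tf

pos_dir = '/home/jebba/devel/spacecruft/satnogs-wut/data/posdir'

def get_bytes_and_label(filepath):
    # Read the raw image bytes from disk.
    raw_bytes = tf.io.read_file(filepath)
    # Assumed: the label is True when the path begins with pos_dir.
    label = tf.strings.regex_full_match(filepath, pos_dir + ".+")
    return raw_bytes, tf.cast(label, tf.uint8)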
@@ -88,7 +93,7 @@ dataset = dataset.map(process_image, num_parallel_calls=AUTOTUNE)
 dataset = dataset.batch(batch_size=32)
 dataset = dataset.prefetch(buffer_size=AUTOTUNE)
 
-print(tf.__version__)
+print("Tensorflow Version: ", tf.__version__)
 print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
 print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))
 #with tf.device("GPU:0"):
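One detail worth flagging: the pipeline batches with a literal 32 even though GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS is defined above. With MultiWorkerMirroredStrategy the dataset's batch size is the global batch that gets divided across workers, so a hypothetical adjustment (not in the commit) would batch with the global size instead:

import tensorflow as tf

AUTOTUNE = tf.data.experimental.AUTOTUNE
NUM_WORKERS = 6
GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS  # 64 examples per worker across 6 workers

# Toy dataset standing in for the image pipeline above.
dataset = tf.data.Dataset.range(10_000)
dataset = dataset.batch(batch_size=GLOBAL_BATCH_SIZE)
dataset = dataset.prefetch(buffer_size=AUTOTUNE)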
@@ -105,8 +110,6 @@ tf.config.optimizer.set_jit(True)
 
 tf.summary.trace_on(profiler=True)
 
-strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
-    tf.distribute.experimental.CollectiveCommunication.RING)
 
 
 def compiled_model():
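This hunk removes a second construction of the strategy, which the earlier hunk already creates near the top of the file, so the commit de-duplicates rather than drops the feature. For the strategy to take effect, the model's variables must be created inside its scope; a minimal sketch of how compiled_model() is typically wired in, with an assumed layer stack and optimizer (the commit's actual model is not shown in this diff):

import tensorflow as tf

strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
    tf.distribute.experimental.CollectiveCommunication.RING)

def compiled_model():
    # Assumed architecture: a small binary classifier for the pos/neg images.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Variables created under the scope are mirrored on every worker and
# kept in sync by ring all-reduce after each step.
with strategy.scope():
    model = compiled_model()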