wut-worker-mas, sorta
parent c6aa78fa0e
commit ac33fbe4ff
@@ -0,0 +1,24 @@
+#!/bin/bash
+# wut-worker-mas
+#
+# Starts worker client.
+#
+# Usage:
+# wut-worker-mas
+# Example:
+# wut-worker-mas
+#
+# Note:
+# Each node needs a unique index number.
+#
+# NOTE!
+# This generates the node number based off the hostname.
+# The hosts are ml0 through ml5.
+
+HOSTNUM=`hostname | sed -e 's/ml//g'`
+
+export TF_CONFIG='{"cluster": {"worker": [ "ml0-int:2222", "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
+
+echo $TF_CONFIG
+
+python3 wut-worker-mas.py
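The script maps each hostname to a unique task index (ml0 -> 0 through ml5 -> 5) and advertises the whole six-node cluster to TensorFlow through TF_CONFIG. A minimal sanity-check sketch, not part of the commit, showing how a node could verify the exported JSON before launching the worker; the assertion and variable names are illustrative assumptions:

import json
import os
import socket

# Parse the TF_CONFIG that wut-worker-mas exports.
tf_config = json.loads(os.environ["TF_CONFIG"])
workers = tf_config["cluster"]["worker"]   # ["ml0-int:2222", ..., "ml5-int:2222"]
index = tf_config["task"]["index"]         # $HOSTNUM, e.g. 3 on host ml3

# The index must match this host's number and address a real worker entry.
hostnum = int(socket.gethostname().replace("ml", ""))
assert index == hostnum and 0 <= index < len(workers)
print(f"worker {index}/{len(workers)} -> {workers[index]}")

One small shell note: echo $TF_CONFIG is unquoted, so the shell may reflow the JSON's whitespace; echo "$TF_CONFIG" would print it verbatim. TensorFlow itself reads the environment variable, not the echoed copy, so either form works for training.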
@@ -43,8 +43,13 @@ BUFFER_SIZE = 10000
 NUM_WORKERS = 6
 GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
+
+# XXX
 POSITIVE_DIRECTORY = '/home/jebba/devel/spacecruft/satnogs-wut/data/pos'
 pos_dir = '/home/jebba/devel/spacecruft/satnogs-wut/data/posdir'
+
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+    tf.distribute.experimental.CollectiveCommunication.RING)
 
 def get_bytes_and_label(filepath):
     raw_bytes = tf.io.read_file(filepath)
     label = tf.strings.regex_full_match(
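The strategy added here pins collective communication to RING, so gradient all-reduces circulate around the six workers in a ring instead of letting TensorFlow pick the collective automatically. The hunk also cuts off mid-call, so the regex_full_match() pattern is not visible; below is a hedged sketch of the complete labeling function, assuming the match tests whether the path sits under the positive-sample directory (the pattern and the final cast are assumptions, not the commit's code):

import tensorflow as tf

pos_dir = '/home/jebba/devel/spacecruft/satnogs-wut/data/posdir'

def get_bytes_and_label(filepath):
    # Read the raw image bytes from disk.
    raw_bytes = tf.io.read_file(filepath)
    # Assumed: the label is True when the path begins with pos_dir.
    label = tf.strings.regex_full_match(filepath, pos_dir + ".+")
    return raw_bytes, tf.cast(label, tf.uint8)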
@@ -88,7 +93,7 @@ dataset = dataset.map(process_image, num_parallel_calls=AUTOTUNE)
 dataset = dataset.batch(batch_size=32)
 dataset = dataset.prefetch(buffer_size=AUTOTUNE)
 
-print(tf.__version__)
+print("Tensorflow Version: ", tf.__version__)
 print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
 print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))
 #with tf.device("GPU:0"):
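One detail worth flagging: the pipeline batches with a literal 32 even though GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS is defined above. With MultiWorkerMirroredStrategy the dataset's batch size is the global batch that gets divided across workers, so a hypothetical adjustment (not in the commit) would batch with the global size instead:

import tensorflow as tf

AUTOTUNE = tf.data.experimental.AUTOTUNE
NUM_WORKERS = 6
GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS  # 64 examples per worker across 6 workers

# Toy dataset standing in for the image pipeline above.
dataset = tf.data.Dataset.range(10_000)
dataset = dataset.batch(batch_size=GLOBAL_BATCH_SIZE)
dataset = dataset.prefetch(buffer_size=AUTOTUNE)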
@@ -105,8 +110,6 @@ tf.config.optimizer.set_jit(True)
 
 tf.summary.trace_on(profiler=True)
 
-strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
-    tf.distribute.experimental.CollectiveCommunication.RING)
 
 
 def compiled_model():
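This hunk removes a second construction of the strategy, which the earlier hunk already creates near the top of the file, so the commit de-duplicates rather than drops the feature. For the strategy to take effect, the model's variables must be created inside its scope; a minimal sketch of how compiled_model() is typically wired in, with an assumed layer stack and optimizer (the commit's actual model is not shown in this diff):

import tensorflow as tf

strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
    tf.distribute.experimental.CollectiveCommunication.RING)

def compiled_model():
    # Assumed architecture: a small binary classifier for the pos/neg images.
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Variables created under the scope are mirrored on every worker and
# kept in sync by ring all-reduce after each step.
with strategy.scope():
    model = compiled_model()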