wtf scripts to check tensorflow setup

2020-01-20 12:26:00 -07:00 · 2020-01-20 12:26:00 -07:00 · aaa0091db8
parent ac33fbe4ff
commit aaa0091db8
2 changed files with 85 additions and 0 deletions
--- a/25
+++ b/25
@ -0,0 +1,25 @@
 #!/bin/bash
 # wut-tf
 #
 # Exports TF_CONFIG for the worker cluster, then runs wut-tf.py.
 #
 # Usage:
 # wut-tf
 # Example:
 # wut-tf
 #
 # Note:
 # Each node needs a unique index number.
 #
 # NOTE!
 # This generates the node number based off the hostname.
 # The hosts are ml0 through ml5.
 # Derive this node's number by stripping "ml" from the hostname.
 # NOTE: HOSTNUM is only consumed by the commented-out six-worker TF_CONFIG
 # below; the active TF_CONFIG defines no "task" entry.
 HOSTNUM=$(hostname | sed -e 's/ml//g')
 #export TF_CONFIG='{"cluster": {"worker": [ "ml0-int:2222", "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
 export TF_CONFIG='{"cluster": {"worker": [ "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}}'
 # Quote the expansion so the JSON is printed verbatim; unquoted it is
 # word-split and its "[" / "]" characters are subject to pathname expansion.
 echo "$TF_CONFIG"
 python3 wut-tf.py
--- a/wut-tf.py
+++ b/wut-tf.py
@ -0,0 +1,60 @@
 #!/usr/bin/env python3
 #
 # wut-tf.py
 #
 # https://spacecruft.org/spacecruft/satnogs-wut
 #
 # Distributed Learning
 from __future__ import absolute_import, division, print_function, unicode_literals
 from __future__ import print_function
 import os
 import json
 import numpy as np
 import datetime
 import tensorflow as tf
 import tensorflow.python.keras
 from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
 from tensorflow.python.keras import optimizers
 from tensorflow.python.keras import Sequential
 from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense
 from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
 from tensorflow.python.keras.layers import Input, concatenate
 from tensorflow.python.keras.models import load_model
 from tensorflow.python.keras.models import Model
 from tensorflow.python.keras.preprocessing import image
 from tensorflow.python.keras.preprocessing.image import img_to_array
 from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
 from tensorflow.python.keras.preprocessing.image import load_img
 # Publish the cluster layout to TensorFlow through the TF_CONFIG environment
 # variable: five workers, ml1-int through ml5-int, each on port 2222.
 # No "task" entry is set; the disabled one is kept here for reference:
 #   "task": {"type": "worker", "index": 0 },
 os.environ["TF_CONFIG"] = json.dumps(
     {
         "cluster": {
             "worker": [
                 "ml1-int:2222",
                 "ml2-int:2222",
                 "ml3-int:2222",
                 "ml4-int:2222",
                 "ml5-int:2222",
             ],
         },
     }
 )
 # Setup diagnostics: print the TensorFlow version and the physical devices
 # TensorFlow reports on this host (GPU count, CPU count, then the device
 # list returned when no device type is given).
 print("Tensorflow Version: ", tf.__version__)
 print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
 print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))
 print(tf.config.experimental.list_physical_devices())
 #with tf.device("GPU:0"):
 #  tf.ones(())  # Make sure we can run on GPU
 # XLA_FLAGS is printed twice: once quoted in a labelled line, once as the raw
 # value. os.getenv returns None when the variable is unset, so both lines
 # show "None" in that case.
 print("XLA_FLAGS='{}'".format(os.getenv("XLA_FLAGS")))
 print(os.getenv("XLA_FLAGS"))
 # Clear Keras' global state.
 tf.keras.backend.clear_session()
 # Module-level settings. Their consumers are not visible in this part of the
 # script, so the notes below are limited to what the values themselves show.
 # Image dimensions; presumably the input size in pixels (TODO confirm).
 IMG_HEIGHT = 416
 IMG_WIDTH= 804
 batch_size = 32
 epochs = 4
 BUFFER_SIZE = 10000
 # NOTE(review): NUM_WORKERS is 6, but the TF_CONFIG this script writes lists
 # five workers (ml1-int through ml5-int); confirm which count is intended.
 NUM_WORKERS = 6
 # 64 * NUM_WORKERS = 384. This is computed independently of batch_size (32).
 GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
 # The distribution strategy is currently disabled (both variants commented out).
 #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
 #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
 #    tf.distribute.experimental.CollectiveCommunication.RING)
 AUTOTUNE = tf.data.experimental.AUTOTUNE
 NUM_TOTAL_IMAGES=100
 # Enable JIT compilation through TensorFlow's optimizer configuration.
 tf.config.optimizer.set_jit(True)
 #tf.summary.trace_on(profiler=True)
 #tf.summary.trace_export(name=trace-export,profiler_outdir=logs)
 # Default tf.data options object; nothing is configured on it in the lines
 # visible here.
 options = tf.data.Options()