From aaa0091db8b230f65e3d32157c1bf5b430d7af57 Mon Sep 17 00:00:00 2001
From: ml server
Date: Mon, 20 Jan 2020 12:26:00 -0700
Subject: [PATCH] wut-tf scripts to check TensorFlow setup

---
 wut-tf    | 25 +++++++++++++++++++++++
 wut-tf.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100755 wut-tf
 create mode 100644 wut-tf.py

diff --git a/wut-tf b/wut-tf
new file mode 100755
index 0000000..bf8c5f6
--- /dev/null
+++ b/wut-tf
@@ -0,0 +1,25 @@
+#!/bin/bash
+# wut-tf
+#
+# Starts the worker client.
+#
+# Usage:
+#   wut-tf
+# Example:
+#   wut-tf
+#
+# Note:
+#   Each node needs a unique index number.
+#
+#   The node number is derived from the hostname:
+#   the hosts are ml0 through ml5, so the "ml" prefix
+#   is stripped to get this node's number.
+
+HOSTNUM=$(hostname | sed -e 's/ml//g')
+
+#export TF_CONFIG='{"cluster": {"worker": [ "ml0-int:2222", "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
+export TF_CONFIG='{"cluster": {"worker": [ "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}}'
+
+echo "$TF_CONFIG"
+python3 wut-tf.py
+
diff --git a/wut-tf.py b/wut-tf.py
new file mode 100644
index 0000000..0e3b0de
--- /dev/null
+++ b/wut-tf.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+#
+# wut-tf.py
+#
+# https://spacecruft.org/spacecruft/satnogs-wut
+#
+# Distributed learning environment check: prints the TensorFlow
+# version and the devices visible on this worker node.
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+import os
+import json
+import numpy as np
+import datetime
+import tensorflow as tf
+# The Keras imports below are unused here; importing them exercises
+# the full stack to confirm the installation is complete.
+from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
+from tensorflow.python.keras import optimizers
+from tensorflow.python.keras import Sequential
+from tensorflow.python.keras.layers import Activation, Convolution2D, ZeroPadding2D
+from tensorflow.python.keras.layers import Input, concatenate
+from tensorflow.python.keras.models import load_model
+from tensorflow.python.keras.models import Model
+from tensorflow.python.keras.preprocessing import image
+from tensorflow.python.keras.preprocessing.image import img_to_array
+from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
+from tensorflow.python.keras.preprocessing.image import load_img
+
+# Worker list for the cluster; the per-node "task" entry stays
+# commented out while debugging.
+os.environ["TF_CONFIG"] = json.dumps({
+    "cluster": {
+        "worker": ["ml1-int:2222", "ml2-int:2222", "ml3-int:2222",
+                   "ml4-int:2222", "ml5-int:2222"]
+    }
+    #, "task": {"type": "worker", "index": 0}
+})
+
+print("TensorFlow Version: ", tf.__version__)
+print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
+print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))
+print(tf.config.experimental.list_physical_devices())
+#with tf.device("GPU:0"):
+#    tf.ones(())  # Make sure we can run on the GPU
+print("XLA_FLAGS='{}'".format(os.getenv("XLA_FLAGS")))
+
+tf.keras.backend.clear_session()
+
+# Training constants, unused by this check script.
+IMG_HEIGHT = 416
+IMG_WIDTH = 804
+batch_size = 32
+epochs = 4
+BUFFER_SIZE = 10000
+NUM_WORKERS = 6
+GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
+#strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+#strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
+#    tf.distribute.experimental.CollectiveCommunication.RING)
+AUTOTUNE = tf.data.experimental.AUTOTUNE
+NUM_TOTAL_IMAGES = 100
+tf.config.optimizer.set_jit(True)
+#tf.summary.trace_on(profiler=True)
+#tf.summary.trace_export(name="trace-export", profiler_outdir="logs")
+options = tf.data.Options()
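
Note: the shell wrapper computes HOSTNUM, but the active export omits the
per-node "task" entry, so every node currently receives an identical
TF_CONFIG. A minimal sketch of deriving the worker index in Python instead,
mirroring the commented-out export in wut-tf; the mapping of ml1 to index 0
is an assumption, since the comments say the hosts run ml0 through ml5 while
the active worker list starts at ml1:

    # Sketch only: derive this node's worker index from its hostname,
    # as the commented-out TF_CONFIG export in wut-tf does in shell.
    import json
    import os
    import socket

    workers = ["ml1-int:2222", "ml2-int:2222", "ml3-int:2222",
               "ml4-int:2222", "ml5-int:2222"]
    hostnum = int(socket.gethostname().replace("ml", ""))  # "ml3" -> 3
    os.environ["TF_CONFIG"] = json.dumps({
        "cluster": {"worker": workers},
        "task": {"type": "worker", "index": hostnum - 1},  # assumes ml1 -> 0
    })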
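The MultiWorkerMirroredStrategy lines are left commented out in wut-tf.py.
Assuming TF_CONFIG carries a "task" entry, roughly the following would bring
the cluster up; this is a sketch, not part of the patch, and it needs to run
on every worker at about the same time, since the strategy typically waits
for its peers to connect:

    import tensorflow as tf

    # RING collectives, matching the commented-out lines in wut-tf.py.
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
        tf.distribute.experimental.CollectiveCommunication.RING)
    print("Replicas in sync:", strategy.num_replicas_in_sync)
    with strategy.scope():
        # Placeholder model; wut-tf.py itself builds none.
        model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
        model.compile(optimizer="adam", loss="mse")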
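The GPU smoke test is also commented out in wut-tf.py; a guarded version
along the same lines, safe on CPU-only nodes:

    import tensorflow as tf

    # Run one tiny op on the first GPU, if any is visible.
    if tf.config.experimental.list_physical_devices('GPU'):
        with tf.device("GPU:0"):
            x = tf.ones((2, 2))
        print("GPU op OK, placed on:", x.device)
    else:
        print("No GPU visible; skipping the device check.")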