parent
b7bdc2521e
commit
ecbc5fe3e8
|
@ -134,7 +134,10 @@ firefox https://github.com/bazelbuild/bazel/releases
|
||||||
# Install Tensorflow
|
# Install Tensorflow
|
||||||
git clone tensorflow...
|
git clone tensorflow...
|
||||||
cd tensorflow
|
cd tensorflow
|
||||||
git checkout remotes/origin/r2.1
|
git checkout v2.1.0
|
||||||
|
bazel clean
|
||||||
|
# Get flags to pass:
|
||||||
|
grep flags -m1 /proc/cpuinfo | cut -d ":" -f 2 | tr '[:upper:]' '[:lower:]' | { read FLAGS; OPT="-march=native"; for flag in $FLAGS; do case "$flag" in "sse4_1" | "sse4_2" | "ssse3" | "fma" | "cx16" | "popcnt" | "avx" | "avx2") OPT+=" -m$flag";; esac; done; MODOPT=${OPT//_/\.}; echo "$MODOPT"; }
|
||||||
./configure
|
./configure
|
||||||
# Run Bazel to build pip package. Takes nearly 2 hours to build.
|
# Run Bazel to build pip package. Takes nearly 2 hours to build.
|
||||||
bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
|
bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
|
||||||
|
|
|
@ -18,8 +18,8 @@
|
||||||
HOSTNUM=`hostname | sed -e 's/ml//g'`
|
HOSTNUM=`hostname | sed -e 's/ml//g'`
|
||||||
|
|
||||||
#export TF_CONFIG='{"cluster": {"worker": [ "ml0-int:2222", "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
|
#export TF_CONFIG='{"cluster": {"worker": [ "ml0-int:2222", "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
|
||||||
#export TF_CONFIG='{"cluster": {"worker": [ "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}}'
|
export TF_CONFIG='{"cluster": {"worker": [ "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}}'
|
||||||
export TF_CONFIG='{"cluster": {"chief": [ "ml0-int:2222" ], "worker": [ "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
|
#export TF_CONFIG='{"cluster": {"chief": [ "ml0-int:2222" ], "worker": [ "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222"]}, "task": {"index": '$HOSTNUM', "type": "worker"}}'
|
||||||
|
|
||||||
echo $TF_CONFIG
|
echo $TF_CONFIG
|
||||||
python3 wut-worker-mas.py
|
python3 wut-worker-mas.py
|
||||||
|
|
|
@ -26,11 +26,9 @@ from tensorflow.python.keras.preprocessing import image
|
||||||
from tensorflow.python.keras.preprocessing.image import img_to_array
|
from tensorflow.python.keras.preprocessing.image import img_to_array
|
||||||
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
|
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
|
||||||
from tensorflow.python.keras.preprocessing.image import load_img
|
from tensorflow.python.keras.preprocessing.image import load_img
|
||||||
#import tensorflow.python.distribute.cluster_resolver
|
|
||||||
#from tensorflow.python.distribute.cluster_resolver import TFConfigClusterResolver
|
|
||||||
#from tensorflow.python.distribute.cluster_resolver.TFConfigClusterResolver
|
|
||||||
|
|
||||||
tf.keras.backend.clear_session()
|
tf.keras.backend.clear_session()
|
||||||
|
tf.config.optimizer.set_jit(True)
|
||||||
options = tf.data.Options()
|
options = tf.data.Options()
|
||||||
os.environ["TF_CONFIG"] = json.dumps({
|
os.environ["TF_CONFIG"] = json.dumps({
|
||||||
"cluster": {
|
"cluster": {
|
||||||
|
@ -39,27 +37,16 @@ os.environ["TF_CONFIG"] = json.dumps({
|
||||||
},
|
},
|
||||||
"task": {"type": "chief", "index": 0 },
|
"task": {"type": "chief", "index": 0 },
|
||||||
})
|
})
|
||||||
#os.environ["TF_CONFIG"] = json.dumps({
|
|
||||||
# "cluster": {
|
|
||||||
# "worker": [ "ml1-int:2222", "ml2-int:2222", "ml3-int:2222", "ml4-int:2222", "ml5-int:2222" ]
|
|
||||||
# }#,
|
|
||||||
# #"task": {"type": "worker", "index": 0 },
|
|
||||||
#})
|
|
||||||
|
|
||||||
print("Tensorflow Version: ", tf.__version__)
|
print("Tensorflow Version: ", tf.__version__)
|
||||||
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
|
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
|
||||||
print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))
|
print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))
|
||||||
#with tf.device("GPU:0"):
|
|
||||||
# tf.ones(()) # Make sure we can run on GPU
|
|
||||||
|
|
||||||
# This ensures that XLA and ptxas work well together, and helps with scaling.
|
|
||||||
print("XLA_FLAGS='{}'".format(os.getenv("XLA_FLAGS")))
|
print("XLA_FLAGS='{}'".format(os.getenv("XLA_FLAGS")))
|
||||||
|
|
||||||
IMG_HEIGHT = 416
|
IMG_HEIGHT = 416
|
||||||
IMG_WIDTH= 804
|
IMG_WIDTH= 804
|
||||||
batch_size = 32
|
batch_size = 32
|
||||||
epochs = 4
|
epochs = 4
|
||||||
|
|
||||||
BUFFER_SIZE = 10000
|
BUFFER_SIZE = 10000
|
||||||
NUM_WORKERS = 6
|
NUM_WORKERS = 6
|
||||||
GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
|
GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
|
||||||
|
@ -68,12 +55,9 @@ GLOBAL_BATCH_SIZE = 64 * NUM_WORKERS
|
||||||
POSITIVE_DIRECTORY = '/home/jebba/devel/spacecruft/satnogs-wut/data/pos'
|
POSITIVE_DIRECTORY = '/home/jebba/devel/spacecruft/satnogs-wut/data/pos'
|
||||||
pos_dir = '/home/jebba/devel/spacecruft/satnogs-wut/data/posdir'
|
pos_dir = '/home/jebba/devel/spacecruft/satnogs-wut/data/posdir'
|
||||||
|
|
||||||
from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver
|
|
||||||
|
|
||||||
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
|
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
|
||||||
tf.distribute.experimental.CollectiveCommunication.RING)
|
tf.distribute.experimental.CollectiveCommunication.RING)
|
||||||
|
|
||||||
|
|
||||||
def get_bytes_and_label(filepath):
|
def get_bytes_and_label(filepath):
|
||||||
raw_bytes = tf.io.read_file(filepath)
|
raw_bytes = tf.io.read_file(filepath)
|
||||||
label = tf.strings.regex_full_match(
|
label = tf.strings.regex_full_match(
|
||||||
|
@ -117,13 +101,9 @@ dataset = dataset.map(process_image, num_parallel_calls=AUTOTUNE)
|
||||||
dataset = dataset.batch(batch_size=32)
|
dataset = dataset.batch(batch_size=32)
|
||||||
dataset = dataset.prefetch(buffer_size=AUTOTUNE)
|
dataset = dataset.prefetch(buffer_size=AUTOTUNE)
|
||||||
|
|
||||||
|
|
||||||
os.makedirs(profile_dir, exist_ok=True)
|
os.makedirs(profile_dir, exist_ok=True)
|
||||||
|
|
||||||
# tf.data.Dataset.from_generator
|
# tf.data.Dataset.from_generator
|
||||||
|
|
||||||
tf.config.optimizer.set_jit(True)
|
|
||||||
|
|
||||||
#tf.summary.trace_on(profiler=True)
|
#tf.summary.trace_on(profiler=True)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue