parent
67cab8731a
commit
c66b6d51e5
|
@ -0,0 +1,413 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# wut-train-cluster --- What U Think? SatNOGS Observation AI, training application cluster edition.
|
||||
#
|
||||
# https://spacecruft.org/spacecruft/satnogs-wut
|
||||
#
|
||||
# Builds a wut.tf model file from the data/train and data/val directories.
|
||||
# GPLv3+
|
||||
# Built using Jupyter, Tensorflow, Keras
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from __future__ import print_function
|
||||
import os
|
||||
import numpy as np
|
||||
import simplejson as json
|
||||
import datetime
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import tensorflow as tf
|
||||
import tensorflow.python.keras
|
||||
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
|
||||
from tensorflow.python.keras import optimizers
|
||||
from tensorflow.python.keras import Sequential
|
||||
from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense
|
||||
from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
|
||||
from tensorflow.python.keras.layers import Input, concatenate
|
||||
from tensorflow.python.keras.models import load_model
|
||||
from tensorflow.python.keras.models import Model
|
||||
from tensorflow.python.keras.preprocessing import image
|
||||
from tensorflow.python.keras.preprocessing.image import img_to_array
|
||||
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
|
||||
from tensorflow.python.keras.preprocessing.image import load_img
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().run_line_magic('matplotlib', 'inline')
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from sklearn.decomposition import PCA
|
||||
from ipywidgets import interact, interactive, fixed, interact_manual
|
||||
import ipywidgets as widgets
|
||||
from IPython.display import display, Image
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Every host taking part in training, in task-index order.
_cluster_workers = [
    "10.100.100.130:2222",
    "ml1:2222",
    "ml2:2222",
    "ml3:2222",
    "ml4:2222",
    "ml5:2222",
]

# TF_CONFIG describes the cluster and this process' place in it; it is set
# here, ahead of the MultiWorkerMirroredStrategy created further down.
os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {
        "worker": _cluster_workers,
    },
    # "index" selects this process' entry in the worker list above.
    "task": {"type": "worker", "index": 0},
    # Derived from the list so the count cannot drift out of sync with it
    # (it used to be hard-coded to 5 while six workers are listed).
    "num_workers": len(_cluster_workers),
})
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Image size (height, width) used for the generators' target_size and the
# model's input_shape.
IMG_HEIGHT, IMG_WIDTH = 416, 804

# Training hyper-parameters.
batch_size, epochs = 32, 4
|
||||
# Full size, machine barfs probably needs more RAM
|
||||
#IMG_HEIGHT = 832
|
||||
#IMG_WIDTH = 1606
|
||||
# Good results
|
||||
#batch_size = 128
|
||||
#epochs = 6
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Reset Keras' global state before any model is built in this session.
tf.keras.backend.clear_session()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#from tensorflow.python.framework.ops import disable_eager_execution
|
||||
#disable_eager_execution()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Distribution strategy used for all model building and training below;
# collective communication is explicitly set to RING.
_collective_impl = tf.distribute.experimental.CollectiveCommunication.RING
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(_collective_impl)
|
||||
#
|
||||
# MultiWorkerMirroredStrategy needs TF_CONFIG
|
||||
#multiworker_strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
|
||||
# Central Storage Strategy
|
||||
#central_storage_strategy = tf.distribute.experimental.CentralStorageStrategy()
|
||||
# ParameterServerStrategy needs TF_CONFIG
|
||||
#ps_strategy = tf.distribute.experimental.ParameterServerStrategy()
|
||||
# OneDeviceStrategy No cluster
|
||||
#strategy = tf.distribute.OneDeviceStrategy(device="/CPU:0")
|
||||
# Mirrored Strategy
|
||||
#mirrored_strategy = tf.distribute.MirroredStrategy()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Dataset layout: data/{train,val}/{good,bad}/
train_dir = os.path.join('data/', 'train')
val_dir = os.path.join('data/', 'val')

train_good_dir, train_bad_dir = (
    os.path.join(train_dir, label) for label in ('good', 'bad')
)
val_good_dir, val_bad_dir = (
    os.path.join(val_dir, label) for label in ('good', 'bad')
)

# Number of directory entries per class; every entry is counted as one sample.
num_train_good, num_train_bad, num_val_good, num_val_bad = (
    len(os.listdir(folder))
    for folder in (train_good_dir, train_bad_dir, val_good_dir, val_bad_dir)
)

total_train = num_train_good + num_train_bad
total_val = num_val_good + num_val_bad
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Report per-class and overall sample counts for both splits.
# A count of None marks a separator line that is printed on its own.
for _label, _count in (
    ('total training good images:', num_train_good),
    ('total training bad images:', num_train_bad),
    ("--", None),
    ("Total training images:", total_train),
    ('total validation good images:', num_val_good),
    ('total validation bad images:', num_val_bad),
    ("--", None),
    ("Total validation images:", total_val),
):
    if _count is None:
        print(_label)
    else:
        print(_label, _count)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Hook for shrinking the dataset during quick tests: uncomment the two
# overrides below to cap the sample counts used for the step calculations.
print("--", "Reduce training and validation set when testing", sep="\n")
#total_train = 16
#total_val = 16
print("Reduced training images:", total_train)
print("Reduced validation images:", total_val)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Both splits use the same preprocessing: pixel values are multiplied by 1/255.
train_image_generator = ImageDataGenerator(rescale=1./255)
val_image_generator = ImageDataGenerator(rescale=1./255)

# Arguments shared by the two directory iterators.
_flow_kwargs = dict(
    batch_size=batch_size,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    class_mode='binary',
)

train_data_gen = train_image_generator.flow_from_directory(
    directory=train_dir,
    shuffle=True,
    **_flow_kwargs
)
# `shuffle` is not passed for the validation iterator, so its default applies.
val_data_gen = val_image_generator.flow_from_directory(
    directory=val_dir,
    **_flow_kwargs
)


# In[ ]:


# Pull one batch from each iterator; the labels are not needed here.
sample_train_images, _ = next(train_data_gen)
sample_val_images, _ = next(val_data_gen)
|
||||
# This function will plot images in the form of a grid with 1 row and 3 columns where images are placed in each column.
|
||||
def plotImages(images_arr):
    """Show up to three images side by side in a one-row figure.

    images_arr: iterable of images that ``Axes.imshow`` accepts. Only three
        axes are created, so images beyond the third are not drawn.
    """
    _, grid = plt.subplots(1, 3, figsize=(20, 20))
    panels = grid.flatten()
    for picture, panel in zip(images_arr, panels):
        panel.imshow(picture)
        panel.axis('off')
    plt.tight_layout()
    plt.show()
|
||||
|
||||
#plotImages(sample_train_images[0:3])
|
||||
#plotImages(sample_val_images[0:3])
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Notebook-only setup: load the TensorBoard extension and remove the logs of
# any previous run so the log directory starts out empty.
get_ipython().run_line_magic('load_ext', 'tensorboard')
get_ipython().system('rm -rf ./clusterlogs/')

#log_dir="clusterlogs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "clusterlogs"

# `tf` is the same module object as `tensorflow`; the alias is used here to
# match the other tf.keras calls in this script.
#tensorboard_callback = tensorflow.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
|
||||
#%tensorboard --logdir clusterlogs --port 6006
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#strategy.num_replicas_in_sync
|
||||
## Compute global batch size using number of replicas.
|
||||
#BATCH_SIZE_PER_REPLICA = 5
|
||||
#print(BATCH_SIZE_PER_REPLICA)
|
||||
#global_batch_size = (BATCH_SIZE_PER_REPLICA *
|
||||
# strategy.num_replicas_in_sync)
|
||||
#print(global_batch_size)
|
||||
#dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100)
|
||||
#dataset = dataset.batch(global_batch_size)
|
||||
#LEARNING_RATES_BY_BATCH_SIZE = {5: 0.1, 10: 0.15}
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#learning_rate = LEARNING_RATES_BY_BATCH_SIZE[global_batch_size]
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def get_uncompiled_model():
    """Build the network without compiling it.

    Three Conv2D + MaxPooling2D stages (16, 32, 64 filters) are followed by
    Flatten, a 512-unit ReLU layer and a single sigmoid output unit.

    Returns the new, uncompiled Sequential model.
    """
    net = Sequential()
    # Only the first layer carries the input shape (height, width, 3 channels).
    net.add(Conv2D(16, 3, padding='same', activation='relu',
                   input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)))
    net.add(MaxPooling2D())
    for filters in (32, 64):
        net.add(Conv2D(filters, 3, padding='same', activation='relu'))
        net.add(MaxPooling2D())
    net.add(Flatten())
    net.add(Dense(512, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    return net
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#get_uncompiled_model()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def get_compiled_model():
    """Return a freshly built model compiled for binary classification.

    Uses the 'adam' optimizer, binary cross-entropy loss and the accuracy
    metric.
    """
    compile_args = {
        'optimizer': 'adam',
        'loss': 'binary_crossentropy',
        'metrics': ['accuracy'],
    }
    net = get_uncompiled_model()
    net.compile(**compile_args)
    return net
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#model = get_compiled_model()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath='tmp/keras-ckpt')]
|
||||
#callbacks=[tensorboard_callback,callbacks]
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
def get_fit_model():
    """Build, compile and train a model on the directory iterators.

    Trains for `epochs` epochs on `train_data_gen` and validates on
    `val_data_gen`; the step counts are the sample totals floor-divided by
    `batch_size`.

    Returns the trained model (the value returned by fit() is not returned).
    """
    net = get_compiled_model()
    train_steps = total_train // batch_size
    val_steps = total_val // batch_size
    net.fit(
        train_data_gen,
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=val_data_gen,
        validation_steps=val_steps,
        verbose=2,
    )
    return net
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Build the bare architecture inside the strategy scope; the result is
# discarded (presumably a check that construction works under the strategy).
with strategy.scope():
    get_uncompiled_model()


# In[ ]:


# Same again including compile(); this model is discarded as well.
with strategy.scope():
    get_compiled_model()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#with strategy.scope():
|
||||
# get_fit_model()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#multi_worker_model = get_compiled_model()
|
||||
#multi_worker_model.fit(
|
||||
# x=train_data_gen,
|
||||
# epochs=epochs,
|
||||
# steps_per_epoch=total_train // batch_size
|
||||
# )
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Build, compile and train the model inside the distribution strategy scope.
with strategy.scope():
    multi_worker_model = get_compiled_model()
    # Keep the History object returned by fit(): later cells of this script
    # use `history`, which was previously never assigned.
    history = multi_worker_model.fit(
        x=train_data_gen,
        epochs=epochs,
        steps_per_epoch=total_train // batch_size,
    )
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#model.summary()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Dump the training inputs (paths and generator objects) for debugging.
print("TRAINING info")
for _item in (
    train_dir,
    train_good_dir,
    train_bad_dir,
    train_image_generator,
    train_data_gen,
):
    print(_item)
|
||||
#print(sample_train_images)
|
||||
#print(history)
|
||||
#model.to_json()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
#history = model.fit(X, y, batch_size=32, epochs=40, validation_split=0.1)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# The trained network is bound to `multi_worker_model`; no module-level name
# `model` exists in this script, so calling `model.save(...)` here raised
# NameError.

# Full model, once per file suffix.
multi_worker_model.save('data/models/FOO/wut-train-cluster.tf')


# In[ ]:


multi_worker_model.save('data/models/FOO/wut-train-cluster.h5')


# In[ ]:


# Weights only, once per file suffix.
multi_worker_model.save_weights('data/models/FOO/wut-weights-train-cluster.tf')


# In[ ]:


multi_worker_model.save_weights('data/models/FOO/wut-weights-train-cluster.h5')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Take the training History from the model itself (Keras keeps the object
# returned by fit() as `model.history`), so this cell works without relying
# on a separately assigned variable.
history = multi_worker_model.history
_metrics = history.history

acc = _metrics['accuracy']
loss = _metrics['loss']
# Validation curves exist only when fit() was given validation data; they are
# None otherwise and are then left out of the plots instead of raising KeyError.
val_acc = _metrics.get('val_accuracy')
val_loss = _metrics.get('val_loss')

# One x value per recorded epoch, so x and y always have the same length.
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
if val_acc is not None:
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
if val_loss is not None:
    plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# The End
|
||||
|
|
@ -57,7 +57,7 @@ def get_compiled_model():
|
|||
def get_fit_model():
|
||||
model = get_compiled_model()
|
||||
model.fit(
|
||||
)
|
||||
model )
|
||||
return model
|
||||
|
||||
#def get_fit_model():
|
||||
|
|
Loading…
Reference in New Issue