diff --git a/jupyter/wut-predict.ipynb b/jupyter/wut-predict.ipynb index 4403c88..53f4fdb 100644 --- a/jupyter/wut-predict.ipynb +++ b/jupyter/wut-predict.ipynb @@ -93,16 +93,6 @@ "from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from tensorflow.python.keras.models import Model\n", - "from tensorflow.python.keras.layers import Input, concatenate" - ] - }, { "cell_type": "code", "execution_count": null, @@ -158,6 +148,24 @@ "print(\"Python import done\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Load HDF file\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = load_model('data/hdf/wut-KgazZMKEa74VnquqXLwAvD.h5')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -268,25 +276,7 @@ "metadata": {}, "outputs": [], "source": [ - "plotImages(sample_test_images[0:3])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load .h5 file here plz" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "model = load_model('data/wut.h5')" + "plotImages(sample_test_images[0:1])" ] }, { @@ -318,7 +308,7 @@ "source": [ "prediction = model.predict(\n", " x=test_data_gen,\n", - " verbose=2\n", + " verbose=1\n", ")\n", "print(\"end predict\")" ] @@ -377,32 +367,6 @@ "print('Observation: %s' % (rating))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if prediction_bool[1] == False:\n", - " rating = 'bad'\n", - "else:\n", - " rating = 'good'\n", - "print('Observation: %s' % (rating))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if prediction_bool[2] == False:\n", - " rating = 'bad'\n", - "else:\n", - " rating = 'good'\n", - "print('Observation: %s' % (rating))" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/jupyter/wut-train-cluster.ipynb b/jupyter/wut-train-cluster.ipynb index d96d8c3..ae9cfd0 100644 --- a/jupyter/wut-train-cluster.ipynb +++ b/jupyter/wut-train-cluster.ipynb @@ -6,11 +6,11 @@ "metadata": {}, "outputs": [], "source": [ - "# wut-train --- What U Think? SatNOGS Observation AI, training application.\n", + "# wut-train-cluster --- What U Think? SatNOGS Observation AI, training application cluster edition.\n", "#\n", "# https://spacecruft.org/spacecruft/satnogs-wut\n", "#\n", - "# Based on data/train and data/val directories builds a wut.h5 file." + "# Based on data/train and data/val directories builds a wut.tf file." ] }, { @@ -149,6 +149,18 @@ "from IPython.display import display, Image" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cluster\n", + "from __future__ import absolute_import, division, print_function, unicode_literals\n", + "import tensorflow as tf\n", + "import simplejson as json" + ] + }, { "cell_type": "code", "execution_count": null, @@ -158,6 +170,141 @@ "print(\"Python import done\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# CLUSTER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Valid roles: \"chief\", \"worker\", \"ps\" and \"evaluator\".\n", + "os.environ[\"TF_CONFIG\"] = json.dumps({\n", + " \"cluster\": {\n", + " \"worker\": [ \"ml1:2222\", \"ml2:2222\", \"ml3:2222\", \"ml4:2222\", \"ml5:2222\" ]\n", + " },\n", + " \"task\": {\"type\": \"worker\", \"index\": 1},\n", + " \"num_workers\": 5\n", + "})\n", + "# \"worker\": [\"ml1:port\", \"ml2:port\", \"ml3:port\", \"ml4:port\", \"ml5:port\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# MultiWorkerMirroredStrategy needs TF_CONFIG\n", + "multiworker_strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#os.environ[\"TF_CONFIG\"] = json.dumps({\n", + "# \"cluster\": {\n", + "# \"worker\": [\"host1:port\", \"host2:port\", \"host3:port\"]\n", + "# },\n", + "# \"task\": {\"type\": \"worker\", \"index\": 1}\n", + "#})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Pick one Strategy Below\n", + "# moved further down above Sequence()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Central Storage Strategy\n", + "#central_storage_strategy = tf.distribute.experimental.CentralStorageStrategy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ParameterServerStrategy needs TF_CONFIG\n", + "#ps_strategy = tf.distribute.experimental.ParameterServerStrategy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# OneDeviceStrategy No cluster\n", + "#strategy = tf.distribute.OneDeviceStrategy(device=\"/CPU:0\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Mirrored Strategy\n", + "#mirrored_strategy = tf.distribute.MirroredStrategy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Mirrored Strategy\n", + "#mirrored_strategy = tf.distribute.MirroredStrategy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# END CLUSTER" + ] + }, { "cell_type": "code", "execution_count": null, @@ -259,8 +406,8 @@ "outputs": [], "source": [ "print(\"Reduce training and validation set when testing\")\n", - "#total_train = 1\n", - "#total_val = 1\n", + "total_train = 100\n", + "total_val = 100\n", "print(\"Train =\")\n", "print(total_train)\n", "print(\"Validation =\")\n", @@ -284,8 +431,8 @@ "#epochs = 16 # BEST SO FAR\n", "#\n", "# Fast, but reasonable answers\n", - "batch_size = 64\n", - "epochs = 4\n", + "#batch_size = 64\n", + "#epochs = 4\n", "# Faster, but reasonable answers ?\n", "#batch_size = 32\n", "#epochs = 2\n", @@ -295,8 +442,8 @@ "#epochs = 3\n", "#\n", "# Smallest set for testing\n", - "#batch_size = 1\n", - "#epochs = 1" + "batch_size = 8\n", + "epochs = 4" ] }, { @@ -401,7 +548,7 @@ "metadata": {}, "outputs": [], "source": [ - "plotImages(sample_train_images[0:3])" + "#plotImages(sample_train_images[0:3])" ] }, { @@ -410,7 +557,7 @@ "metadata": {}, "outputs": [], "source": [ - "plotImages(sample_val_images[0:3])" + "#plotImages(sample_val_images[0:3])" ] }, { @@ -419,7 +566,47 @@ "metadata": {}, "outputs": [], "source": [ - "model = Sequential([\n", + "# CLUSTER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# USE CPU only (doesn't work)\n", + "#import tensorflow as tf\n", + "#sess = Session(config=tf.ConfigProto(device_count={'GPU': 0}))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#with mirrored_strategy.scope():\n", + "# model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])\n", + "# model.compile(loss='mse', optimizer='sgd')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#with mirrored_strategy.scope():\n", + "with multiworker_strategy.scope():\n", + " model = Sequential([\n", " Conv2D(16, 3, padding='same', activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH ,3)),\n", " MaxPooling2D(),\n", " Conv2D(32, 3, padding='same', activation='relu'),\n", @@ -429,7 +616,10 @@ " Flatten(),\n", " Dense(512, activation='relu'),\n", " Dense(1, activation='sigmoid')\n", - "])" + " ])\n", + " model.compile(optimizer='adam',\n", + " loss='binary_crossentropy',\n", + " metrics=['accuracy'])" ] }, { @@ -438,9 +628,7 @@ "metadata": {}, "outputs": [], "source": [ - "model.compile(optimizer='adam',\n", - " loss='binary_crossentropy',\n", - " metrics=['accuracy'])" + "strategy.num_replicas_in_sync" ] }, { @@ -576,7 +764,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Save .h5 data here" + "model.to_json()" ] }, { @@ -585,9 +773,80 @@ "metadata": {}, "outputs": [], "source": [ - "model.save('data/hdf/wut-KgazZMKEa74VnquqXLwAvD.h5')" + "# Save .tf model data here" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.save('data/models/DUV/wut-train-cluster.tf')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.save('data/models/DUV/wut-train-cluster.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.save_weights('data/models/DUV/wut-weights-train-cluster.tf')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.save_weights('data/models/DUV/wut-weights-train-cluster.h5')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/jupyter/wut-train.ipynb b/jupyter/wut-train.ipynb index 9798287..d96d8c3 100644 --- a/jupyter/wut-train.ipynb +++ b/jupyter/wut-train.ipynb @@ -259,8 +259,8 @@ "outputs": [], "source": [ "print(\"Reduce training and validation set when testing\")\n", - "#total_train = 100\n", - "#total_val = 100\n", + "#total_train = 1\n", + "#total_val = 1\n", "print(\"Train =\")\n", "print(total_train)\n", "print(\"Validation =\")\n", @@ -280,12 +280,23 @@ "# Large Test\n", "#batch_size = 512 # FAIL\n", "#batch_size = 256 # FAIL\n", - "batch_size = 192\n", - "epochs = 16\n", + "#batch_size = 192 # BEST SO FAR\n", + "#epochs = 16 # BEST SO FAR\n", + "#\n", + "# Fast, but reasonable answers\n", + "batch_size = 64\n", + "epochs = 4\n", + "# Faster, but reasonable answers ?\n", + "#batch_size = 32\n", + "#epochs = 2\n", "#\n", "# Testing, faster more inaccurate results\n", "#batch_size = 16\n", - "#epochs = 3" + "#epochs = 3\n", + "#\n", + "# Smallest set for testing\n", + "#batch_size = 1\n", + "#epochs = 1" ] }, { @@ -574,7 +585,7 @@ "metadata": {}, "outputs": [], "source": [ - "model.save('data/hdf/wut-train.h5')" + "model.save('data/hdf/wut-KgazZMKEa74VnquqXLwAvD.h5')" ] }, {