diff --git a/README.md b/README.md
index 1221f9f..7dd9a59 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ The following scripts are in the repo:
 * `wut-dl-sort-txmode` --- Populate `data/` dir with waterfalls from `download/` using selected encoding.
 * `wut-files` --- Tells you about what files you have in `downloads/` and `data/`.
 * `wut-ml` --- Main machine learning Python script using Tensorflow and Keras.
+* `wut-ml.ipynb` --- Machine learning Python script using Tensorflow and Keras, as a Jupyter Notebook.
 * `wut-ml-load` --- Machine learning Python script using Tensorflow and Keras, load `data/wut.h5`.
 * `wut-ml-save` --- Machine learning Python script using Tensorflow and Keras, save `data/wut.h5`.
 * `wut-obs` --- Download the JSON for an observation ID.
diff --git a/wut-ml.ipynb b/wut-ml.ipynb
new file mode 100644
index 0000000..af568cd
--- /dev/null
+++ b/wut-ml.ipynb
@@ -0,0 +1,387 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!/usr/bin/python3\n",
+    "# wut-ml\n",
+    "#\n",
+    "# Vet a SatNOGS image using machine learning (guessing).\n",
+    "# It will vet the image located at test/unvetted/waterfall.png.\n",
+    "#\n",
+    "# Note: there is an open issue where it vets everything under the\n",
+    "# data/test directory; that still needs fixing. For now, just delete\n",
+    "# everything else there. :)\n",
+    "#\n",
+    "# Usage:\n",
+    "# wut-ml\n",
+    "# Example:\n",
+    "# wut-ml"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "import done\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "import numpy as np\n",
+    "import tensorflow.python.keras\n",
+    "from tensorflow.python.keras import Sequential\n",
+    "from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense\n",
+    "from tensorflow.python.keras.preprocessing.image import ImageDataGenerator\n",
+    "from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D\n",
+    "from tensorflow.python.keras import optimizers\n",
+    "from tensorflow.python.keras.preprocessing import image\n",
+    "from tensorflow.python.keras.models import load_model\n",
+    "from tensorflow.python.keras.preprocessing.image import load_img\n",
+    "from tensorflow.python.keras.preprocessing.image import img_to_array\n",
+    "\n",
+    "# XXX\n",
+    "from tensorflow.python.keras.models import Model\n",
+    "from tensorflow.python.keras.layers import Input, concatenate\n",
+    "#from tensorflow.python.keras.optimizers import Adam\n",
+    "\n",
+    "# XXX Plot\n",
+    "from tensorflow.python.keras.utils import plot_model\n",
+    "from tensorflow.python.keras.callbacks import ModelCheckpoint\n",
+    "## for visualizing \n",
+    "import matplotlib.pyplot as plt, numpy as np\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "print(\"import done\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "datagen\n"
+     ]
+    }
+   ],
+   "source": [
+    "# https://keras.io/preprocessing/image/\n",
+    "# TODO:\n",
+    "# * Pre-process image\n",
+    "print(\"datagen\")\n",
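+    "# rescale=1./255 maps the 8-bit pixel values into [0, 1]; the remaining\n",
+    "# arguments spell out the defaults, so no augmentation (shear, zoom,\n",
+    "# rotation, shifts, flips) is applied.\n",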
"\tzoom_range=0.0,\n", + "\trotation_range=0,\n", + "\twidth_shift_range=0.0,\n", + "\theight_shift_range=0.0,\n", + "\tbrightness_range=None,\n", + "\tchannel_shift_range=0.0,\n", + "\tfill_mode='nearest',\n", + "\tcval=0.0,\n", + "\thorizontal_flip=False,\n", + "\tvertical_flip=False,\n", + "\tpreprocessing_function=None,\n", + "\tdata_format='channels_last',\n", + "\tvalidation_split=0.0,\n", + "\tdtype='float32')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "datagen.flow\n", + "Found 761 images belonging to 4 classes.\n", + "Found 741 images belonging to 3 classes.\n", + "Found 1 images belonging to 1 classes.\n", + "train_it.next()\n", + "Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n", + "Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n", + "Batch shape=(1, 256, 256, 3), min=0.000, max=1.000\n", + "input shape\n", + "(256, 256, 3)\n", + "Height 256 Width 256\n" + ] + } + ], + "source": [ + "print(\"datagen.flow\")\n", + "train_it = datagen.flow_from_directory('data/train/', class_mode='binary')\n", + "val_it = datagen.flow_from_directory('data/val/', class_mode='binary')\n", + "test_it = datagen.flow_from_directory('data/test/', class_mode='binary')\n", + "\n", + "print(\"train_it.next()\")\n", + "trainX, trainY = train_it.next()\n", + "print('Batch shape=%s, min=%.3f, max=%.3f' % (trainX.shape, trainX.min(), trainX.max()))\n", + "valX, valY = val_it.next()\n", + "print('Batch shape=%s, min=%.3f, max=%.3f' % (valX.shape, valX.min(), valX.max()))\n", + "testX, testY = test_it.next()\n", + "print('Batch shape=%s, min=%.3f, max=%.3f' % (testX.shape, testX.min(), testX.max()))\n", + "\n", + "print(\"input shape\")\n", + "input_shape=trainX.shape[1:]\n", + "print(input_shape)\n", + "\n", + "#img_width=823\n", + "#img_height=1606\n", + "img_width=256\n", + "img_height=256\n", + "print(\"Height\", img_height, \"Width\", img_width)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sequential\n", + "add\n" + ] + } + ], + "source": [ + "# https://keras.io/models/sequential/\n", + "# https://keras.io/getting-started/sequential-model-guide/\n", + "print(\"Sequential\")\n", + "model = Sequential()\n", + "\n", + "print(\"add\")\n", + "# Other data to consider adding:\n", + "# * JSON metadata\n", + "# * TLE\n", + "# * Audio File (ogg)\n", + "# https://www.tensorflow.org/io/api_docs/python/tfio/ffmpeg/AudioDataset\n", + "# * Decoded Data (HEX, ASCII, PNG)\n", + "# Data from external sources to consider adding:\n", + "# * Weather\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "convolution 2 deeeee\n", + "Activation relu\n", + "Pooling\n", + "Convolution2D\n", + "Activation relu\n", + "Pooling\n", + "Convolution2D\n", + "Activation relu\n", + "Pooling\n", + "Flatten\n", + "Dense\n", + "Activation relu\n", + "Dropout\n", + "Dense\n", + "Activation softmax\n", + "compile\n", + "fit\n", + "Train for 24 steps, validate for 24 steps\n", + "24/24 - 60s - loss: 0.0000e+00 - accuracy: 0.1209 - val_loss: 0.0000e+00 - val_accuracy: 0.0013\n", + "fit done\n" + ] + } + ], + "source": [ + "print(\"convolution 2 deeeee\")\n", + "# https://keras.io/layers/convolutional/\n", + "#model.add(Convolution2D(32, 3, 3, input_shape=trainX.shape[1:]))\n", + 
"#model.add(Convolution2D(32, 3, 3, input_shape=(255,255,3)))\n", + "model.add(Convolution2D(32, 3, 3, input_shape=(img_width, img_height,3)))\n", + "# https://keras.io/activations/\n", + "print(\"Activation relu\")\n", + "model.add(Activation('relu'))\n", + "# https://keras.io/layers/pooling/\n", + "print(\"Pooling\")\n", + "model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "print(\"Convolution2D\")\n", + "model.add(Convolution2D(32, 3, 3))\n", + "print(\"Activation relu\")\n", + "model.add(Activation('relu'))\n", + "print(\"Pooling\")\n", + "model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "print(\"Convolution2D\")\n", + "model.add(Convolution2D(64, 3, 3))\n", + "print(\"Activation relu\")\n", + "model.add(Activation('relu'))\n", + "print(\"Pooling\")\n", + "model.add(MaxPooling2D(pool_size=(2, 2)))\n", + "# https://keras.io/layers/core/\n", + "print(\"Flatten\")\n", + "model.add(Flatten())\n", + "# https://keras.io/layers/core/\n", + "print(\"Dense\")\n", + "model.add(Dense(64))\n", + "print(\"Activation relu\")\n", + "model.add(Activation('relu'))\n", + "# https://keras.io/layers/core/\n", + "print(\"Dropout\")\n", + "model.add(Dropout(0.5))\n", + "print(\"Dense\")\n", + "model.add(Dense(1))\n", + "print(\"Activation softmax\")\n", + "model.add(Activation('softmax'))\n", + "\n", + "# https://keras.io/models/sequential/\n", + "print(\"compile\")\n", + "model.compile(\n", + "\tloss='categorical_crossentropy',\n", + "\tloss_weights=None,\n", + "\tsample_weight_mode=None,\n", + "\tweighted_metrics=None,\n", + "\ttarget_tensors=None,\t\n", + "\toptimizer='rmsprop',\n", + "\tmetrics=['accuracy'])\n", + "\n", + "# https://keras.io/models/sequential/\n", + "print(\"fit\")\n", + "model.fit(\n", + "\tx=train_it,\n", + "\ty=None,\n", + "\tbatch_size=None,\n", + "\tepochs=1,\n", + "\tverbose=2,\n", + "\tcallbacks=None,\n", + "\tvalidation_split=0.0,\n", + "\tvalidation_data=val_it,\n", + "\tshuffle=True,\n", + "\tclass_weight=None,\n", + "\tsample_weight=None,\n", + "\tinitial_epoch=0,\n", + "\tsteps_per_epoch=None,\n", + "\tvalidation_steps=None,\n", + "\tvalidation_freq=1,\n", + "\tmax_queue_size=10,\n", + "\tworkers=16,\n", + "\tuse_multiprocessing=True)\n", + "print(\"fit done\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "# evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO:\n", + "# * Generate output to visualize training/validating/testing.\n", + "\n", + "# Plot, fail\n", + "#print(\"plot\")\n", + "#plot_model(test_it,dpi=72)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "predict\n", + "1/1 - 0s\n", + "[[1.]]\n", + "Observation: bad\n" + ] + } + ], + "source": [ + "# https://keras.io/models/sequential/\n", + "print(\"predict\")\n", + "\n", + "prediction = model.predict(\n", + "\tx=test_it,\n", + "\tbatch_size=None,\n", + "\tverbose=2,\n", + "\tsteps=None,\n", + "\tcallbacks=None,\n", + "\tmax_queue_size=10,\n", + "\tworkers=16,\n", + "\tuse_multiprocessing=True)\n", + "\n", + "print(prediction)\n", + "\n", + "if prediction[0][0] == 1:\n", + " rating = 'bad'\n", + "else:\n", + " rating = 'good'\n", + "print('Observation: %s' % (rating))" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}