jupyter notebook

2020-01-10 21:13:44 -07:00 · 2020-01-10 21:13:44 -07:00 · 9d44337a61
parent 9996882ec1
commit 9d44337a61
2 changed files with 388 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -39,6 +39,7 @@ The following scripts are in the repo:
 * `wut-dl-sort-txmode` --- Populate `data/` dir with waterfalls from `download/` using selected encoding.
 * `wut-files` --- Tells you about what files you have in `downloads/` and `data/`.
 * `wut-ml` --- Main machine learning Python script using Tensorflow and Keras.
+* `wut-ml.ipynb` --- Machine learning Python script using Tensorflow and Keras in a Jupyter Notebook.
 * `wut-ml-load` --- Machine learning Python script using Tensorflow and Keras, load `data/wut.h5`.
 * `wut-ml-save` --- Machine learning Python script using Tensorflow and Keras, save `data/wut.h5`.
 * `wut-obs` --- Download the JSON for an observation ID.
--- a/wut-ml.ipynb
+++ b/wut-ml.ipynb
@ -0,0 +1,387 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!/usr/bin/python3\n",
+    "# wut-ml\n",
+    "#\n",
+    "# Vet a SatNOGS image using machine learning (guessing).\n",
+    "# It will vet the image located at data/test/unvetted/waterfall.png.\n",
+    "#\n",
+    "# Note: known issue -- it currently vets everything under the\n",
+    "# data/test directory. Until that is fixed, delete everything else\n",
+    "# from that directory first. :)\n",
+    "#\n",
+    "# Usage:\n",
+    "# wut-ml\n",
+    "# Example:\n",
+    "# wut-ml"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "import done\n"
+     ]
+    }
+   ],
+   "source": [
+    "# All imports for the notebook live in this cell.\n",
+    "import os\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from sklearn.decomposition import PCA\n",
+    "\n",
+    "# Keras is imported through the public tensorflow.keras namespace.\n",
+    "# tensorflow.python.* is TensorFlow's private implementation package and\n",
+    "# is not a supported import path.\n",
+    "import tensorflow\n",
+    "from tensorflow.keras import Sequential\n",
+    "from tensorflow.keras import optimizers\n",
+    "from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense\n",
+    "from tensorflow.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D\n",
+    "from tensorflow.keras.models import load_model\n",
+    "from tensorflow.keras.preprocessing import image\n",
+    "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
+    "from tensorflow.keras.preprocessing.image import load_img\n",
+    "from tensorflow.keras.preprocessing.image import img_to_array\n",
+    "\n",
+    "# XXX\n",
+    "from tensorflow.keras.models import Model\n",
+    "from tensorflow.keras.layers import Input, concatenate\n",
+    "#from tensorflow.keras.optimizers import Adam\n",
+    "\n",
+    "# XXX Plot\n",
+    "from tensorflow.keras.utils import plot_model\n",
+    "from tensorflow.keras.callbacks import ModelCheckpoint\n",
+    "\n",
+    "print(\"import done\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "datagen\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Image generator shared by the train/val/test iterators.\n",
+    "# Reference: https://keras.io/preprocessing/image/\n",
+    "# TODO:\n",
+    "# * Pre-process image\n",
+    "print(\"datagen\")\n",
+    "datagen = ImageDataGenerator(\n",
+    "    # Pixel scaling: the only option here that is not switched off.\n",
+    "    rescale=1.0 / 255,\n",
+    "    # Centering / normalization / whitening: all disabled.\n",
+    "    featurewise_center=False,\n",
+    "    samplewise_center=False,\n",
+    "    featurewise_std_normalization=False,\n",
+    "    samplewise_std_normalization=False,\n",
+    "    zca_whitening=False,\n",
+    "    zca_epsilon=1e-06,\n",
+    "    # Geometric augmentation: all disabled.\n",
+    "    rotation_range=0,\n",
+    "    width_shift_range=0.0,\n",
+    "    height_shift_range=0.0,\n",
+    "    shear_range=0.0,\n",
+    "    zoom_range=0.0,\n",
+    "    horizontal_flip=False,\n",
+    "    vertical_flip=False,\n",
+    "    fill_mode='nearest',\n",
+    "    cval=0.0,\n",
+    "    # Colour augmentation: all disabled.\n",
+    "    brightness_range=None,\n",
+    "    channel_shift_range=0.0,\n",
+    "    # Remaining generator settings.\n",
+    "    preprocessing_function=None,\n",
+    "    validation_split=0.0,\n",
+    "    data_format='channels_last',\n",
+    "    dtype='float32')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "datagen.flow\n",
+      "Found 761 images belonging to 4 classes.\n",
+      "Found 741 images belonging to 3 classes.\n",
+      "Found 1 images belonging to 1 classes.\n",
+      "train_it.next()\n",
+      "Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n",
+      "Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n",
+      "Batch shape=(1, 256, 256, 3), min=0.000, max=1.000\n",
+      "input shape\n",
+      "(256, 256, 3)\n",
+      "Height 256 Width 256\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build one directory iterator per data split. Labels come from the\n",
+    "# sub-directory names found under each split directory.\n",
+    "print(\"datagen.flow\")\n",
+    "train_it = datagen.flow_from_directory('data/train/', class_mode='binary')\n",
+    "val_it = datagen.flow_from_directory('data/val/', class_mode='binary')\n",
+    "test_it = datagen.flow_from_directory('data/test/', class_mode='binary')\n",
+    "\n",
+    "# class_mode='binary' is meant for exactly two classes, so flag a\n",
+    "# training or validation tree that holds a different number of class\n",
+    "# sub-directories instead of silently training on unexpected labels.\n",
+    "for split_name, split_it in (('train', train_it), ('val', val_it)):\n",
+    "    if split_it.num_classes != 2:\n",
+    "        print('WARNING: data/%s/ has %d class directories, expected 2' % (split_name, split_it.num_classes))\n",
+    "\n",
+    "\n",
+    "def describe_batch(images):\n",
+    "    \"\"\"Print the shape and the value range of one batch of images.\"\"\"\n",
+    "    print('Batch shape=%s, min=%.3f, max=%.3f' % (images.shape, images.min(), images.max()))\n",
+    "\n",
+    "\n",
+    "# Pull a single batch from every iterator as a sanity check.\n",
+    "print(\"train_it.next()\")\n",
+    "trainX, trainY = next(train_it)\n",
+    "describe_batch(trainX)\n",
+    "valX, valY = next(val_it)\n",
+    "describe_batch(valX)\n",
+    "testX, testY = next(test_it)\n",
+    "describe_batch(testX)\n",
+    "\n",
+    "print(\"input shape\")\n",
+    "input_shape = trainX.shape[1:]\n",
+    "print(input_shape)\n",
+    "\n",
+    "# Take the image size from the batch rather than hard-coding it, so it\n",
+    "# always matches what the iterators produce. With the channels_last\n",
+    "# generator above, a batch is (batch, height, width, channels).\n",
+    "# Values tried earlier: img_width=823, img_height=1606\n",
+    "img_height, img_width = input_shape[0], input_shape[1]\n",
+    "print(\"Height\", img_height, \"Width\", img_width)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sequential\n",
+      "add\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create the empty Sequential container that the layers are added to.\n",
+    "# References:\n",
+    "#   https://keras.io/models/sequential/\n",
+    "#   https://keras.io/getting-started/sequential-model-guide/\n",
+    "print(\"Sequential\")\n",
+    "model = Sequential()\n",
+    "\n",
+    "print(\"add\")\n",
+    "# Ideas for further model inputs.\n",
+    "# From the observation itself:\n",
+    "#   - JSON metadata\n",
+    "#   - TLE\n",
+    "#   - Audio file (ogg), see\n",
+    "#     https://www.tensorflow.org/io/api_docs/python/tfio/ffmpeg/AudioDataset\n",
+    "#   - Decoded data (HEX, ASCII, PNG)\n",
+    "# From external sources:\n",
+    "#   - Weather\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "convolution 2 deeeee\n",
+      "Activation relu\n",
+      "Pooling\n",
+      "Convolution2D\n",
+      "Activation relu\n",
+      "Pooling\n",
+      "Convolution2D\n",
+      "Activation relu\n",
+      "Pooling\n",
+      "Flatten\n",
+      "Dense\n",
+      "Activation relu\n",
+      "Dropout\n",
+      "Dense\n",
+      "Activation softmax\n",
+      "compile\n",
+      "fit\n",
+      "Train for 24 steps, validate for 24 steps\n",
+      "24/24 - 60s - loss: 0.0000e+00 - accuracy: 0.1209 - val_loss: 0.0000e+00 - val_accuracy: 0.0013\n",
+      "fit done\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Build, compile and train a small convolutional binary classifier.\n",
+    "#\n",
+    "# Start from a fresh model so that re-running this cell does not stack a\n",
+    "# second copy of every layer on top of the existing ones.\n",
+    "model = Sequential()\n",
+    "\n",
+    "print(\"convolution 2 deeeee\")\n",
+    "# https://keras.io/layers/convolutional/\n",
+    "# The kernel size is passed as a (3, 3) tuple: the third positional\n",
+    "# argument of the layer is `strides`, so writing (32, 3, 3) would create\n",
+    "# a stride-3 convolution rather than a 3x3 kernel.\n",
+    "# channels_last input is ordered (height, width, channels).\n",
+    "model.add(Convolution2D(32, (3, 3), input_shape=(img_height, img_width, 3)))\n",
+    "# https://keras.io/activations/\n",
+    "print(\"Activation relu\")\n",
+    "model.add(Activation('relu'))\n",
+    "# https://keras.io/layers/pooling/\n",
+    "print(\"Pooling\")\n",
+    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "print(\"Convolution2D\")\n",
+    "model.add(Convolution2D(32, (3, 3)))\n",
+    "print(\"Activation relu\")\n",
+    "model.add(Activation('relu'))\n",
+    "print(\"Pooling\")\n",
+    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "print(\"Convolution2D\")\n",
+    "model.add(Convolution2D(64, (3, 3)))\n",
+    "print(\"Activation relu\")\n",
+    "model.add(Activation('relu'))\n",
+    "print(\"Pooling\")\n",
+    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
+    "# https://keras.io/layers/core/\n",
+    "print(\"Flatten\")\n",
+    "model.add(Flatten())\n",
+    "print(\"Dense\")\n",
+    "model.add(Dense(64))\n",
+    "print(\"Activation relu\")\n",
+    "model.add(Activation('relu'))\n",
+    "print(\"Dropout\")\n",
+    "model.add(Dropout(0.5))\n",
+    "print(\"Dense\")\n",
+    "model.add(Dense(1))\n",
+    "# A single output unit needs a sigmoid to produce a score between 0 and 1.\n",
+    "# Softmax over one unit always outputs exactly 1.0, so the model could\n",
+    "# never learn anything (loss stuck at 0, every prediction equal to 1).\n",
+    "print(\"Activation sigmoid\")\n",
+    "model.add(Activation('sigmoid'))\n",
+    "\n",
+    "# https://keras.io/models/sequential/\n",
+    "# binary_crossentropy matches the single sigmoid output and the labels\n",
+    "# produced by class_mode='binary'.\n",
+    "print(\"compile\")\n",
+    "model.compile(\n",
+    "    loss='binary_crossentropy',\n",
+    "    optimizer='rmsprop',\n",
+    "    metrics=['accuracy'])\n",
+    "\n",
+    "# https://keras.io/models/sequential/\n",
+    "# Arguments that were only restating the defaults have been dropped.\n",
+    "print(\"fit\")\n",
+    "model.fit(\n",
+    "    x=train_it,\n",
+    "    epochs=1,\n",
+    "    verbose=2,\n",
+    "    validation_data=val_it,\n",
+    "    workers=16,\n",
+    "    use_multiprocessing=True)\n",
+    "print(\"fit done\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Reference: signature of model.evaluate() (not called in this notebook yet):\n",
+    "# evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO:\n",
+    "# * Generate output to visualize training/validating/testing.\n",
+    "\n",
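+    "# Plot attempt: currently failing, so it is left disabled.\n",
+    "# NOTE(review): plot_model() expects a model as its first argument, but\n",
+    "# test_it is a data iterator -- it probably should be `model`; confirm.\n",
+    "#print(\"plot\")\n",
+    "#plot_model(test_it,dpi=72)"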
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "predict\n",
+      "1/1 - 0s\n",
+      "[[1.]]\n",
+      "Observation: bad\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run the trained model on the test iterator and rate the first result.\n",
+    "# https://keras.io/models/sequential/\n",
+    "print(\"predict\")\n",
+    "\n",
+    "# Arguments that were only restating the defaults have been dropped.\n",
+    "prediction = model.predict(\n",
+    "    x=test_it,\n",
+    "    verbose=2,\n",
+    "    workers=16,\n",
+    "    use_multiprocessing=True)\n",
+    "\n",
+    "print(prediction)\n",
+    "\n",
+    "# The model output is a floating point score, so compare it against a\n",
+    "# threshold instead of testing for exact equality with 1.\n",
+    "# NOTE(review): this keeps the original mapping (a score of 1 means 'bad').\n",
+    "# Confirm it against train_it.class_indices.\n",
+    "BAD_THRESHOLD = 0.5\n",
+    "if prediction[0][0] >= BAD_THRESHOLD:\n",
+    "    rating = 'bad'\n",
+    "else:\n",
+    "    rating = 'good'\n",
+    "print('Observation: %s' % (rating))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}