master
ml server 2020-01-10 23:14:03 -07:00
parent 2cc2f87962
commit e033515ace
2 changed files with 257 additions and 105 deletions

View File: wut-ml.ipynb

@@ -2,15 +2,14 @@
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#!/usr/bin/python3\n",
"# wut-ml\n",
"# wut-ml.ipynb\n",
"#\n",
"# Vet a SatNOGS image using machine learning (guessing).\n",
"# It will vet the image located at test/unvetted/waterfall.png.\n",
"# It will vet the image located at data/test/unvetted/waterfall.png.\n",
"#\n",
"# Note, there is an issue to fix where it will vet everything\n",
"# under the data/test directory, so fix that. For now, just delete\n",
@@ -24,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -40,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -57,7 +56,114 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow.python.keras"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.python.keras import Sequential\n",
"from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense\n",
"from tensorflow.python.keras.preprocessing.image import ImageDataGenerator\n",
"from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D\n",
"from tensorflow.python.keras import optimizers\n",
"from tensorflow.python.keras.preprocessing import image\n",
"from tensorflow.python.keras.models import load_model\n",
"from tensorflow.python.keras.preprocessing.image import load_img\n",
"from tensorflow.python.keras.preprocessing.image import img_to_array"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# XXX\n",
"from tensorflow.python.keras.models import Model\n",
"from tensorflow.python.keras.layers import Input, concatenate\n",
"#from tensorflow.python.keras.optimizers import Adam\n",
"\n",
"# XXX Plot\n",
"from tensorflow.python.keras.utils import plot_model\n",
"from tensorflow.python.keras.callbacks import ModelCheckpoint"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"## for visualizing\n",
"#%pylab\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from sklearn.decomposition import PCA"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Seaborn pip dependency\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Interact\n",
"# https://ipywidgets.readthedocs.io/en/stable/examples/Using%20Interact.html\n",
"from __future__ import print_function\n",
"from ipywidgets import interact, interactive, fixed, interact_manual\n",
"import ipywidgets as widgets"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Display Images\n",
"from IPython.display import display, Image"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -69,37 +175,24 @@
}
],
"source": [
"import os\n",
"import numpy as np\n",
"import tensorflow.python.keras\n",
"from tensorflow.python.keras import Sequential\n",
"from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense\n",
"from tensorflow.python.keras.preprocessing.image import ImageDataGenerator\n",
"from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D\n",
"from tensorflow.python.keras import optimizers\n",
"from tensorflow.python.keras.preprocessing import image\n",
"from tensorflow.python.keras.models import load_model\n",
"from tensorflow.python.keras.preprocessing.image import load_img\n",
"from tensorflow.python.keras.preprocessing.image import img_to_array\n",
"\n",
"# XXX\n",
"from tensorflow.python.keras.models import Model\n",
"from tensorflow.python.keras.layers import Input, concatenate\n",
"#from tensorflow.python.keras.optimizers import Adam\n",
"\n",
"# XXX Plot\n",
"from tensorflow.python.keras.utils import plot_model\n",
"from tensorflow.python.keras.callbacks import ModelCheckpoint\n",
"## for visualizing \n",
"import matplotlib.pyplot as plt, numpy as np\n",
"from sklearn.decomposition import PCA\n",
"\n",
"print(\"Python import done\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Should create interactive slider, I guess not\n",
"#def f(x):\n",
"# return x\n",
"#interact(f, x=10);"
]
},
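Aside: the commented-out cell above follows the "Using Interact" pattern from the ipywidgets docs linked earlier. A minimal working version, per those docs (a sketch, not part of this commit):

    from ipywidgets import interact

    def f(x):
        return x

    # In a live notebook this renders an integer slider;
    # ipywidgets derives the slider range from the initial value.
    interact(f, x=10)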
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -115,57 +208,103 @@
"# TODO:\n",
"# * Pre-process image\n",
"print(\"datagen\")\n",
"datagen = ImageDataGenerator(\n",
"\tfeaturewise_center=False,\n",
"\tsamplewise_center=False,\n",
"\tfeaturewise_std_normalization=False,\n",
"\tsamplewise_std_normalization=False,\n",
"\tzca_whitening=False,\n",
"\tzca_epsilon=1e-06,\n",
"\trescale=1./255,\n",
"\tshear_range=0.0,\n",
"\tzoom_range=0.0,\n",
"\trotation_range=0,\n",
"\twidth_shift_range=0.0,\n",
"\theight_shift_range=0.0,\n",
"\tbrightness_range=None,\n",
"\tchannel_shift_range=0.0,\n",
"\tfill_mode='nearest',\n",
"\tcval=0.0,\n",
"\thorizontal_flip=False,\n",
"\tvertical_flip=False,\n",
"\tpreprocessing_function=None,\n",
"\tdata_format='channels_last',\n",
"\tvalidation_split=0.0,\n",
"\tdtype='float32')"
"\n",
"datagen = ImageDataGenerator(dtype='float32', zca_epsilon=1e-06, data_format='channels_last')\n",
"\n",
"#datagen = ImageDataGenerator(\n",
"#\tfeaturewise_center=False,\n",
"#\tsamplewise_center=False,\n",
"#\tfeaturewise_std_normalization=False,\n",
"#\tsamplewise_std_normalization=False,\n",
"#\tzca_whitening=False,\n",
"#\tzca_epsilon=1e-06,\n",
"#\trescale=1./255,\n",
"#\tshear_range=0.0,\n",
"#\tzoom_range=0.0,\n",
"#\trotation_range=0,\n",
"#\twidth_shift_range=0.0,\n",
"#\theight_shift_range=0.0,\n",
"#\tbrightness_range=None,\n",
"#\tchannel_shift_range=0.0,\n",
"#\tfill_mode='nearest',\n",
"#\tcval=0.0,\n",
"#\thorizontal_flip=False,\n",
"#\tvertical_flip=False,\n",
"#\tpreprocessing_function=None,\n",
"#\tdata_format='channels_last',\n",
"#\tvalidation_split=0.0,\n",
"#\tdtype='float32')"
]
},
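Note: this commit collapses the fully spelled-out ImageDataGenerator into one that keeps the defaults, and in particular drops rescale=1./255. That one change is why the batch summaries further down move from max=1.000 to max=255.000. A minimal sketch of the difference, assuming the data/train/ layout this notebook uses:

    from tensorflow.python.keras.preprocessing.image import ImageDataGenerator

    raw = ImageDataGenerator(dtype='float32')    # no rescale: pixels stay 0..255
    scaled = ImageDataGenerator(rescale=1./255)  # pixels scaled into 0..1

    batch_x, _ = next(raw.flow_from_directory('data/train/', class_mode='binary'))
    print(batch_x.min(), batch_x.max())          # expect 0.000 255.000

    batch_x, _ = next(scaled.flow_from_directory('data/train/', class_mode='binary'))
    print(batch_x.min(), batch_x.max())          # expect 0.000 1.000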
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"datagen.flow\n",
"Found 761 images belonging to 4 classes.\n",
"Found 741 images belonging to 3 classes.\n",
"import data/train/\n",
"Found 761 images belonging to 4 classes.\n"
]
}
],
"source": [
"print(\"import data/train/\")\n",
"train_it = datagen.flow_from_directory('data/train/', class_mode='binary')"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"import data/val/\n",
"Found 741 images belonging to 3 classes.\n"
]
}
],
"source": [
"print(\"import data/val/\")\n",
"val_it = datagen.flow_from_directory('data/val/', class_mode='binary')"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"import data/test/\n",
"Found 1 images belonging to 1 classes.\n"
]
}
],
"source": [
"print(\"datagen.flow\")\n",
"train_it = datagen.flow_from_directory('data/train/', class_mode='binary')\n",
"val_it = datagen.flow_from_directory('data/val/', class_mode='binary')\n",
"print(\"import data/test/\")\n",
"test_it = datagen.flow_from_directory('data/test/', class_mode='binary')"
]
},
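Aside: the logs above report 4 classes in data/train/ and 3 in data/val/, but class_mode='binary' expects exactly two class subdirectories per split. A quick way to see the mismatch (a sketch; the class names shown are a guess based on the good/bad/failed vetting labels):

    # Prints the subdirectory-to-label mapping Keras inferred, e.g.
    # {'bad': 0, 'failed': 1, 'good': 2, ...}. More than two entries here
    # means class_mode='categorical' with a matching softmax output layer
    # would be the usual fix.
    print(train_it.class_indices)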
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"#display(Image('data/test/unvetted/waterfall.png'))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -173,7 +312,7 @@
"output_type": "stream",
"text": [
"train_it.next()\n",
"Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n"
"Batch shape=(32, 256, 256, 3), min=0.000, max=255.000\n"
]
}
],
@@ -185,14 +324,14 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n"
"Batch shape=(32, 256, 256, 3), min=0.000, max=255.000\n"
]
}
],
@@ -203,14 +342,14 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch shape=(1, 256, 256, 3), min=0.000, max=1.000\n"
"Batch shape=(1, 256, 256, 3), min=0.000, max=255.000\n"
]
}
],
@@ -221,7 +360,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -241,7 +380,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -262,7 +401,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -282,7 +421,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -305,7 +444,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -324,7 +463,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -343,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -361,7 +500,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -379,7 +518,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 32,
"metadata": {},
"outputs": [
{
@@ -397,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 33,
"metadata": {},
"outputs": [
{
@@ -415,7 +554,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 34,
"metadata": {},
"outputs": [
{
@@ -433,7 +572,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -451,7 +590,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 36,
"metadata": {},
"outputs": [
{
@@ -470,7 +609,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -489,7 +628,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 38,
"metadata": {},
"outputs": [
{
@@ -507,7 +646,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -526,7 +665,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -544,7 +683,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -562,7 +701,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 42,
"metadata": {},
"outputs": [
{
@@ -588,7 +727,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -596,8 +735,8 @@
"output_type": "stream",
"text": [
"Fit Start\n",
"Train for 24 steps, validate for 24 steps\n",
"24/24 [==============================] - 60s 3s/step - loss: 0.0000e+00 - accuracy: 0.1209 - val_loss: 0.0000e+00 - val_accuracy: 0.0013\n",
"Train for 4 steps, validate for 4 steps\n",
"4/4 [==============================] - 34s 9s/step - loss: 0.0000e+00 - accuracy: 0.1328 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00\n",
"Fit Done\n"
]
}
@@ -618,8 +757,8 @@
"\tclass_weight=None,\n",
"\tsample_weight=None,\n",
"\tinitial_epoch=0,\n",
"\tsteps_per_epoch=None,\n",
"\tvalidation_steps=None,\n",
"\tsteps_per_epoch=4,\n",
"\tvalidation_steps=4,\n",
"\tvalidation_freq=1,\n",
"\tmax_queue_size=10,\n",
"\tworkers=16,\n",
@@ -629,7 +768,7 @@
},
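Note: steps_per_epoch and validation_steps change from None to a hard-coded 4 here, which matches the new "Train for 4 steps" log; with 761 training images at the default batch size of 32, a full epoch is ceil(761/32) = 24 steps, matching the old "Train for 24 steps" log. A sketch of deriving the counts instead of pinning them, assuming the train_it/val_it iterators defined earlier:

    # len() of a Keras DirectoryIterator is ceil(samples / batch_size),
    # so these recover full-epoch step counts automatically.
    steps_per_epoch = len(train_it)   # ceil(761 / 32) = 24
    validation_steps = len(val_it)    # ceil(741 / 32) = 24
    # Hard-coding 4 trains on only 4 * 32 = 128 images per epoch.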
{
"cell_type": "code",
"execution_count": null,
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
@@ -638,7 +777,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 45,
"metadata": {},
"outputs": [
{
@@ -656,7 +795,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 46,
"metadata": {},
"outputs": [
{
@@ -671,13 +810,14 @@
"# TODO:\n",
"# * Generate output to visualize training/validating/testing.\n",
"# Plot, fail\n",
"#%matplotlib inline\n",
"print(\"Plot\")\n",
"#plot_model(test_it,dpi=72)"
"#plot_model(test_it)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 47,
"metadata": {},
"outputs": [
{
@@ -685,18 +825,22 @@
"output_type": "stream",
"text": [
"predict\n",
"1/1 - 0s\n"
"1/2 [==============>...............] - ETA: 0sWARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 2 batches). You may need to use the repeat() function when building your dataset.\n"
]
}
],
"source": [
"# https://keras.io/models/sequential/\n",
"print(\"predict\")\n",
"\n",
"#prediction = model.predict(x=test_it, batch_size=None, verbose=1, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False)\n",
"#prediction = model.predict(x=test_it, batch_size=None, verbose=1, steps=None, use_multiprocessing=True)\n",
"\n",
"prediction = model.predict(\n",
"\tx=test_it,\n",
"\tbatch_size=None,\n",
"\tverbose=1,\n",
"\tsteps=None,\n",
"\tsteps=2,\n",
"\tcallbacks=None,\n",
"\tmax_queue_size=10,\n",
"\tworkers=16,\n",
@@ -705,15 +849,14 @@
},
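Aside: test_it holds a single image and therefore yields exactly one batch, so the hard-coded steps=2 overruns the generator; that is what produces the "ran out of data" warning in the new output above. A sketch of sizing steps to the generator instead:

    # One batch available, so len(test_it) == 1; this avoids the
    # "Your input ran out of data" warning that steps=2 triggers.
    prediction = model.predict(x=test_it, verbose=1, steps=len(test_it))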
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[1.]]\n",
"Observation: bad\n"
"[[1.]]\n"
]
}
],
@@ -724,9 +867,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 49,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Observation: bad\n"
]
}
],
"source": [
"# Make final prediction\n",
"if prediction[0][0] == 1:\n",

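Note: the diff truncates this cell at the comparison. A sigmoid output from model.predict lands anywhere in [0, 1], so testing for exact equality with 1 only works when the model saturates; thresholding is the conventional rule. A sketch, assuming the class mapping implied by the logged "Observation: bad" for a prediction of [[1.]]:

    # Threshold at 0.5 rather than testing for exact equality with 1.0.
    label = "bad" if prediction[0][0] >= 0.5 else "good"
    print("Observation:", label)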
View File: wut-dl-sort

@@ -9,6 +9,7 @@
# wut-dl-sort [Minimum Observation ID] [Maximum Observation ID]
# Example:
# wut-dl-sort 1467000 1470000
# wut-dl-sort 1292461 1470525
#
# * Takes the files in the download/ dir.
# * Looks at the JSON files to see if it is "good", "bad", or "failed".
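Aside: the comment block above describes the sorting flow; a sketch of it follows. The JSON field name and file layout here are guesses for illustration, not taken from wut-dl-sort itself:

    import json
    import shutil
    from pathlib import Path

    # For each downloaded observation, read its JSON metadata and bin the
    # matching waterfall image by vetting status ("good", "bad", "failed").
    for meta in Path("download").glob("*.json"):         # hypothetical layout
        status = json.loads(meta.read_text()).get("vetted_status")  # field name is a guess
        if status in ("good", "bad", "failed"):
            dest = Path("data/train") / status
            dest.mkdir(parents=True, exist_ok=True)
            shutil.copy(meta.with_suffix(".png"), dest)  # hypothetical image name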