master
ml server 2020-01-10 23:14:03 -07:00
parent 2cc2f87962
commit e033515ace
2 changed files with 257 additions and 105 deletions

View File: wut-ml.ipynb

@@ -2,15 +2,14 @@
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"#!/usr/bin/python3\n",
"# wut-ml\n",
"# wut-ml.ipynb\n",
"#\n",
"# Vet a SatNOGS image using machine learning (guessing).\n",
"# It will vet the image located at test/unvetted/waterfall.png.\n",
"# It will vet the image located at data/test/unvetted/waterfall.png.\n",
"#\n",
"# Note, there is an issue to fix where it will vet everything\n",
"# under the data/test directory, so fix that. For now, just delete\n",
@@ -24,7 +23,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -40,7 +39,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -57,7 +56,114 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow.python.keras"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.python.keras import Sequential\n",
"from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense\n",
"from tensorflow.python.keras.preprocessing.image import ImageDataGenerator\n",
"from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D\n",
"from tensorflow.python.keras import optimizers\n",
"from tensorflow.python.keras.preprocessing import image\n",
"from tensorflow.python.keras.models import load_model\n",
"from tensorflow.python.keras.preprocessing.image import load_img\n",
"from tensorflow.python.keras.preprocessing.image import img_to_array"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# XXX\n",
"from tensorflow.python.keras.models import Model\n",
"from tensorflow.python.keras.layers import Input, concatenate\n",
"#from tensorflow.python.keras.optimizers import Adam\n",
"\n",
"# XXX Plot\n",
"from tensorflow.python.keras.utils import plot_model\n",
"from tensorflow.python.keras.callbacks import ModelCheckpoint"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"## for visualizing\n",
"#%pylab\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from sklearn.decomposition import PCA"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Seaborn pip dependency\n",
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Interact\n",
"# https://ipywidgets.readthedocs.io/en/stable/examples/Using%20Interact.html\n",
"from __future__ import print_function\n",
"from ipywidgets import interact, interactive, fixed, interact_manual\n",
"import ipywidgets as widgets"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Display Images\n",
"from IPython.display import display, Image"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -69,37 +175,24 @@
}
],
"source": [
"import os\n",
"import numpy as np\n",
"import tensorflow.python.keras\n",
"from tensorflow.python.keras import Sequential\n",
"from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense\n",
"from tensorflow.python.keras.preprocessing.image import ImageDataGenerator\n",
"from tensorflow.python.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D\n",
"from tensorflow.python.keras import optimizers\n",
"from tensorflow.python.keras.preprocessing import image\n",
"from tensorflow.python.keras.models import load_model\n",
"from tensorflow.python.keras.preprocessing.image import load_img\n",
"from tensorflow.python.keras.preprocessing.image import img_to_array\n",
"\n",
"# XXX\n",
"from tensorflow.python.keras.models import Model\n",
"from tensorflow.python.keras.layers import Input, concatenate\n",
"#from tensorflow.python.keras.optimizers import Adam\n",
"\n",
"# XXX Plot\n",
"from tensorflow.python.keras.utils import plot_model\n",
"from tensorflow.python.keras.callbacks import ModelCheckpoint\n",
"## for visualizing \n",
"import matplotlib.pyplot as plt, numpy as np\n",
"from sklearn.decomposition import PCA\n",
"\n",
"print(\"Python import done\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Should create interactive slider, I guess not\n",
"#def f(x):\n",
"# return x\n",
"#interact(f, x=10);"
]
},
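Aside: the commented-out cell above follows the "Using Interact" pattern from the ipywidgets docs linked earlier. A minimal working version, per those docs (a sketch, not part of this commit):

    from ipywidgets import interact

    def f(x):
        return x

    # In a live notebook this renders an integer slider;
    # ipywidgets derives the slider range from the initial value.
    interact(f, x=10)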
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -115,57 +208,103 @@
"# TODO:\n",
"# * Pre-process image\n",
"print(\"datagen\")\n",
"datagen = ImageDataGenerator(\n",
"\tfeaturewise_center=False,\n",
"\tsamplewise_center=False,\n",
"\tfeaturewise_std_normalization=False,\n",
"\tsamplewise_std_normalization=False,\n",
"\tzca_whitening=False,\n",
"\tzca_epsilon=1e-06,\n",
"\trescale=1./255,\n",
"\tshear_range=0.0,\n",
"\tzoom_range=0.0,\n",
"\trotation_range=0,\n",
"\twidth_shift_range=0.0,\n",
"\theight_shift_range=0.0,\n",
"\tbrightness_range=None,\n",
"\tchannel_shift_range=0.0,\n",
"\tfill_mode='nearest',\n",
"\tcval=0.0,\n",
"\thorizontal_flip=False,\n",
"\tvertical_flip=False,\n",
"\tpreprocessing_function=None,\n",
"\tdata_format='channels_last',\n",
"\tvalidation_split=0.0,\n",
"\tdtype='float32')"
"\n",
"datagen = ImageDataGenerator(dtype='float32', zca_epsilon=1e-06, data_format='channels_last')\n",
"\n",
"#datagen = ImageDataGenerator(\n",
"#\tfeaturewise_center=False,\n",
"#\tsamplewise_center=False,\n",
"#\tfeaturewise_std_normalization=False,\n",
"#\tsamplewise_std_normalization=False,\n",
"#\tzca_whitening=False,\n",
"#\tzca_epsilon=1e-06,\n",
"#\trescale=1./255,\n",
"#\tshear_range=0.0,\n",
"#\tzoom_range=0.0,\n",
"#\trotation_range=0,\n",
"#\twidth_shift_range=0.0,\n",
"#\theight_shift_range=0.0,\n",
"#\tbrightness_range=None,\n",
"#\tchannel_shift_range=0.0,\n",
"#\tfill_mode='nearest',\n",
"#\tcval=0.0,\n",
"#\thorizontal_flip=False,\n",
"#\tvertical_flip=False,\n",
"#\tpreprocessing_function=None,\n",
"#\tdata_format='channels_last',\n",
"#\tvalidation_split=0.0,\n",
"#\tdtype='float32')"
]
},
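Note: this commit collapses the fully spelled-out ImageDataGenerator into one that keeps the defaults, and in particular drops rescale=1./255. That one change is why the batch summaries further down move from max=1.000 to max=255.000. A minimal sketch of the difference, assuming the data/train/ layout this notebook uses:

    from tensorflow.python.keras.preprocessing.image import ImageDataGenerator

    raw = ImageDataGenerator(dtype='float32')    # no rescale: pixels stay 0..255
    scaled = ImageDataGenerator(rescale=1./255)  # pixels scaled into 0..1

    batch_x, _ = next(raw.flow_from_directory('data/train/', class_mode='binary'))
    print(batch_x.min(), batch_x.max())          # expect 0.000 255.000

    batch_x, _ = next(scaled.flow_from_directory('data/train/', class_mode='binary'))
    print(batch_x.min(), batch_x.max())          # expect 0.000 1.000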
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"datagen.flow\n",
"Found 761 images belonging to 4 classes.\n",
"Found 741 images belonging to 3 classes.\n",
"import data/train/\n",
"Found 761 images belonging to 4 classes.\n"
]
}
],
"source": [
"print(\"import data/train/\")\n",
"train_it = datagen.flow_from_directory('data/train/', class_mode='binary')"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"import data/val/\n",
"Found 741 images belonging to 3 classes.\n"
]
}
],
"source": [
"print(\"import data/val/\")\n",
"val_it = datagen.flow_from_directory('data/val/', class_mode='binary')"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"import data/test/\n",
"Found 1 images belonging to 1 classes.\n"
]
}
],
"source": [
"print(\"datagen.flow\")\n",
"train_it = datagen.flow_from_directory('data/train/', class_mode='binary')\n",
"val_it = datagen.flow_from_directory('data/val/', class_mode='binary')\n",
"print(\"import data/test/\")\n",
"test_it = datagen.flow_from_directory('data/test/', class_mode='binary')"
]
},
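Aside: the logs above report 4 classes in data/train/ and 3 in data/val/, but class_mode='binary' expects exactly two class subdirectories per split. A quick way to see the mismatch (a sketch; the class names shown are a guess based on the good/bad/failed vetting labels):

    # Prints the subdirectory-to-label mapping Keras inferred, e.g.
    # {'bad': 0, 'failed': 1, 'good': 2, ...}. More than two entries here
    # means class_mode='categorical' with a matching softmax output layer
    # would be the usual fix.
    print(train_it.class_indices)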
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"#display(Image('data/test/unvetted/waterfall.png'))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -173,7 +312,7 @@
"output_type": "stream",
"text": [
"train_it.next()\n",
"Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n"
"Batch shape=(32, 256, 256, 3), min=0.000, max=255.000\n"
]
}
],
@@ -185,14 +324,14 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch shape=(32, 256, 256, 3), min=0.000, max=1.000\n"
"Batch shape=(32, 256, 256, 3), min=0.000, max=255.000\n"
]
}
],
@@ -203,14 +342,14 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Batch shape=(1, 256, 256, 3), min=0.000, max=1.000\n"
"Batch shape=(1, 256, 256, 3), min=0.000, max=255.000\n"
]
}
],
@@ -221,7 +360,7 @@
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -241,7 +380,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -262,7 +401,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -282,7 +421,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -305,7 +444,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -324,7 +463,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -343,7 +482,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -361,7 +500,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 31,
"metadata": {},
"outputs": [
{
@@ -379,7 +518,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 32,
"metadata": {},
"outputs": [
{
@@ -397,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 33,
"metadata": {},
"outputs": [
{
@@ -415,7 +554,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 34,
"metadata": {},
"outputs": [
{
@@ -433,7 +572,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -451,7 +590,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 36,
"metadata": {},
"outputs": [
{
@@ -470,7 +609,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -489,7 +628,7 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 38,
"metadata": {},
"outputs": [
{
@@ -507,7 +646,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 39,
"metadata": {},
"outputs": [
{
@@ -526,7 +665,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 40,
"metadata": {},
"outputs": [
{
@@ -544,7 +683,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 41,
"metadata": {},
"outputs": [
{
@@ -562,7 +701,7 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 42,
"metadata": {},
"outputs": [
{
@@ -588,7 +727,7 @@
},
{
"cell_type": "code",
"execution_count": 57,
"execution_count": 43,
"metadata": {},
"outputs": [
{
@@ -596,8 +735,8 @@
"output_type": "stream",
"text": [
"Fit Start\n",
"Train for 24 steps, validate for 24 steps\n",
"24/24 [==============================] - 60s 3s/step - loss: 0.0000e+00 - accuracy: 0.1209 - val_loss: 0.0000e+00 - val_accuracy: 0.0013\n",
"Train for 4 steps, validate for 4 steps\n",
"4/4 [==============================] - 34s 9s/step - loss: 0.0000e+00 - accuracy: 0.1328 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00\n",
"Fit Done\n"
]
}
@@ -618,8 +757,8 @@
"\tclass_weight=None,\n",
"\tsample_weight=None,\n",
"\tinitial_epoch=0,\n",
"\tsteps_per_epoch=None,\n",
"\tvalidation_steps=None,\n",
"\tsteps_per_epoch=4,\n",
"\tvalidation_steps=4,\n",
"\tvalidation_freq=1,\n",
"\tmax_queue_size=10,\n",
"\tworkers=16,\n",
@@ -629,7 +768,7 @@
},
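Note: steps_per_epoch and validation_steps change from None to a hard-coded 4 here, which matches the new "Train for 4 steps" log; with 761 training images at the default batch size of 32, a full epoch is ceil(761/32) = 24 steps, matching the old "Train for 24 steps" log. A sketch of deriving the counts instead of pinning them, assuming the train_it/val_it iterators defined earlier:

    # len() of a Keras DirectoryIterator is ceil(samples / batch_size),
    # so these recover full-epoch step counts automatically.
    steps_per_epoch = len(train_it)   # ceil(761 / 32) = 24
    validation_steps = len(val_it)    # ceil(741 / 32) = 24
    # Hard-coding 4 trains on only 4 * 32 = 128 images per epoch.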
{
"cell_type": "code",
"execution_count": null,
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
@@ -638,7 +777,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 45,
"metadata": {},
"outputs": [
{
@@ -656,7 +795,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 46,
"metadata": {},
"outputs": [
{
@@ -671,13 +810,14 @@
"# TODO:\n",
"# * Generate output to visualize training/validating/testing.\n",
"# Plot, fail\n",
"#%matplotlib inline\n",
"print(\"Plot\")\n",
"#plot_model(test_it,dpi=72)"
"#plot_model(test_it)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 47,
"metadata": {},
"outputs": [
{
@@ -685,18 +825,22 @@
"output_type": "stream",
"text": [
"predict\n",
"1/1 - 0s\n"
"1/2 [==============>...............] - ETA: 0sWARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 2 batches). You may need to use the repeat() function when building your dataset.\n"
]
}
],
"source": [
"# https://keras.io/models/sequential/\n",
"print(\"predict\")\n",
"\n",
"#prediction = model.predict(x=test_it, batch_size=None, verbose=1, steps=None, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False)\n",
"#prediction = model.predict(x=test_it, batch_size=None, verbose=1, steps=None, use_multiprocessing=True)\n",
"\n",
"prediction = model.predict(\n",
"\tx=test_it,\n",
"\tbatch_size=None,\n",
"\tverbose=1,\n",
"\tsteps=None,\n",
"\tsteps=2,\n",
"\tcallbacks=None,\n",
"\tmax_queue_size=10,\n",
"\tworkers=16,\n",
@@ -705,15 +849,14 @@
},
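Aside: test_it holds a single image and therefore yields exactly one batch, so the hard-coded steps=2 overruns the generator; that is what produces the "ran out of data" warning in the new output above. A sketch of sizing steps to the generator instead:

    # One batch available, so len(test_it) == 1; this avoids the
    # "Your input ran out of data" warning that steps=2 triggers.
    prediction = model.predict(x=test_it, verbose=1, steps=len(test_it))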
{
"cell_type": "code",
"execution_count": 61,
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[1.]]\n",
"Observation: bad\n"
"[[1.]]\n"
]
}
],
@@ -724,9 +867,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 49,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Observation: bad\n"
]
}
],
"source": [
"# Make final prediction\n",
"if prediction[0][0] == 1:\n",

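Note: the diff truncates this cell at the comparison. A sigmoid output from model.predict lands anywhere in [0, 1], so testing for exact equality with 1 only works when the model saturates; thresholding is the conventional rule. A sketch, assuming the class mapping implied by the logged "Observation: bad" for a prediction of [[1.]]:

    # Threshold at 0.5 rather than testing for exact equality with 1.0.
    label = "bad" if prediction[0][0] >= 0.5 else "good"
    print("Observation:", label)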
View File: wut-dl-sort

@@ -9,6 +9,7 @@
# wut-dl-sort [Minimum Observation ID] [Maximum Observation ID]
# Example:
# wut-dl-sort 1467000 1470000
# wut-dl-sort 1292461 1470525
#
# * Takes the files in the download/ dir.
# * Looks at the JSON files to see if it is "good", "bad", or "failed".
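Aside: the comment block above describes the sorting flow; a sketch of it follows. The JSON field name and file layout here are guesses for illustration, not taken from wut-dl-sort itself:

    import json
    import shutil
    from pathlib import Path

    # For each downloaded observation, read its JSON metadata and bin the
    # matching waterfall image by vetting status ("good", "bad", "failed").
    for meta in Path("download").glob("*.json"):         # hypothetical layout
        status = json.loads(meta.read_text()).get("vetted_status")  # field name is a guess
        if status in ("good", "bad", "failed"):
            dest = Path("data/train") / status
            dest.mkdir(parents=True, exist_ok=True)
            shutil.copy(meta.with_suffix(".png"), dest)  # hypothetical image name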