From 210957f770574330391bc7cb9bf18eaddd568cab Mon Sep 17 00:00:00 2001
From: ml server
Date: Fri, 3 Jan 2020 14:34:22 -0700
Subject: [PATCH] wut-dl-sort-txmode

---
 README.md          |   5 ++--
 wut-dl-sort-txmode | 106 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 2 deletions(-)
 create mode 100755 wut-dl-sort-txmode

diff --git a/README.md b/README.md
index 11b15d4..01eb168 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ The following scripts are in the repo:
 * `wut-compare` --- Compare an observations' current presumably human vetting with a `wut` vetting.
 * `wut-compare-all` --- Compare all the observations in `download/` with `wut` vettings.
 * `wut-dl-sort` --- Populate `data/` dir with waterfalls from `download/`.
+* `wut-dl-sort-txmode` --- Populate `data/` dir with waterfalls from `download/` using selected encoding.
 * `wut-ml` --- Main machine learning Python script using Tensorflow and Keras.
 * `wut-obs` --- Download the JSON for an observation ID.
 * `wut-review-staging` --- Review all images in `data/staging`.
@@ -187,8 +188,8 @@ At present Tensorflow and Keras are used.
 
 # Caveats
 
-This is the first machine learning script I've done,
-I know little about satellites and less about radio,
+This is nearly the first machine learning script I've done,
+I know little about radio and less about satellites,
 and I'm not a programmer.
 
 
diff --git a/wut-dl-sort-txmode b/wut-dl-sort-txmode
new file mode 100755
index 0000000..b7b8a14
--- /dev/null
+++ b/wut-dl-sort-txmode
@@ -0,0 +1,106 @@
+#!/bin/bash
+# wut-dl-sort-txmode
+#
+# Populates the data/ directory from the download/ dir.
+# Does it just for a specific transmitter mode (encoding).
+# Available encodings:
+# AFSK AFSK1k2 AHRPT APT BPSK BPSK1k2 BPSK9k6 BPSK12k5 BPSK400 CERTO CW DUV
+# FFSK1k2 FM FSK1k2 FSK4k8 FSK9k6 FSK19k2 GFSK1k2 GFSK2k4 GFSK4k8 GFSK9k6
+# GFSK19k2 GFSK Rktr GMSK GMSK1k2 GMSK2k4 GMSK4k8 GMSK9k6 GMSK19k2 HRPT LRPT
+# MSK1k2 MSK2k4 MSK4k8 PSK PSK31 SSTV USB WSJT
+#
+# XXX This script removes directories in data/ !!! XXX
+#
+# Usage:
+# wut-dl-sort-txmode [Encoding] [Minimum Observation ID] [Maximum Observation ID]
+# Example:
+# wut-dl-sort-txmode CW 1467000 1470000
+#
+# * Takes the files in the download/ dir.
+# * Looks at the JSON files to see if it is "good", "bad", or "failed".
+# * Hard links it into the appropriate data/ directory.
+# * File is randomly linked into either the data/train or data/val directory.
+#
+# Observation IDs from Minimum up to, but not including, Maximum are processed.
+#
+# Possible vetted_status: bad, failed, good, null, unknown.
+set -x
+
+if [ "$#" -ne 3 ]; then
+  echo "Usage: $0 [Encoding] [Minimum Observation ID] [Maximum Observation ID]" >&2
+  exit 1
+fi
+
+OBSENC="$1"
+OBSIDMIN="$2"
+OBSIDMAX="$3"
+
+# Validate the ID range before anything under data/ is deleted.
+for ID in "$OBSIDMIN" "$OBSIDMAX"; do
+  case "$ID" in
+    ''|*[!0-9]*)
+      echo "Observation ID must be a non-negative integer: '$ID'" >&2
+      exit 1
+      ;;
+  esac
+done
+
+# Without download/ there is nothing to sort, so do not wipe data/ for nothing.
+if [ ! -d download ]; then
+  echo "download/ directory not found" >&2
+  exit 1
+fi
+
+# Enable the following if you want to download waterfalls in this range:
+#echo "Downloading Waterfalls"
+#./wut-water-range $OBSIDMIN $OBSIDMAX
+
+# XXX remove data/train and data/val directories XXX
+echo "Removing data/ subdirectories"
+rm -rf data/train data/val
+# Create new empty dirs
+mkdir -p data/train/good data/train/bad data/train/failed
+mkdir -p data/val/good data/val/bad data/val/failed
+
+# Then parse each file and link appropriately
+echo "Parsing download/ directory for observation IDs $OBSIDMIN to $OBSIDMAX"
+cd download/ || exit
+
+# Paths are built relative to download/ instead of cd'ing into each
+# observation dir, so a missing observation cannot leave the loop in the
+# wrong working directory.
+for ((OBSID = OBSIDMIN; OBSID < OBSIDMAX; OBSID++)); do
+  echo "ID: $OBSID "
+  JSON="$OBSID/$OBSID.json"
+  if [ ! -f "$JSON" ]; then
+    echo "No JSON for observation $OBSID, skipping" >&2
+    continue
+  fi
+  # jq -r prints the bare string value (or "null"), no cut/sed needed.
+  VET="$(jq -r '.[0].vetted_status' "$JSON")"
+  ENC="$(jq -r '.[0].transmitter_mode' "$JSON")"
+  # Quoted: encodings such as "GFSK Rktr" contain a space.
+  if [ "$OBSENC" = "$ENC" ]; then
+    if [ $((RANDOM % 2)) -eq 1 ]; then
+      CLASS_DIR="train"
+    else
+      CLASS_DIR="val"
+    fi
+    case "$VET" in
+      bad|good|failed)
+        # Braces are required: "$OBSID_" would expand the unset variable OBSID_.
+        for PNG in "$OBSID"/waterfall_"${OBSID}"_*.png; do
+          # An unmatched glob stays literal; skip it instead of failing in ln.
+          [ -e "$PNG" ] || continue
+          ln "$PNG" "../data/$CLASS_DIR/$VET/"
+        done
+        ;;
+      null)
+        echo "null, not copying"
+        ;;
+      unknown)
+        echo "unknown, not copying"
+        ;;
+    esac
+  fi
+done