wut-dl-sort-txmode

2020-01-03 14:34:22 -07:00 · 2020-01-03 14:34:22 -07:00 · 210957f770
parent 241f92058d
commit 210957f770
2 changed files with 76 additions and 2 deletions
--- a/README.md
+++ b/README.md
@ -31,6 +31,7 @@ The following scripts are in the repo:
 * `wut-compare` --- Compare an observation's current, presumably human, vetting with a `wut` vetting.
 * `wut-compare-all` --- Compare all the observations in `download/` with `wut` vettings.
 * `wut-dl-sort` --- Populate `data/` dir with waterfalls from `download/`.
+* `wut-dl-sort-txmode` --- Populate `data/` dir with waterfalls from `download/` using selected encoding.
 * `wut-ml` --- Main machine learning Python script using Tensorflow and Keras.
 * `wut-obs` --- Download the JSON for an observation ID.
 * `wut-review-staging` --- Review all images in `data/staging`.
@ -187,8 +188,8 @@ At present Tensorflow and Keras are used.


 # Caveats
-This is the first machine learning script I've done,
-I know little about satellites and less about radio,
+This is nearly the first machine learning script I've done,
+I know little about radio and less about satellites,
 and I'm not a programmer.


--- a/73
+++ b/73
@ -0,0 +1,73 @@
+#!/bin/bash
+# wut-dl-sort-txmode
+#
+# Populates the data/ directory from the download/ directory.
+# Does it just for a specific transmitter mode (encoding).
+# Available encodings:
+# AFSK AFSK1k2 AHRPT APT BPSK BPSK1k2 BPSK9k6 BPSK12k5 BPSK400 CERTO CW DUV
+# FFSK1k2 FM FSK1k2 FSK4k8 FSK9k6 FSK19k2 GFSK1k2 GFSK2k4 GFSK4k8 GFSK9k6
+# GFSK19k2 GFSK Rktr GMSK GMSK1k2 GMSK2k4 GMSK4k8 GMSK9k6 GMSK19k2 HRPT LRPT
+# MSK1k2 MSK2k4 MSK4k8 PSK PSK31 SSTV USB WSJT
+#
+# XXX  This script removes directories in data/  !!! XXX
+#
+# Usage:
+# wut-dl-sort-txmode [Encoding] [Minimum Observation ID] [Maximum Observation ID]
+# Example:
+# wut-dl-sort-txmode CW 1467000 1470000
+#
+# * Takes the files in the download/ dir.
+# * Looks at the JSON files to see if it is "good", "bad", or "failed".
+# * Hard links the waterfall into the appropriate data/ directory.
+# * Each waterfall is randomly linked into either data/train or data/val.
+# * The maximum observation ID itself is not processed (loop uses -lt).
+#
+# Possible vetted_status: bad, failed, good, null, unknown.
+set -x
+OBSENC="$1"
+OBSIDMIN="$2"
+OBSIDMAX="$3"
+OBSID=$OBSIDMIN
+
+# Bail out before deleting anything unless all three arguments are given.
+[ $# -eq 3 ] || { echo "Usage: $0 [Encoding] [Minimum Observation ID] [Maximum Observation ID]" >&2 ; exit 1 ; }
+
+# Enable the following if you want to download waterfalls in this range:
+#echo "Downloading Waterfalls"
+#./wut-water-range $OBSIDMIN $OBSIDMAX
+
+# XXX remove data/train and data/val directories XXX
+echo "Removing data/ subdirectories"
+rm -rf data/train data/val
+# Create new empty dirs
+mkdir -p data/train/good data/train/bad data/train/failed
+mkdir -p data/val/good   data/val/bad   data/val/failed
+
+# Then parse each file and link appropriately
+echo "Parsing download/ directory for observation IDs $OBSIDMIN to $OBSIDMAX"
+cd download/ || exit
+
+while [ "$OBSID" -lt "$OBSIDMAX" ] ; do
+	echo "ID: $OBSID "
+	OBSJSON="$OBSID/$OBSID.json"
+	# Stay in download/ and skip IDs with no JSON, so a gap in the range cannot strand us elsewhere.
+	if [ -f "$OBSJSON" ] ; then
+		VET=$(jq -r '.[0].vetted_status' "$OBSJSON")
+		ENC=$(jq -r '.[0].transmitter_mode' "$OBSJSON")
+		if [ "$OBSENC" = "$ENC" ] ; then
+			if [ $((RANDOM % 2)) = 1 ] ; then
+				CLASS_DIR="train"
+			else
+				CLASS_DIR="val"
+			fi
+			case "$VET" in
+				# Braces are required: a bare $OBSID_ would expand the (unset) variable OBSID_.
+				bad|good|failed) ln "$OBSID"/waterfall_"${OBSID}"_*.png "../data/$CLASS_DIR/$VET/"
+				;;
+				null|unknown) echo "$VET, not copying"
+				;;
+			esac
+		fi
+	fi
+	OBSID=$((OBSID + 1))
+done