parent
241f92058d
commit
210957f770
|
@ -31,6 +31,7 @@ The following scripts are in the repo:
|
|||
* `wut-compare` --- Compare an observation's current, presumably human, vetting with a `wut` vetting.
|
||||
* `wut-compare-all` --- Compare all the observations in `download/` with `wut` vettings.
|
||||
* `wut-dl-sort` --- Populate `data/` dir with waterfalls from `download/`.
|
||||
* `wut-dl-sort-txmode` --- Populate `data/` dir with waterfalls from `download/` using selected encoding.
|
||||
* `wut-ml` --- Main machine learning Python script using Tensorflow and Keras.
|
||||
* `wut-obs` --- Download the JSON for an observation ID.
|
||||
* `wut-review-staging` --- Review all images in `data/staging`.
|
||||
|
@ -187,8 +188,8 @@ At present Tensorflow and Keras are used.
|
|||
|
||||
|
||||
# Caveats
|
||||
This is the first machine learning script I've done,
|
||||
I know little about satellites and less about radio,
|
||||
This is nearly the first machine learning script I've done,
|
||||
I know little about radio and less about satellites,
|
||||
and I'm not a programmer.
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
#!/bin/bash
|
||||
# wut-dl-sort-txmode
|
||||
#
|
||||
# Populates the data/ directory from the download/ dir.
|
||||
# Does it just for a specific transmitter mode (encoding)
|
||||
# Available encodings:
|
||||
# AFSK AFSK1k2 AHRPT APT BPSK BPSK1k2 BPSK9k6 BPSK12k5 BPSK400 CERTO CW DUV
|
||||
# FFSK1k2 FM FSK1k2 FSK4k8 FSK9k6 FSK19k2 GFSK1k2 GFSK2k4 GFSK4k8 GFSK9k6
|
||||
# GFSK19k2 GFSK Rktr GMSK GMSK1k2 GMSK2k4 GMSK4k8 GMSK9k6 GMSK19k2 HRPT LRPT
|
||||
# MSK1k2 MSK2k4 MSK4k8 PSK PSK31 SSTV USB WSJT
|
||||
#
|
||||
# XXX This script removes directories in data/ !!! XXX
|
||||
#
|
||||
# Usage:
|
||||
# wut-dl-sort-txmode [Encoding] [Minimum Observation ID] [Maximum Observation ID]
|
||||
# Example:
|
||||
# wut-dl-sort-txmode CW 1467000 1470000
|
||||
#
|
||||
# * Takes the files in the download/ dir.
|
||||
# * Looks at the JSON files to see if it is "good", "bad", or "failed".
|
||||
# * Hard link it in the appropriate data/ directory.
|
||||
# * File is randomly linked into either the data/train or data/val directory.
|
||||
#
|
||||
# Possible vetted_status: bad, failed, good, null, unknown.
|
||||
# Trace every command as it runs (debugging aid).
set -x

# Validate the command line BEFORE touching data/: this script deletes
# data/train and data/val, so a bare or mistyped invocation must not get
# as far as the rm below.
if [ "$#" -lt 3 ]; then
  echo "Usage: wut-dl-sort-txmode [Encoding] [Minimum Observation ID] [Maximum Observation ID]" >&2
  exit 1
fi

# Positional arguments:
#   $1 - transmitter mode (encoding) to select, e.g. CW
#   $2 - first observation ID to process
#   $3 - observation ID at which processing stops
OBSENC="$1"
OBSIDMIN="$2"
OBSIDMAX="$3"

# Both IDs are used in integer comparisons and arithmetic further down,
# so reject anything that is not a plain decimal number.
for ID_ARG in "$OBSIDMIN" "$OBSIDMAX"; do
  case "$ID_ARG" in
    '' | *[!0-9]*)
      echo "Observation IDs must be integers, got: '$ID_ARG'" >&2
      exit 1
      ;;
  esac
done

# Loop counter consumed by the main loop below.
OBSID="$OBSIDMIN"

# Without download/ there is nothing to sort; bail out before deleting
# the existing data set.
if [ ! -d download ]; then
  echo "download/ directory not found, leaving data/ untouched" >&2
  exit 1
fi

# Enable the following if you want to download waterfalls in this range:
#echo "Downloading Waterfalls"
#./wut-water-range $OBSIDMIN $OBSIDMAX

# XXX remove data/train and data/val directories XXX
echo "Removing data/ subdirectories"
rm -rf data/train data/val

# Create new empty dirs
mkdir -p data/train/good data/train/bad data/train/failed
mkdir -p data/val/good data/val/bad data/val/failed

# Then parse each file and link appropriately
echo "Parsing download/ directory for observation IDs $OBSIDMIN to $OBSIDMAX"
cd download/ || exit 1
|
||||
|
||||
# Walk the observation IDs from $OBSID up to (but not including) $OBSIDMAX.
# The current directory is download/, which holds one sub-directory per
# observation ID containing <ID>.json and the waterfall PNG(s).
#
# Paths are built relative to download/ instead of cd-ing into each
# observation directory, so a missing directory cannot leave the script
# in the wrong working directory for the remaining iterations.
while [ "$OBSID" -lt "$OBSIDMAX" ]; do
  echo "ID: $OBSID "
  OBSJSON="$OBSID/$OBSID.json"

  if [ -r "$OBSJSON" ]; then
    # -r prints the bare string (or the word "null") without JSON quoting.
    VET=$(jq -r '.[0].vetted_status' "$OBSJSON")
    ENC=$(jq -r '.[0].transmitter_mode' "$OBSJSON")

    # Quoted on both sides: modes may be empty or contain spaces.
    if [ "$OBSENC" = "$ENC" ]; then
      # Coin flip between the training and validation sets.
      if [ $((RANDOM % 2)) -eq 1 ]; then
        CLASS_DIR="train"
      else
        CLASS_DIR="val"
      fi

      case "$VET" in
        bad | good | failed)
          # ${OBSID}_ keeps the underscore out of the variable name;
          # a bare $OBSID_ would expand the (unset) variable OBSID_.
          for PNG in "$OBSID"/waterfall_"${OBSID}"_*.png; do
            # An unmatched glob stays literal; skip it.
            if [ ! -e "$PNG" ]; then
              echo "No waterfall found for $OBSID" >&2
              continue
            fi
            ln "$PNG" "../data/$CLASS_DIR/$VET/"
          done
          ;;
        null)
          echo "null, not copying"
          ;;
        unknown)
          echo "unknown, not copying"
          ;;
      esac
    fi
  else
    echo "No readable $OBSJSON, skipping" >&2
  fi

  OBSID=$((OBSID + 1))
done
|
Loading…
Reference in New Issue