satnogs-wut/wut-dl-sort

68 lines
1.8 KiB
Bash
Executable File

#!/bin/bash
# wut-dl-sort
#
# Populates the data/ directory from the download/ dir.
#
# XXX This script removes directories in data/ !!! XXX
#
# Usage:
# wut-dl-sort [Minimum Observation ID] [Maximum Observation ID]
# Example:
# wut-dl-sort 1467000 1470000
# wut-dl-sort 1292461 1470525
#
# * Takes the files in the download/ dir.
# * Looks at the JSON files to see if it is "good", "bad", or "failed".
# * Hard link it in the appropriate data/ directory.
# * Each observation is randomly linked into either the data/train or data/val directory.
#
# Possible vetted_status: bad, failed, good, null, unknown.
# Observation ID range to process.
# NOTE: OBSIDMIN is inclusive, OBSIDMAX is exclusive (the loop stops before it).
OBSIDMIN="$1"
OBSIDMAX="$2"

# Validate the arguments BEFORE anything destructive happens: both must be
# plain non-negative decimal integers (no leading zeros, which bash arithmetic
# would otherwise interpret as octal).
ID_RE='^(0|[1-9][0-9]*)$'
if [[ ! "$OBSIDMIN" =~ $ID_RE || ! "$OBSIDMAX" =~ $ID_RE ]]; then
  echo "Usage: ${0##*/} [Minimum Observation ID] [Maximum Observation ID]" >&2
  exit 2
fi

# Never run the rm -rf below from an unexpected working directory.
cd /srv/satnogs || { echo "Cannot cd to /srv/satnogs" >&2; exit 1; }

# Enable the following if you want to download waterfalls in this range:
#echo "Downloading Waterfalls"
#./wut-water-range $OBSIDMIN $OBSIDMAX

# XXX remove data/train and data/val directories XXX
echo "Removing data/ subdirectories"
rm -rf -- data/train data/val

# Create new empty dirs
mkdir -p data/train/good data/train/bad data/train/failed || exit 1
mkdir -p data/val/good data/val/bad data/val/failed || exit 1

# Then parse each file and link appropriately
echo "Parsing download/ directory for observation IDs $OBSIDMIN to $OBSIDMAX"
cd download/ || { echo "Cannot cd to download/" >&2; exit 1; }

# The loop stays in download/ and addresses each observation by relative path,
# so a missing observation directory cannot shift the working directory for
# the iterations that follow.
for (( OBSID = OBSIDMIN; OBSID < OBSIDMAX; OBSID++ )); do
  echo "ID: $OBSID "

  JSON_FILE="$OBSID/$OBSID.json"
  if [[ ! -f "$JSON_FILE" ]]; then
    echo "missing $JSON_FILE, skipping" >&2
    continue
  fi

  # Raw vetted_status of the first array element; a JSON null prints as "null".
  # On a jq failure (malformed JSON, unexpected shape) fall through to the
  # default case arm with an empty status.
  VET=$(jq -r '.[0].vetted_status' "$JSON_FILE") || VET=""

  # Coin flip per observation: 1 -> train, 0 -> val.
  if (( RANDOM % 2 == 1 )); then
    CLASS_DIR="train"
  else
    CLASS_DIR="val"
  fi

  case "$VET" in
    bad|good|failed)
      # Braces are required: "$OBSID_" would expand the unset variable OBSID_.
      WATERFALLS=( "$OBSID"/waterfall_"${OBSID}"_*.png )
      # An unmatched glob stays a literal pattern, which does not exist.
      if [[ -e "${WATERFALLS[0]}" ]]; then
        ln -- "${WATERFALLS[@]}" "../data/$CLASS_DIR/$VET/"
      else
        echo "no waterfall found for $OBSID, skipping" >&2
      fi
      ;;
    null)
      echo "null, not copying"
      ;;
    unknown)
      echo "unknown, not copying"
      ;;
    *)
      echo "unrecognized vetted_status '$VET' for $OBSID, not copying" >&2
      ;;
  esac
done