#!/bin/bash
# wut-dl-sort
#
# Populates the data/ directory from the download/ directory.
#
# XXX This script removes directories in data/ !!! XXX
#
# Usage:
# wut-dl-sort [Minimum Observation ID] [Maximum Observation ID]
# Example:
# wut-dl-sort 1467000 1470000
# wut-dl-sort 1292461 1470525
#
# * Takes the files in the download/ dir.
# * Looks at the JSON files to see if it is "good", "bad", or "failed".
# * Hard links the waterfall in the appropriate data/ directory.
# * File is randomly linked into either the data/train or data/val directory.
#
# Observation IDs from the minimum up to, but NOT including, the maximum
# are processed.
#
# Possible vetted_status: bad, failed, good, null, unknown.

# Print a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

OBSIDMIN="$1"
OBSIDMAX="$2"

# Validate the arguments before anything destructive happens: without this a
# run with missing arguments would wipe data/ and then fail in the loop test.
if [[ ! "$OBSIDMIN" =~ ^[0-9]+$ || ! "$OBSIDMAX" =~ ^[0-9]+$ ]]; then
  warn "Usage: wut-dl-sort [Minimum Observation ID] [Maximum Observation ID]"
  exit 2
fi

OBSID=$OBSIDMIN

# The rm -rf below uses relative paths, so it must never run from the wrong
# working directory.
cd /srv/satnogs || { warn "Cannot cd to /srv/satnogs"; exit 1; }

# Enable the following if you want to download waterfalls in this range:
#echo "Downloading Waterfalls"
#./wut-water-range $OBSIDMIN $OBSIDMAX

# XXX remove data/train and data/val directories XXX
echo "Removing data/ subdirectories"
rm -rf -- data/train data/val

# Create new empty dirs
mkdir -p data/train/good data/train/bad data/train/failed || exit 1
mkdir -p data/val/good data/val/bad data/val/failed || exit 1

# Then parse each file and link appropriately
echo "Parsing download/ directory for observation IDs $OBSIDMIN to $OBSIDMAX"
cd download/ || exit 1

# An unmatched waterfall glob should expand to nothing rather than to the
# literal pattern.
shopt -s nullglob

while [ "$OBSID" -lt "$OBSIDMAX" ]; do
  echo "ID: $OBSID "

  # Work with paths relative to download/ instead of cd-ing into each
  # observation directory: a missing directory then only skips this ID and
  # cannot leave the loop in the wrong working directory.
  if [[ ! -d "$OBSID" ]]; then
    warn "No download directory for $OBSID, skipping"
    OBSID=$((OBSID + 1))
    continue
  fi

  # -r prints the raw string ("good") or the word null for a JSON null.
  if ! VET=$(jq -r '.[0].vetted_status' < "$OBSID/$OBSID.json"); then
    warn "Cannot read vetted_status for $OBSID, skipping"
    OBSID=$((OBSID + 1))
    continue
  fi

  # Pick train or val at random for this observation.
  if (( RANDOM % 2 == 1 )); then
    CLASS_DIR="train"
  else
    CLASS_DIR="val"
  fi

  case "$VET" in
    bad | good | failed)
      # ${OBSID} needs braces: "$OBSID_" would name a different (unset)
      # variable and silently turn the pattern into waterfall_*.png.
      waterfalls=("$OBSID"/waterfall_"${OBSID}"_*.png)
      if (( ${#waterfalls[@]} == 0 )); then
        warn "No waterfall found for $OBSID, not copying"
      else
        ln -- "${waterfalls[@]}" "../data/$CLASS_DIR/$VET/" \
          || warn "Failed to link waterfall for $OBSID"
      fi
      ;;
    null)
      echo "null, not copying"
      ;;
    unknown)
      echo "unknown, not copying"
      ;;
    *)
      warn "Unrecognized vetted_status '$VET' for $OBSID, not copying"
      ;;
  esac

  OBSID=$((OBSID + 1))
done