diff --git a/wut-audio-archive b/wut-audio-archive
index 524177d..53c4263 100755
--- a/wut-audio-archive
+++ b/wut-audio-archive
@@ -19,6 +19,8 @@
 #
 # So to get mostly all of the observations in December, 2019, run:
 # wut-audio-archive 1292461 1470525
+# Archive.org doesn't have everything from December, 2019 yet. Run:
+# wut-audio-archive 1292461 1333333
 #
 # XXX Should check input is sane...
 
@@ -32,7 +34,7 @@ cd $DOWNDIR || exit
 
 # Download JSON
 while [ $OBSID -lt $OBSIDMAX ]
-  do echo "ID: $OBSID"
+  do echo "Audio. ID: $OBSID"
   mkdir -p $OBSID
   cd $OBSID
   # Download if is isn't there already
diff --git a/wut-audio-sha1 b/wut-audio-sha1
index fec2680..c465dc5 100755
--- a/wut-audio-sha1
+++ b/wut-audio-sha1
@@ -62,6 +62,7 @@ for i in */satnogs_*.ogg
       echo "Re-download is good"
     else
       echo "Still bad after re-downloading"
+      rm "$AUDIOFILE"
     fi
   fi
   cd ..
diff --git a/wut-ml b/wut-ml
index 3a52d68..dc5d24e 100755
--- a/wut-ml
+++ b/wut-ml
@@ -100,6 +100,7 @@ print("add")
 # * JSON metadata
 # * TLE
 # * Audio File (ogg)
+# https://www.tensorflow.org/io/api_docs/python/tfio/ffmpeg/AudioDataset
 # * Decoded Data (HEX, ASCII, PNG)
 # Data from external sources to consider adding:
 # * Weather
diff --git a/wut-ogg2wav b/wut-ogg2wav
new file mode 100755
--- /dev/null
+++ b/wut-ogg2wav
@@ -0,0 +1,116 @@
+#!/bin/bash
+# wut-ogg2wav
+#
+# Convert the .ogg files under download/ to .wav files.
+# Each file's sha1 is checked against the archive.org metadata before
+# converting. A bad file is re-downloaded once, and removed if still bad.
+# Dependencies: vorbis-tools (oggdec), curl, jq, awk, sha1sum
+#
+# Usage:
+# wut-ogg2wav
+# Example:
+# wut-ogg2wav
+
+OGGDECOPT=""
+
+# Print the sha1 recorded for file $1 in the archive.org files XML $2.
+# Prints nothing when the XML is missing or has no entry for that file.
+# Assumes each <file name="..."> element carries a <sha1> child.
+xml_sha1() {
+  [ -f "$2" ] || return 0
+  awk -v name="$1" '
+    !found {
+      pos = index($0, "<file name=\"" name "\"")
+      if (!pos) next
+      found = 1
+      $0 = substr($0, pos)
+    }
+    match($0, /<sha1>[^<]*<\/sha1>/) {
+      print substr($0, RSTART + 6, RLENGTH - 13)
+      exit
+    }
+    /<\/file>/ { exit }
+  ' "$2"
+}
+
+# Print the sha1 of file $1 (nothing if it cannot be read).
+file_sha1() {
+  sha1sum "$1" | cut -f 1 -d " "
+}
+
+# Download URL $2 to file $1. --fail keeps an HTTP error page from being
+# saved as if it were the requested file.
+# RANDOM % 1 is always 0, so the trailing sleep adds no delay yet.
+fetch() {
+  curl \
+    --fail \
+    --location \
+    --silent \
+    --http2 --ipv4 \
+    --remote-time \
+    --output "$1" \
+    "$2" \
+    && sleep "$((RANDOM % 1))"
+}
+
+# Decode audio file $1 to wav.
+decode() {
+  echo "oggdec $OGGDECOPT $1"
+  # OGGDECOPT is left unquoted on purpose so it can hold several options.
+  nice oggdec $OGGDECOPT "$1"
+}
+
+cd download/ || exit
+
+# Let the shell expand the glob: no ls, so no argument-list limit.
+shopt -s nullglob
+OGGFILES=(*/satnogs_*.ogg)
+echo "Total audio files: ${#OGGFILES[@]}"
+
+for i in "${OGGFILES[@]}"; do
+  OBSID=$(dirname "$i")
+  echo "$OBSID"
+  # Work in a subshell so a failed cd or an early skip can never leave
+  # the loop in the wrong directory.
+  (
+    cd "$OBSID" || exit
+    # See if there is an archive.org XML file, if not, download it.
+    XMLURL="https://archive.org/download/satnogs-observation-$OBSID/satnogs-observation-${OBSID}_files.xml"
+    XMLFILE=$(basename "$XMLURL")
+    [ -f "$XMLFILE" ] || fetch "$XMLFILE" "$XMLURL"
+    # Get name of audio file.
+    AUDIOURL=$(jq -r '.[0].archive_url // empty' "$OBSID.json" \
+      | grep ogg | sed -e 's/http:/https:/g')
+    if [ -z "$AUDIOURL" ]; then
+      echo "No audio URL in $OBSID.json, skipping"
+      exit
+    fi
+    AUDIOFILE=$(basename "$AUDIOURL")
+    # Get sha1 for audio file.
+    AUDIOXMLSHA1=$(xml_sha1 "$AUDIOFILE" "$XMLFILE")
+    if [ -z "$AUDIOXMLSHA1" ]; then
+      # Nothing to verify against, so do not treat the file as bad.
+      echo "No sha1 for $AUDIOFILE in $XMLFILE, skipping"
+      exit
+    fi
+    AUDIOFILESHA1=$(file_sha1 "$AUDIOFILE")
+    echo -e -n "XML: $AUDIOXMLSHA1\nFile: $AUDIOFILESHA1 "
+    if [ "$AUDIOXMLSHA1" = "$AUDIOFILESHA1" ]; then
+      echo "Encode"
+      decode "$AUDIOFILE"
+    else
+      echo "Bad, re-downloading $AUDIOURL"
+      rm -f "$AUDIOFILE"
+      fetch "$AUDIOFILE" "$AUDIOURL"
+      AUDIOFILESHA1=$(file_sha1 "$AUDIOFILE")
+      echo -e -n "XML: $AUDIOXMLSHA1\nFile: $AUDIOFILESHA1 "
+      if [ "$AUDIOXMLSHA1" = "$AUDIOFILESHA1" ]; then
+        echo "Re-download is good, encode"
+        decode "$AUDIOFILE"
+      else
+        echo "Still bad after re-downloading, remove"
+        rm -f "$AUDIOFILE"
+      fi
+    fi
+  )
+done