parent
bdeb891fee
commit
da2c7d2c49
|
@ -19,6 +19,8 @@
|
|||
#
|
||||
# So to get nearly all of the observations in December 2019, run:
|
||||
# wut-audio-archive 1292461 1470525
|
||||
# Archive.org doesn't have everything from December, 2019 yet. Run:
|
||||
# wut-audio-archive 1292461 1333333
|
||||
#
|
||||
# XXX Should check input is sane...
|
||||
|
||||
|
@ -32,7 +34,7 @@ cd $DOWNDIR || exit
|
|||
|
||||
# Download JSON
|
||||
while [ $OBSID -lt $OBSIDMAX ]
|
||||
do echo "ID: $OBSID"
|
||||
do echo "Audio. ID: $OBSID"
|
||||
mkdir -p $OBSID
|
||||
cd $OBSID
|
||||
# Download if it isn't there already
|
||||
|
|
|
@ -62,6 +62,7 @@ for i in */satnogs_*.ogg
|
|||
echo "Re-download is good"
|
||||
else
|
||||
echo "Still bad after re-downloading"
|
||||
rm "$AUDIOFILE"
|
||||
fi
|
||||
fi
|
||||
cd ..
|
||||
|
|
1
wut-ml
1
wut-ml
|
@ -100,6 +100,7 @@ print("add")
|
|||
# * JSON metadata
|
||||
# * TLE
|
||||
# * Audio File (ogg)
|
||||
# https://www.tensorflow.org/io/api_docs/python/tfio/ffmpeg/AudioDataset
|
||||
# * Decoded Data (HEX, ASCII, PNG)
|
||||
# Data from external sources to consider adding:
|
||||
# * Weather
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
#!/bin/bash
# ogg2wav
#
# Convert .ogg files in download/ to .wav files.
# It checks sha1 before converting, re-downloads if bad.
# Dependencies: vorbis-tools (oggdec), curl, jq, sgrep, sha1sum
#
# Usage:
# ogg2wav
# Example:
# ogg2wav

# Extra options handed to oggdec. An array keeps each option a single word
# without relying on unquoted word-splitting.
OGGDECOPT=()

cd download/ || exit

# Unmatched globs expand to nothing instead of the literal pattern, so an
# empty archive yields zero loop iterations rather than a bogus one.
shopt -s nullglob

# Download URL $1 into file $2.
# NOTE: RANDOM % 1 is always 0, so the sleep after a successful download
# never actually pauses; raise the modulus to add jitter between requests.
fetch() {
  curl \
    --location \
    --silent \
    --http2 --ipv4 \
    --remote-time \
    --output "$2" \
    "$1" \
    && sleep "$((0 + RANDOM % 1))"
}

# Print the sha1 of file $1. Prints nothing and returns non-zero when the
# file is missing, so sha1sum is never left waiting on stdin.
sha1_of() {
  [ -f "$1" ] || return 1
  sha1sum -- "$1" | cut -f 1 -d " "
}

# Log and run the oggdec conversion of file $1.
decode() {
  echo "oggdec ${OGGDECOPT[*]} $1"
  nice oggdec "${OGGDECOPT[@]}" "$1"
}

# Verify and convert the audio of observation $1.
# Must be called with the observation's directory as the working directory.
# Returns non-zero when the observation is skipped (no audio URL, or no
# reference checksum to verify against).
process_observation() {
  local obsid=$1
  local xmlurl xmlfile audiourl audiofile xmlsha1 filesha1

  # See if there is an archive.org XML file, if not, download it.
  xmlurl="https://archive.org/download/satnogs-observation-${obsid}/satnogs-observation-${obsid}_files.xml"
  xmlfile=$(basename -- "$xmlurl")
  if [ ! -f "$xmlfile" ]; then
    fetch "$xmlurl" "$xmlfile"
  fi

  # Get name of audio file from the observation's JSON metadata.
  audiourl=$(jq --raw-output '.[0].archive_url // empty' "$obsid.json" \
    | grep ogg \
    | sed -e 's/http:/https:/g')
  if [ -z "$audiourl" ]; then
    echo "No ogg archive_url in $obsid.json, skipping" >&2
    return 1
  fi
  audiofile=$(basename -- "$audiourl")

  # Get sha1 for audio file. XXX sgrep dependency XXX
  xmlsha1=$(sgrep -g xml \
    '"<file name=\"'"$audiofile"'\" source=\"original\"" .. "/file>"' \
    "$xmlfile" \
    | grep "<sha1>" \
    | sed -e 's/.*<sha1>//' -e 's/<\/sha1>//')
  if [ -z "$xmlsha1" ]; then
    # Without a reference checksum nothing can be verified; leave the audio
    # file alone instead of deleting data that may be perfectly good.
    echo "No sha1 for $audiofile in $xmlfile, skipping" >&2
    return 1
  fi

  filesha1=$(sha1_of "$audiofile")
  printf 'XML: %s\nFile: %s ' "$xmlsha1" "$filesha1"
  sleep 1

  if [ "$xmlsha1" = "$filesha1" ]; then
    echo "Encode"
    ls -hl "$audiofile" *wav
    decode "$audiofile"
    return
  fi

  echo "Bad, re-downloading $audiourl"
  rm -f -- "$audiofile"
  fetch "$audiourl" "$audiofile"
  filesha1=$(sha1_of "$audiofile")
  printf 'XML: %s\nFile: %s ' "$xmlsha1" "$filesha1"
  if [ "$xmlsha1" = "$filesha1" ]; then
    echo "Re-download is good, encode"
    decode "$audiofile"
  else
    echo "Still bad after re-downloading, remove"
    rm -f -- "$audiofile"
  fi
}

# Compile a list of ogg files. Globbing into an array avoids parsing ls and
# the argument-length limit that an external ls would hit as the archive grows.
OGGFILES=( */satnogs_*.ogg )
echo "Total audio files: ${#OGGFILES[@]}"

for i in "${OGGFILES[@]}"; do
  OBSID=$(dirname -- "$i")
  echo "$OBSID"
  # Go into directories with audiofiles. The subshell confines the cd, so a
  # failed cd or an early return can never move the loop out of download/.
  ( cd "$OBSID" && process_observation "$OBSID" )
done
|
||||
|
Loading…
Reference in New Issue