parent
bdeb891fee
commit
da2c7d2c49
|
@ -19,6 +19,8 @@
|
||||||
#
|
#
|
||||||
# So to get nearly all of the observations from December 2019, run:
|
# So to get nearly all of the observations from December 2019, run:
|
||||||
# wut-audio-archive 1292461 1470525
|
# wut-audio-archive 1292461 1470525
|
||||||
|
# Archive.org doesn't have everything from December, 2019 yet. Run:
|
||||||
|
# wut-audio-archive 1292461 1333333
|
||||||
#
|
#
|
||||||
# XXX Should check input is sane...
|
# XXX Should check input is sane...
|
||||||
|
|
||||||
|
@ -32,7 +34,7 @@ cd $DOWNDIR || exit
|
||||||
|
|
||||||
# Download JSON
|
# Download JSON
|
||||||
while [ $OBSID -lt $OBSIDMAX ]
|
while [ $OBSID -lt $OBSIDMAX ]
|
||||||
do echo "ID: $OBSID"
|
do echo "Audio. ID: $OBSID"
|
||||||
mkdir -p $OBSID
|
mkdir -p $OBSID
|
||||||
cd $OBSID
|
cd $OBSID
|
||||||
# Download if it isn't there already
|
# Download if it isn't there already
|
||||||
|
|
|
@ -62,6 +62,7 @@ for i in */satnogs_*.ogg
|
||||||
echo "Re-download is good"
|
echo "Re-download is good"
|
||||||
else
|
else
|
||||||
echo "Still bad after re-downloading"
|
echo "Still bad after re-downloading"
|
||||||
|
rm "$AUDIOFILE"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
cd ..
|
cd ..
|
||||||
|
|
1
wut-ml
1
wut-ml
|
@ -100,6 +100,7 @@ print("add")
|
||||||
# * JSON metadata
|
# * JSON metadata
|
||||||
# * TLE
|
# * TLE
|
||||||
# * Audio File (ogg)
|
# * Audio File (ogg)
|
||||||
|
# https://www.tensorflow.org/io/api_docs/python/tfio/ffmpeg/AudioDataset
|
||||||
# * Decoded Data (HEX, ASCII, PNG)
|
# * Decoded Data (HEX, ASCII, PNG)
|
||||||
# Data from external sources to consider adding:
|
# Data from external sources to consider adding:
|
||||||
# * Weather
|
# * Weather
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
#!/bin/bash
# ogg2wav
#
# Convert .ogg files in download/ to .wav files.
# Before converting, the sha1 of each audio file is compared with the one
# listed in the observation's archive.org "_files.xml" manifest. On a
# mismatch the file is re-downloaded once; if it is still bad it is removed.
# Observations whose expected sha1 cannot be determined are skipped and
# their audio file is left untouched.
#
# Dependencies: vorbis-tools (oggdec), curl, jq, sgrep, sha1sum
#
# Usage:
# ogg2wav
# Example:
# ogg2wav

# Extra command line options for oggdec, one array element per word.
OGGDECOPT=()

#######################################
# Download a URL to a local file.
# Arguments: $1 - URL to fetch
#            $2 - output file name
# Returns:   non-zero when curl fails (including HTTP errors, so an error
#            page is never stored as if it were the requested file).
#######################################
fetch() {
  # NOTE: "RANDOM % 1" always evaluates to 0, so this never actually
  # pauses; raise the modulus to add a random delay between requests.
  curl \
    --fail \
    --location \
    --silent \
    --http2 --ipv4 \
    --remote-time \
    --output "$2" \
    "$1" \
    && sleep "$((RANDOM % 1))"
}

#######################################
# Print the sha1 of a local file.
# Arguments: $1 - file name
# Outputs:   hex digest on stdout; nothing when sha1sum cannot read it.
#######################################
file_sha1() {
  sha1sum -- "$1" | cut -f 1 -d " "
}

#######################################
# Print the sha1 recorded for an "original" file in a _files.xml manifest.
# XXX sgrep dependency XXX
# Arguments: $1 - manifest file name
#            $2 - name of the file to look up
# Outputs:   hex digest on stdout; nothing when no entry is found.
#######################################
xml_sha1() {
  sgrep -g xml \
    '"<file name=\"'"$2"'\" source=\"original\"" .. "/file>"' \
    "$1" \
    | grep "<sha1>" \
    | sed -e 's/.*<sha1>//' -e 's/<\/sha1>//'
}

#######################################
# Log and run the oggdec conversion for one file.
# Globals:   OGGDECOPT (read)
# Arguments: $1 - .ogg file name
#######################################
decode() {
  echo "oggdec ${OGGDECOPT[*]} $1"
  nice oggdec "${OGGDECOPT[@]}" "$1"
}

#######################################
# Verify and convert the audio file of one observation.
# The body runs in a subshell, so the directory change never leaks into
# the caller even when a step bails out early.
# Arguments: $1 - observation ID (also the directory name)
# Returns:   non-zero when the observation had to be skipped.
#######################################
process_observation() (
  local obsid=$1
  local xmlurl xmlfile audiourl audiofile xmlsha1 filesha1

  cd "$obsid" || return

  # See if there is an archive.org XML file, if not, download it.
  xmlurl="https://archive.org/download/satnogs-observation-${obsid}/satnogs-observation-${obsid}_files.xml"
  xmlfile=$(basename "$xmlurl")
  [ -f "$xmlfile" ] || fetch "$xmlurl" "$xmlfile"

  # Get name of audio file.
  audiourl=$(jq --raw-output '.[0].archive_url // empty' "$obsid.json" \
    | grep ogg \
    | sed -e 's/http:/https:/g')
  audiofile=$(basename "$audiourl")
  if [ -z "$audiofile" ]; then
    # Without a name, sha1sum would wait on stdin and rm would misfire.
    echo "No ogg archive_url in $obsid.json, skipping" >&2
    return 1
  fi

  # Get sha1 for audio file.
  xmlsha1=$(xml_sha1 "$xmlfile" "$audiofile")
  if [ -z "$xmlsha1" ]; then
    # No reference checksum: do not treat the local file as corrupt.
    echo "No sha1 for $audiofile in $xmlfile, skipping" >&2
    return 1
  fi

  filesha1=$(file_sha1 "$audiofile")
  printf 'XML: %s\nFile: %s ' "$xmlsha1" "$filesha1"
  sleep 1
  if [ "$xmlsha1" = "$filesha1" ]; then
    echo "Encode"
    ls -hl "$audiofile" *wav
    decode "$audiofile"
    return
  fi

  echo "Bad, re-downloading $audiourl"
  rm -f -- "$audiofile"
  fetch "$audiourl" "$audiofile"
  filesha1=$(file_sha1 "$audiofile")
  printf 'XML: %s\nFile: %s ' "$xmlsha1" "$filesha1"
  if [ "$xmlsha1" = "$filesha1" ]; then
    echo "Re-download is good, encode"
    decode "$audiofile"
  else
    echo "Still bad after re-downloading, remove"
    rm -f -- "$audiofile"
  fi
)

cd download/ || exit

# Compile a list of ogg files. The glob is expanded by the shell itself
# (no external ls), so it is not limited by the argument-length limit;
# nullglob makes an empty archive yield an empty list instead of the
# literal pattern.
shopt -s nullglob
oggfiles=(*/satnogs_*.ogg)
echo "Total audio files: ${#oggfiles[@]}"

for i in "${oggfiles[@]}"; do
  OBSID=$(dirname "$i")
  # Go into directories with audiofiles
  echo "$OBSID"
  process_observation "$OBSID"
done
|
||||||
|
|
Loading…
Reference in New Issue