wut-ogg2wav, misc

2020-01-06 15:35:22 -07:00 · 2020-01-06 15:35:22 -07:00 · da2c7d2c49
parent bdeb891fee
commit da2c7d2c49
4 changed files with 82 additions and 1 deletions
--- a/4
+++ b/4
@ -19,6 +19,8 @@
 #
 # So to get mostly all of the observations in December, 2019, run:
 # wut-audio-archive 1292461 1470525
+# Archive.org doesn't have everything from December, 2019 yet. Run:
+# wut-audio-archive 1292461 1333333
 #
 # XXX Should check input is sane...

@ -32,7 +34,7 @@ cd $DOWNDIR || exit

 # Download JSON
 while [ $OBSID -lt $OBSIDMAX ]
-	do echo "ID: $OBSID"
+	do echo "Audio. ID: $OBSID"
 	mkdir -p $OBSID
 	cd $OBSID
 	# Download if it isn't there already
--- a/1
+++ b/1
@ -62,6 +62,7 @@ for i in */satnogs_*.ogg
 			echo "Re-download is good"
 		else
 			echo "Still bad after re-downloading"
+			rm "$AUDIOFILE"
 		fi
 	fi
 	cd ..
--- a/1
+++ b/1
@ -100,6 +100,7 @@ print("add")
 # * JSON metadata
 # * TLE
 # * Audio File (ogg)
+#   https://www.tensorflow.org/io/api_docs/python/tfio/ffmpeg/AudioDataset
 # * Decoded Data (HEX, ASCII, PNG)
 # Data from external sources to consider adding:
 # * Weather
--- a/77
+++ b/77
@ -0,0 +1,77 @@
+#!/bin/bash
+# ogg2wav
+#
+# Convert .ogg files in download/ to .wav files.
+# It checks sha1 before converting, re-downloads if bad.
+# Dependencies: vorbis-tools (oggdec), curl, jq, sgrep, sha1sum
+#
+# Usage:
+# ogg2wav
+# Example:
+# ogg2wav
+
+# Extra options for oggdec. It is expanded unquoted on purpose, so that an
+# empty value adds no argument and several options split into separate words.
+OGGDECOPT=""
+
+cd download/ || exit
+
+# Without nullglob an unmatched glob stays a literal string, and the loop
+# below would run once on "*/satnogs_*.ogg" itself.
+shopt -s nullglob
+
+# Fetch URL $2 into file $1.
+# NOTE: RANDOM % 1 is always 0, so the sleep after a successful download
+# never actually waits. XXX
+fetch() {
+	curl								\
+		--location						\
+		--silent						\
+		--http2 --ipv4						\
+		--remote-time						\
+		--output "$1"						\
+		"$2"							\
+		&& sleep "$((0 + RANDOM % 1))"
+}
+
+# Print the sha1 of file $1 (nothing on stdout if sha1sum fails).
+sha1_of() {
+	sha1sum -- "$1" | cut -f 1 -d " "
+}
+
+# Compile a list of ogg files. The glob is expanded by the shell itself,
+# so no external command has to take the whole list as arguments.
+OGGFILES=(*/satnogs_*.ogg)
+echo "Total audio files: ${#OGGFILES[@]}"
+for i in "${OGGFILES[@]}"
+	do OBSID=$(dirname "$i")
+	# Go into directories with audiofiles
+	echo "$OBSID"
+	# Skip the observation if its directory cannot be entered; otherwise
+	# the "cd .." at the end of the loop would climb out of download/.
+	cd "$OBSID" || continue
+	# See if there is an archive.org XML file, if not, download it.
+	XMLURL="https://archive.org/download/satnogs-observation-$OBSID/satnogs-observation-$OBSID""_files.xml"
+	XMLFILE=$(basename "$XMLURL")
+	if [ ! -f "$XMLFILE" ] ; then
+		fetch "$XMLFILE" "$XMLURL"
+	fi
+	# Get name of audio file.
+	AUDIOURL=$(jq --raw-output '.[0].archive_url // empty' "$OBSID.json" |
+		grep ogg |
+		sed -e 's/http:/https:/g')
+	if [ -z "$AUDIOURL" ] ; then
+		# With no file name, sha1sum would read stdin and hang.
+		echo "No ogg archive_url in $OBSID.json, skipping"
+		cd ..
+		continue
+	fi
+	AUDIOFILE=$(basename "$AUDIOURL")
+	# Get sha1 for audio file. XXX sgrep dependency XXX
+	AUDIOXMLSHA1=$(sgrep -g xml					\
+		'"<file name=\"'"$AUDIOFILE"'\" source=\"original\"" .. "/file>"' \
+		"$XMLFILE" |
+		grep "<sha1>" |
+		sed -e 's/.*<sha1>//' -e 's/<\/sha1>//')
+	AUDIOFILESHA1=$(sha1_of "$AUDIOFILE")
+	printf 'XML:  %s\nFile: %s ' "$AUDIOXMLSHA1" "$AUDIOFILESHA1"
+	sleep 1
+	# An empty reference checksum must not count as a match: two empty
+	# strings compare equal even though nothing was verified.
+	if [ -n "$AUDIOXMLSHA1" ] && [ "$AUDIOXMLSHA1" = "$AUDIOFILESHA1" ] ; then
+		echo "Encode"
+		echo "oggdec $OGGDECOPT $AUDIOFILE"
+		ls -hl "$AUDIOFILE" *wav
+		nice oggdec $OGGDECOPT "$AUDIOFILE"
+	else
+		echo "Bad, re-downloading $AUDIOURL"
+		rm -f -- "$AUDIOFILE"
+		fetch "$AUDIOFILE" "$AUDIOURL"
+		AUDIOFILESHA1=$(sha1_of "$AUDIOFILE")
+		printf 'XML:  %s\nFile: %s ' "$AUDIOXMLSHA1" "$AUDIOFILESHA1"
+		if [ -n "$AUDIOXMLSHA1" ] && [ "$AUDIOXMLSHA1" = "$AUDIOFILESHA1" ] ; then
+			echo "Re-download is good, encode"
+			echo "oggdec $OGGDECOPT $AUDIOFILE"
+			nice oggdec $OGGDECOPT "$AUDIOFILE"
+		else
+			echo "Still bad after re-downloading, remove"
+			# -f: the file may not exist if the download failed.
+			rm -f -- "$AUDIOFILE"
+		fi
+	fi
+	cd ..
+done
+