#!/usr/bin/env python3
#
# wut-ia-sha1 --- Verify downloaded files checksums
#
# XXX uses both ET and xml.parsers.expat

import argparse
import hashlib
import os
import sys
import xml.etree.ElementTree as ET
from pathlib import Path
from xml.parsers.expat import ParserCreate, ExpatError, errors

# Root directory under which each observation set has its own sub-directory.
dl_dir = Path('/srv/dl')

# Number of bytes read per iteration while hashing (1 MiB).
_CHUNK_SIZE = 1048576


def convertxml(xmlfile, xml_attribs=True):
    """Parse *xmlfile* with xmltodict and return the resulting mapping.

    Args:
        xmlfile: Path of the XML file to read.
        xml_attribs: Forwarded to ``xmltodict.parse``.

    Returns:
        Whatever ``xmltodict.parse`` returns for the file.

    Raises:
        ImportError: If the third-party ``xmltodict`` package is missing.
    """
    # Imported here because nothing else in this script needs xmltodict;
    # the module was referenced without ever being imported.
    import xmltodict

    with open(xmlfile, "rb") as f:
        return xmltodict.parse(f, xml_attribs=xml_attribs,
                               process_namespaces=False)


def parse_args(argv=None):
    """Parse the command line and locate the observation set's file list.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.

    Returns:
        A ``(xmlfile, obs_dir)`` tuple of ``Path`` objects: the
        ``*_files.xml`` listing and the directory that contains it.

    Raises:
        SystemExit: If the XML file cannot be opened or is not well-formed
            (or if argparse rejects the arguments).
    """
    parser = argparse.ArgumentParser(
        description='sha1 check Internet Archive downloads')
    parser.add_argument('observations',
                        type=str,
                        help='Observation set. Example: 006050001-006060000')
    args = parser.parse_args(argv)

    obs_set = 'satnogs-observations-' + args.observations
    obs_dir = Path(dl_dir, obs_set)
    filename_xml = obs_set + '_files.xml'
    print('filename XML:', filename_xml)
    xmlfile = Path(obs_dir, filename_xml)

    # Run the file through expat once so a missing or malformed listing is
    # reported up front instead of failing later in process_set().
    checker = ParserCreate()
    try:
        with open(xmlfile, 'rb') as f:
            checker.ParseFile(f)
    except (OSError, ExpatError):
        print('No XML file to process')
        sys.exit()
    return (xmlfile, obs_dir)


def get_sha1(filename):
    """Return the SHA-1 hex digest of *filename*.

    The file is read in fixed-size chunks so it never has to fit in memory.

    Args:
        filename: Path of the file to hash.

    Returns:
        The hex digest string, or ``None`` if the file could not be read.
    """
    sha1 = hashlib.sha1()
    try:
        with open(filename, 'rb') as f:
            for data in iter(lambda: f.read(_CHUNK_SIZE), b''):
                sha1.update(data)
    except OSError as err:
        # Unreadable files are reported and yield None, which the caller
        # treats as a checksum mismatch.
        print('Cannot read {}: {}'.format(filename, err), file=sys.stderr)
        return None
    return sha1.hexdigest()


def process_set(xmlfile, obs_dir):
    """Check every file listed in *xmlfile* against its recorded SHA-1.

    For each ``<file>`` element, every ``<sha1>`` element found under it is
    compared with the digest of the file of that name in *obs_dir*, and one
    line ``OK <name>`` or ``FAIL <name>`` is printed. Entries without a
    ``<sha1>`` element produce no output.

    Args:
        xmlfile: Path of the ``*_files.xml`` listing.
        obs_dir: Directory holding the files named in the listing.
    """
    root_node = ET.parse(xmlfile).getroot()
    for tag in root_node.findall('file'):
        name = tag.get('name')
        if name is None:
            # Nothing to hash without a file name.
            continue
        for file_sha1 in tag.iter('sha1'):
            expected = (file_sha1.text or '').strip().lower()
            sha1_hash = get_sha1(Path(obs_dir, name))
            if sha1_hash is not None and sha1_hash == expected:
                print('OK ', end='')
            else:
                print('FAIL ', end='')
            print(name)


def main():
    """Script entry point: resolve the observation set and verify it."""
    xmlfile, obs_dir = parse_args()
    process_set(xmlfile, obs_dir)


if __name__ == "__main__":
    main()