nopenpilot/tools/lib/mkvparse/mkvparse.py

762 lines
27 KiB
Python

# Licence==MIT; Vitaly "_Vi" Shukela 2012
# Simple easy-to-use hacky matroska parser
# Supports SimpleBlock and BlockGroup, lacing, TimecodeScale.
# Does not support seeking, cues, chapters and other features.
# No proper EOF handling unfortunately
# See "mkvuser.py" for the example
import traceback
from struct import unpack
import sys
import datetime
if sys.version < '3':
range=xrange
else:
#identity=lambda x:x
def ord(something):
if type(something)==bytes:
if something == b"":
raise StopIteration
return something[0]
else:
return something
def get_major_bit_number(n):
'''
Takes uint8, returns number of the most significant bit plus the number with that bit cleared.
Examples:
0b10010101 -> (0, 0b00010101)
0b00010101 -> (3, 0b00000101)
0b01111111 -> (1, 0b00111111)
'''
if not n:
raise Exception("Bad number")
i=0x80;
r=0
while not n&i:
r+=1
i>>=1
return (r,n&~i);
def read_matroska_number(f, unmodified=False, signed=False):
'''
Read ebml number. Unmodified means don't clear the length bit (as in Element IDs)
Returns the number and it's length as a tuple
See examples in "parse_matroska_number" function
'''
if unmodified and signed:
raise Exception("Contradictary arguments")
first_byte=f.read(1)
if(first_byte==""):
raise StopIteration
r = ord(first_byte)
(n,r2) = get_major_bit_number(r)
if not unmodified:
r=r2
# from now "signed" means "negative"
i=n
while i:
r = r * 0x100 + ord(f.read(1))
i-=1
if signed:
r-=(2**(7*n+7)-1)
else:
if r==2**(7*n+7)-1:
return (-1, n+1)
return (r,n+1)
def parse_matroska_number(data, pos, unmodified=False, signed=False):
'''
Parse ebml number from buffer[pos:]. Just like read_matroska_number.
Unmodified means don't clear the length bit (as in Element IDs)
Returns the number plus the new position in input buffer
Examples:
"\x81" -> (1, pos+1)
"\x40\x01" -> (1, pos+2)
"\x20\x00\x01" -> (1, pos+3)
"\x3F\xFF\xFF" -> (0x1FFFFF, pos+3)
"\x20\x00\x01" unmodified -> (0x200001, pos+3)
"\xBF" signed -> (0, pos+1)
"\xBE" signed -> (-1, pos+1)
"\xC0" signed -> (1, pos+1)
"\x5F\xEF" signed -> (-16, pos+2)
'''
if unmodified and signed:
raise Exception("Contradictary arguments")
r = ord(data[pos])
pos+=1
(n,r2) = get_major_bit_number(r)
if not unmodified:
r=r2
# from now "signed" means "negative"
i=n
while i:
r = r * 0x100 + ord(data[pos])
pos+=1
i-=1
if signed:
r-=(2**(7*n+6)-1)
else:
if r==2**(7*n+7)-1:
return (-1, pos)
return (r,pos)
def parse_xiph_number(data, pos):
'''
Parse the Xiph lacing number from data[pos:]
Returns the number plus the new position
Examples:
"\x01" -> (1, pos+1)
"\x55" -> (0x55, pos+1)
"\xFF\x04" -> (0x103, pos+2)
"\xFF\xFF\x04" -> (0x202, pos+3)
"\xFF\xFF\x00" -> (0x1FE, pos+3)
'''
v = ord(data[pos])
pos+=1
r=0
while v==255:
r+=v
v = ord(data[pos])
pos+=1
r+=v
return (r, pos)
def parse_fixedlength_number(data, pos, length, signed=False):
'''
Read the big-endian number from data[pos:pos+length]
Returns the number plus the new position
Examples:
"\x01" -> (0x1, pos+1)
"\x55" -> (0x55, pos+1)
"\x55" signed -> (0x55, pos+1)
"\xFF\x04" -> (0xFF04, pos+2)
"\xFF\x04" signed -> (-0x00FC, pos+2)
'''
r=0
for i in range(length):
r=r*0x100+ord(data[pos+i])
if signed:
if ord(data[pos]) & 0x80:
r-=2**(8*length)
return (r, pos+length)
def read_fixedlength_number(f, length, signed=False):
""" Read length bytes and parse (parse_fixedlength_number) it.
Returns only the number"""
buf = f.read(length)
(r, pos) = parse_fixedlength_number(buf, 0, length, signed)
return r
def read_ebml_element_header(f):
'''
Read Element ID and size
Returns id, element size and this header size
'''
(id_, n) = read_matroska_number(f, unmodified=True)
(size, n2) = read_matroska_number(f)
return (id_, size, n+n2)
class EbmlElementType:
VOID=0
MASTER=1 # read all subelements and return tree. Don't use this too large things like Segment
UNSIGNED=2
SIGNED=3
TEXTA=4
TEXTU=5
BINARY=6
FLOAT=7
DATE=8
JUST_GO_ON=10 # For "Segment".
# Actually MASTER, but don't build the tree for all subelements,
# interpreting all child elements as if they were top-level elements
EET=EbmlElementType
# lynx -width=10000 -dump http://matroska.org/technical/specs/index.html
# | sed 's/not 0/not0/g; s/> 0/>0/g; s/Sampling Frequency/SamplingFrequency/g'
# | awk '{print $1 " " $3 " " $8}'
# | grep '\[..\]'
# | perl -ne '/(\S+) (\S+) (.)/;
# $name=$1; $id=$2; $type=$3;
# $id=~s/\[|\]//g;
# %types = (m=>"EET.MASTER",
# u=>"EET.UNSIGNED",
# i=>"EET.SIGNED",
# 8=>"EET.TEXTU",
# s=>"EET.TEXTA",
# b=>"EET.BINARY",
# f=>"EET.FLOAT",
# d=>"EET.DATE");
# $t=$types{$type};
# next unless $t;
# $t="EET.JUST_GO_ON" if $name eq "Segment" or $name eq "Cluster";
# print "\t0x$id: ($t, \"$name\"),\n";'
element_types_names = {
0x1A45DFA3: (EET.MASTER, "EBML"),
0x4286: (EET.UNSIGNED, "EBMLVersion"),
0x42F7: (EET.UNSIGNED, "EBMLReadVersion"),
0x42F2: (EET.UNSIGNED, "EBMLMaxIDLength"),
0x42F3: (EET.UNSIGNED, "EBMLMaxSizeLength"),
0x4282: (EET.TEXTA, "DocType"),
0x4287: (EET.UNSIGNED, "DocTypeVersion"),
0x4285: (EET.UNSIGNED, "DocTypeReadVersion"),
0xEC: (EET.BINARY, "Void"),
0xBF: (EET.BINARY, "CRC-32"),
0x1B538667: (EET.MASTER, "SignatureSlot"),
0x7E8A: (EET.UNSIGNED, "SignatureAlgo"),
0x7E9A: (EET.UNSIGNED, "SignatureHash"),
0x7EA5: (EET.BINARY, "SignaturePublicKey"),
0x7EB5: (EET.BINARY, "Signature"),
0x7E5B: (EET.MASTER, "SignatureElements"),
0x7E7B: (EET.MASTER, "SignatureElementList"),
0x6532: (EET.BINARY, "SignedElement"),
0x18538067: (EET.JUST_GO_ON, "Segment"),
0x114D9B74: (EET.MASTER, "SeekHead"),
0x4DBB: (EET.MASTER, "Seek"),
0x53AB: (EET.BINARY, "SeekID"),
0x53AC: (EET.UNSIGNED, "SeekPosition"),
0x1549A966: (EET.MASTER, "Info"),
0x73A4: (EET.BINARY, "SegmentUID"),
0x7384: (EET.TEXTU, "SegmentFilename"),
0x3CB923: (EET.BINARY, "PrevUID"),
0x3C83AB: (EET.TEXTU, "PrevFilename"),
0x3EB923: (EET.BINARY, "NextUID"),
0x3E83BB: (EET.TEXTU, "NextFilename"),
0x4444: (EET.BINARY, "SegmentFamily"),
0x6924: (EET.MASTER, "ChapterTranslate"),
0x69FC: (EET.UNSIGNED, "ChapterTranslateEditionUID"),
0x69BF: (EET.UNSIGNED, "ChapterTranslateCodec"),
0x69A5: (EET.BINARY, "ChapterTranslateID"),
0x2AD7B1: (EET.UNSIGNED, "TimecodeScale"),
0x4489: (EET.FLOAT, "Duration"),
0x4461: (EET.DATE, "DateUTC"),
0x7BA9: (EET.TEXTU, "Title"),
0x4D80: (EET.TEXTU, "MuxingApp"),
0x5741: (EET.TEXTU, "WritingApp"),
0x1F43B675: (EET.JUST_GO_ON, "Cluster"),
0xE7: (EET.UNSIGNED, "Timecode"),
0x5854: (EET.MASTER, "SilentTracks"),
0x58D7: (EET.UNSIGNED, "SilentTrackNumber"),
0xA7: (EET.UNSIGNED, "Position"),
0xAB: (EET.UNSIGNED, "PrevSize"),
0xA3: (EET.BINARY, "SimpleBlock"),
0xA0: (EET.MASTER, "BlockGroup"),
0xA1: (EET.BINARY, "Block"),
0xA2: (EET.BINARY, "BlockVirtual"),
0x75A1: (EET.MASTER, "BlockAdditions"),
0xA6: (EET.MASTER, "BlockMore"),
0xEE: (EET.UNSIGNED, "BlockAddID"),
0xA5: (EET.BINARY, "BlockAdditional"),
0x9B: (EET.UNSIGNED, "BlockDuration"),
0xFA: (EET.UNSIGNED, "ReferencePriority"),
0xFB: (EET.SIGNED, "ReferenceBlock"),
0xFD: (EET.SIGNED, "ReferenceVirtual"),
0xA4: (EET.BINARY, "CodecState"),
0x8E: (EET.MASTER, "Slices"),
0xE8: (EET.MASTER, "TimeSlice"),
0xCC: (EET.UNSIGNED, "LaceNumber"),
0xCD: (EET.UNSIGNED, "FrameNumber"),
0xCB: (EET.UNSIGNED, "BlockAdditionID"),
0xCE: (EET.UNSIGNED, "Delay"),
0xCF: (EET.UNSIGNED, "SliceDuration"),
0xC8: (EET.MASTER, "ReferenceFrame"),
0xC9: (EET.UNSIGNED, "ReferenceOffset"),
0xCA: (EET.UNSIGNED, "ReferenceTimeCode"),
0xAF: (EET.BINARY, "EncryptedBlock"),
0x1654AE6B: (EET.MASTER, "Tracks"),
0xAE: (EET.MASTER, "TrackEntry"),
0xD7: (EET.UNSIGNED, "TrackNumber"),
0x73C5: (EET.UNSIGNED, "TrackUID"),
0x83: (EET.UNSIGNED, "TrackType"),
0xB9: (EET.UNSIGNED, "FlagEnabled"),
0x88: (EET.UNSIGNED, "FlagDefault"),
0x55AA: (EET.UNSIGNED, "FlagForced"),
0x9C: (EET.UNSIGNED, "FlagLacing"),
0x6DE7: (EET.UNSIGNED, "MinCache"),
0x6DF8: (EET.UNSIGNED, "MaxCache"),
0x23E383: (EET.UNSIGNED, "DefaultDuration"),
0x23314F: (EET.FLOAT, "TrackTimecodeScale"),
0x537F: (EET.SIGNED, "TrackOffset"),
0x55EE: (EET.UNSIGNED, "MaxBlockAdditionID"),
0x536E: (EET.TEXTU, "Name"),
0x22B59C: (EET.TEXTA, "Language"),
0x86: (EET.TEXTA, "CodecID"),
0x63A2: (EET.BINARY, "CodecPrivate"),
0x258688: (EET.TEXTU, "CodecName"),
0x7446: (EET.UNSIGNED, "AttachmentLink"),
0x3A9697: (EET.TEXTU, "CodecSettings"),
0x3B4040: (EET.TEXTA, "CodecInfoURL"),
0x26B240: (EET.TEXTA, "CodecDownloadURL"),
0xAA: (EET.UNSIGNED, "CodecDecodeAll"),
0x6FAB: (EET.UNSIGNED, "TrackOverlay"),
0x6624: (EET.MASTER, "TrackTranslate"),
0x66FC: (EET.UNSIGNED, "TrackTranslateEditionUID"),
0x66BF: (EET.UNSIGNED, "TrackTranslateCodec"),
0x66A5: (EET.BINARY, "TrackTranslateTrackID"),
0xE0: (EET.MASTER, "Video"),
0x9A: (EET.UNSIGNED, "FlagInterlaced"),
0x53B8: (EET.UNSIGNED, "StereoMode"),
0x53B9: (EET.UNSIGNED, "OldStereoMode"),
0xB0: (EET.UNSIGNED, "PixelWidth"),
0xBA: (EET.UNSIGNED, "PixelHeight"),
0x54AA: (EET.UNSIGNED, "PixelCropBottom"),
0x54BB: (EET.UNSIGNED, "PixelCropTop"),
0x54CC: (EET.UNSIGNED, "PixelCropLeft"),
0x54DD: (EET.UNSIGNED, "PixelCropRight"),
0x54B0: (EET.UNSIGNED, "DisplayWidth"),
0x54BA: (EET.UNSIGNED, "DisplayHeight"),
0x54B2: (EET.UNSIGNED, "DisplayUnit"),
0x54B3: (EET.UNSIGNED, "AspectRatioType"),
0x2EB524: (EET.BINARY, "ColourSpace"),
0x2FB523: (EET.FLOAT, "GammaValue"),
0x2383E3: (EET.FLOAT, "FrameRate"),
0xE1: (EET.MASTER, "Audio"),
0xB5: (EET.FLOAT, "SamplingFrequency"),
0x78B5: (EET.FLOAT, "OutputSamplingFrequency"),
0x9F: (EET.UNSIGNED, "Channels"),
0x7D7B: (EET.BINARY, "ChannelPositions"),
0x6264: (EET.UNSIGNED, "BitDepth"),
0xE2: (EET.MASTER, "TrackOperation"),
0xE3: (EET.MASTER, "TrackCombinePlanes"),
0xE4: (EET.MASTER, "TrackPlane"),
0xE5: (EET.UNSIGNED, "TrackPlaneUID"),
0xE6: (EET.UNSIGNED, "TrackPlaneType"),
0xE9: (EET.MASTER, "TrackJoinBlocks"),
0xED: (EET.UNSIGNED, "TrackJoinUID"),
0xC0: (EET.UNSIGNED, "TrickTrackUID"),
0xC1: (EET.BINARY, "TrickTrackSegmentUID"),
0xC6: (EET.UNSIGNED, "TrickTrackFlag"),
0xC7: (EET.UNSIGNED, "TrickMasterTrackUID"),
0xC4: (EET.BINARY, "TrickMasterTrackSegmentUID"),
0x6D80: (EET.MASTER, "ContentEncodings"),
0x6240: (EET.MASTER, "ContentEncoding"),
0x5031: (EET.UNSIGNED, "ContentEncodingOrder"),
0x5032: (EET.UNSIGNED, "ContentEncodingScope"),
0x5033: (EET.UNSIGNED, "ContentEncodingType"),
0x5034: (EET.MASTER, "ContentCompression"),
0x4254: (EET.UNSIGNED, "ContentCompAlgo"),
0x4255: (EET.BINARY, "ContentCompSettings"),
0x5035: (EET.MASTER, "ContentEncryption"),
0x47E1: (EET.UNSIGNED, "ContentEncAlgo"),
0x47E2: (EET.BINARY, "ContentEncKeyID"),
0x47E3: (EET.BINARY, "ContentSignature"),
0x47E4: (EET.BINARY, "ContentSigKeyID"),
0x47E5: (EET.UNSIGNED, "ContentSigAlgo"),
0x47E6: (EET.UNSIGNED, "ContentSigHashAlgo"),
0x1C53BB6B: (EET.MASTER, "Cues"),
0xBB: (EET.MASTER, "CuePoint"),
0xB3: (EET.UNSIGNED, "CueTime"),
0xB7: (EET.MASTER, "CueTrackPositions"),
0xF7: (EET.UNSIGNED, "CueTrack"),
0xF1: (EET.UNSIGNED, "CueClusterPosition"),
0x5378: (EET.UNSIGNED, "CueBlockNumber"),
0xEA: (EET.UNSIGNED, "CueCodecState"),
0xDB: (EET.MASTER, "CueReference"),
0x96: (EET.UNSIGNED, "CueRefTime"),
0x97: (EET.UNSIGNED, "CueRefCluster"),
0x535F: (EET.UNSIGNED, "CueRefNumber"),
0xEB: (EET.UNSIGNED, "CueRefCodecState"),
0x1941A469: (EET.MASTER, "Attachments"),
0x61A7: (EET.MASTER, "AttachedFile"),
0x467E: (EET.TEXTU, "FileDescription"),
0x466E: (EET.TEXTU, "FileName"),
0x4660: (EET.TEXTA, "FileMimeType"),
0x465C: (EET.BINARY, "FileData"),
0x46AE: (EET.UNSIGNED, "FileUID"),
0x4675: (EET.BINARY, "FileReferral"),
0x4661: (EET.UNSIGNED, "FileUsedStartTime"),
0x4662: (EET.UNSIGNED, "FileUsedEndTime"),
0x1043A770: (EET.MASTER, "Chapters"),
0x45B9: (EET.MASTER, "EditionEntry"),
0x45BC: (EET.UNSIGNED, "EditionUID"),
0x45BD: (EET.UNSIGNED, "EditionFlagHidden"),
0x45DB: (EET.UNSIGNED, "EditionFlagDefault"),
0x45DD: (EET.UNSIGNED, "EditionFlagOrdered"),
0xB6: (EET.MASTER, "ChapterAtom"),
0x73C4: (EET.UNSIGNED, "ChapterUID"),
0x91: (EET.UNSIGNED, "ChapterTimeStart"),
0x92: (EET.UNSIGNED, "ChapterTimeEnd"),
0x98: (EET.UNSIGNED, "ChapterFlagHidden"),
0x4598: (EET.UNSIGNED, "ChapterFlagEnabled"),
0x6E67: (EET.BINARY, "ChapterSegmentUID"),
0x6EBC: (EET.UNSIGNED, "ChapterSegmentEditionUID"),
0x63C3: (EET.UNSIGNED, "ChapterPhysicalEquiv"),
0x8F: (EET.MASTER, "ChapterTrack"),
0x89: (EET.UNSIGNED, "ChapterTrackNumber"),
0x80: (EET.MASTER, "ChapterDisplay"),
0x85: (EET.TEXTU, "ChapString"),
0x437C: (EET.TEXTA, "ChapLanguage"),
0x437E: (EET.TEXTA, "ChapCountry"),
0x6944: (EET.MASTER, "ChapProcess"),
0x6955: (EET.UNSIGNED, "ChapProcessCodecID"),
0x450D: (EET.BINARY, "ChapProcessPrivate"),
0x6911: (EET.MASTER, "ChapProcessCommand"),
0x6922: (EET.UNSIGNED, "ChapProcessTime"),
0x6933: (EET.BINARY, "ChapProcessData"),
0x1254C367: (EET.MASTER, "Tags"),
0x7373: (EET.MASTER, "Tag"),
0x63C0: (EET.MASTER, "Targets"),
0x68CA: (EET.UNSIGNED, "TargetTypeValue"),
0x63CA: (EET.TEXTA, "TargetType"),
0x63C5: (EET.UNSIGNED, "TagTrackUID"),
0x63C9: (EET.UNSIGNED, "TagEditionUID"),
0x63C4: (EET.UNSIGNED, "TagChapterUID"),
0x63C6: (EET.UNSIGNED, "TagAttachmentUID"),
0x67C8: (EET.MASTER, "SimpleTag"),
0x45A3: (EET.TEXTU, "TagName"),
0x447A: (EET.TEXTA, "TagLanguage"),
0x4484: (EET.UNSIGNED, "TagDefault"),
0x4487: (EET.TEXTU, "TagString"),
0x4485: (EET.BINARY, "TagBinary"),
0x56AA: (EET.UNSIGNED, "CodecDelay"),
0x56BB: (EET.UNSIGNED, "SeekPreRoll"),
0xF0: (EET.UNSIGNED, "CueRelativePosition"),
0x53C0: (EET.UNSIGNED, "AlphaMode"),
0x55B2: (EET.UNSIGNED, "BitsPerChannel"),
0x55B5: (EET.UNSIGNED, "CbSubsamplingHorz"),
0x55B6: (EET.UNSIGNED, "CbSubsamplingVert"),
0x5654: (EET.TEXTU, "ChapterStringUID"),
0x55B7: (EET.UNSIGNED, "ChromaSitingHorz"),
0x55B8: (EET.UNSIGNED, "ChromaSitingVert"),
0x55B3: (EET.UNSIGNED, "ChromaSubsamplingHorz"),
0x55B4: (EET.UNSIGNED, "ChromaSubsamplingVert"),
0x55B0: (EET.MASTER, "Colour"),
0x234E7A: (EET.UNSIGNED, "DefaultDecodedFieldDuration"),
0x75A2: (EET.SIGNED, "DiscardPadding"),
0x9D: (EET.UNSIGNED, "FieldOrder"),
0x55D9: (EET.FLOAT, "LuminanceMax"),
0x55DA: (EET.FLOAT, "LuminanceMin"),
0x55D0: (EET.MASTER, "MasteringMetadata"),
0x55B1: (EET.UNSIGNED, "MatrixCoefficients"),
0x55BC: (EET.UNSIGNED, "MaxCLL"),
0x55BD: (EET.UNSIGNED, "MaxFALL"),
0x55BB: (EET.UNSIGNED, "Primaries"),
0x55D5: (EET.FLOAT, "PrimaryBChromaticityX"),
0x55D6: (EET.FLOAT, "PrimaryBChromaticityY"),
0x55D3: (EET.FLOAT, "PrimaryGChromaticityX"),
0x55D4: (EET.FLOAT, "PrimaryGChromaticityY"),
0x55D1: (EET.FLOAT, "PrimaryRChromaticityX"),
0x55D2: (EET.FLOAT, "PrimaryRChromaticityY"),
0x55B9: (EET.UNSIGNED, "Range"),
0x55BA: (EET.UNSIGNED, "TransferCharacteristics"),
0x55D7: (EET.FLOAT, "WhitePointChromaticityX"),
0x55D8: (EET.FLOAT, "WhitePointChromaticityY"),
}
def read_simple_element(f, type_, size):
date = None
if size==0:
return ""
if type_==EET.UNSIGNED:
data=read_fixedlength_number(f, size, False)
elif type_==EET.SIGNED:
data=read_fixedlength_number(f, size, True)
elif type_==EET.TEXTA:
data=f.read(size)
data = data.replace(b"\x00", b"") # filter out \0, for gstreamer
data = data.decode("ascii")
elif type_==EET.TEXTU:
data=f.read(size)
data = data.replace(b"\x00", b"") # filter out \0, for gstreamer
data = data.decode("UTF-8")
elif type_==EET.MASTER:
data=read_ebml_element_tree(f, size)
elif type_==EET.DATE:
data=read_fixedlength_number(f, size, True)
data*= 1e-9
data+= (datetime.datetime(2001, 1, 1) - datetime.datetime(1970, 1, 1)).total_seconds()
# now should be UNIX date
elif type_==EET.FLOAT:
if size==4:
data = f.read(4)
data = unpack(">f", data)[0]
elif size==8:
data = f.read(8)
data = unpack(">d", data)[0]
else:
data=read_fixedlength_number(f, size, False)
sys.stderr.write("mkvparse: Floating point of size %d is not supported\n" % size)
data = None
else:
data=f.read(size)
return data
def read_ebml_element_tree(f, total_size):
'''
Build tree of elements, reading f until total_size reached
Don't use for the whole segment, it's not Haskell
Returns list of pairs (element_name, element_value).
element_value can also be list of pairs
'''
childs=[]
while(total_size>0):
(id_, size, hsize) = read_ebml_element_header(f)
if size == -1:
sys.stderr.write("mkvparse: Element %x without size? Damaged data? Skipping %d bytes\n" % (id_, size, total_size))
f.read(total_size);
break;
if size>total_size:
sys.stderr.write("mkvparse: Element %x with size %d? Damaged data? Skipping %d bytes\n" % (id_, size, total_size))
f.read(total_size);
break
type_ = EET.BINARY
name = "unknown_%x"%id_
if id_ in element_types_names:
(type_, name) = element_types_names[id_]
data = read_simple_element(f, type_, size)
total_size-=(size+hsize)
childs.append((name, (type_, data)))
return childs
class MatroskaHandler:
""" User for mkvparse should override these methods """
def tracks_available(self):
pass
def segment_info_available(self):
pass
def frame(self, track_id, timestamp, data, more_laced_frames, duration, keyframe, invisible, discardable):
pass
def ebml_top_element(self, id_, name_, type_, data_):
pass
def before_handling_an_element(self):
pass
def begin_handling_ebml_element(self, id_, name, type_, headersize, datasize):
return type_
def element_data_available(self, id_, name, type_, headersize, data):
pass
def handle_block(buffer, buffer_pos, handler, cluster_timecode, timecode_scale=1000000, duration=None, header_removal_headers_for_tracks={}):
'''
Decode a block, handling all lacings, send it to handler with appropriate timestamp, track number
'''
pos=0
(tracknum, pos) = parse_matroska_number(buffer, pos, signed=False)
(tcode, pos) = parse_fixedlength_number(buffer, pos, 2, signed=True)
flags = ord(buffer[pos]); pos+=1
f_keyframe = (flags&0x80 == 0x80)
f_invisible = (flags&0x08 == 0x08)
f_discardable = (flags&0x01 == 0x01)
laceflags=flags&0x06
block_timecode = (cluster_timecode + tcode)*(timecode_scale*0.000000001)
header_removal_prefix = b""
if tracknum in header_removal_headers_for_tracks:
# header_removal_prefix = header_removal_headers_for_tracks[tracknum]
raise NotImplementedError
if laceflags == 0x00: # no lacing
# buf = buffer[pos:]
handler.frame(tracknum, block_timecode, buffer_pos+pos, len(buffer)-pos,
0, duration, f_keyframe, f_invisible, f_discardable)
return
numframes = ord(buffer[pos]); pos+=1
numframes+=1
lengths=[]
if laceflags == 0x02: # Xiph lacing
accumlength=0
for i in range(numframes-1):
(l, pos) = parse_xiph_number(buffer, pos)
lengths.append(l)
accumlength+=l
lengths.append(len(buffer)-pos-accumlength)
elif laceflags == 0x06: # EBML lacing
accumlength=0
if numframes:
(flength, pos) = parse_matroska_number(buffer, pos, signed=False)
lengths.append(flength)
accumlength+=flength
for i in range(numframes-2):
(l, pos) = parse_matroska_number(buffer, pos, signed=True)
flength+=l
lengths.append(flength)
accumlength+=flength
lengths.append(len(buffer)-pos-accumlength)
elif laceflags==0x04: # Fixed size lacing
fl=int((len(buffer)-pos)/numframes)
for i in range(numframes):
lengths.append(fl)
more_laced_frames=numframes-1
for i in lengths:
# buf = buffer[pos:pos+i]
handler.frame(tracknum, block_timecode, buffer_pos+pos, i, more_laced_frames, duration,
f_keyframe, f_invisible, f_discardable)
pos+=i
more_laced_frames-=1
def resync(f):
sys.stderr.write("mvkparse: Resyncing\n")
while True:
b = f.read(1);
if b == b"": return (None, None);
if b == b"\x1F":
b2 = f.read(3);
if b2 == b"\x43\xB6\x75":
(seglen, x) = read_matroska_number(f)
return (0x1F43B675, seglen, x+4) # cluster
if b == b"\x18":
b2 = f.read(3)
if b2 == b"\x53\x80\x67":
(seglen, x) = read_matroska_number(f)
return (0x18538067, seglen, x+4) # segment
if b == b"\x16":
b2 = f.read(3)
if b2 == b"\x54\xAE\x6B":
(seglen ,x )= read_matroska_number(f)
return (0x1654AE6B, seglen, x+4) # tracks
def mkvparse(f, handler):
'''
Read mkv file f and call handler methods when track or segment information is ready or when frame is read.
Handles lacing, timecodes (except of per-track scaling)
'''
timecode_scale = 1000000
current_cluster_timecode = 0
resync_element_id = None
resync_element_size = None
resync_element_headersize = None
header_removal_headers_for_tracks = {}
while f:
(id_, size, hsize) = (None, None, None)
tree = None
data = None
(type_, name) = (None, None)
try:
if not resync_element_id:
try:
handler.before_handling_an_element()
(id_, size, hsize) = read_ebml_element_header(f)
except StopIteration:
break;
if not (id_ in element_types_names):
sys.stderr.write("mkvparse: Unknown element with id %x and size %d\n"%(id_, size))
(resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
if resync_element_id:
continue;
else:
break;
else:
id_ = resync_element_id
size=resync_element_size
hsize=resync_element_headersize
resync_element_id = None
resync_element_size = None
resync_element_headersize = None
(type_, name) = element_types_names[id_]
(type_, name) = element_types_names[id_]
type_ = handler.begin_handling_ebml_element(id_, name, type_, hsize, size)
if type_ == EET.MASTER:
tree = read_ebml_element_tree(f, size)
data = tree
except Exception:
traceback.print_exc()
handler.before_handling_an_element()
(resync_element_id, resync_element_size, resync_element_headersize) = resync(f)
if resync_element_id:
continue;
else:
break;
if name=="EBML" and type(data) == list:
d = dict(tree)
if 'EBMLReadVersion' in d:
if d['EBMLReadVersion'][1]>1: sys.stderr.write("mkvparse: Warning: EBMLReadVersion too big\n")
if 'DocTypeReadVersion' in d:
if d['DocTypeReadVersion'][1]>2: sys.stderr.write("mkvparse: Warning: DocTypeReadVersion too big\n")
dt = d['DocType'][1]
if dt != "matroska" and dt != "webm":
sys.stderr.write("mkvparse: Warning: EBML DocType is not \"matroska\" or \"webm\"")
elif name=="Info" and type(data) == list:
handler.segment_info = tree
handler.segment_info_available()
d = dict(tree)
if "TimecodeScale" in d:
timecode_scale = d["TimecodeScale"][1]
elif name=="Tracks" and type(data) == list:
handler.tracks={}
for (ten, (_t, track)) in tree:
if ten != "TrackEntry": continue
d = dict(track)
n = d['TrackNumber'][1]
handler.tracks[n]=d
tt = d['TrackType'][1]
if tt==0x01: d['type']='video'
elif tt==0x02: d['type']='audio'
elif tt==0x03: d['type']='complex'
elif tt==0x10: d['type']='logo'
elif tt==0x11: d['type']='subtitle'
elif tt==0x12: d['type']='button'
elif tt==0x20: d['type']='control'
if 'TrackTimecodeScale' in d:
sys.stderr.write("mkvparse: Warning: TrackTimecodeScale is not supported\n")
if 'ContentEncodings' in d:
try:
compr = dict(d["ContentEncodings"][1][0][1][1][0][1][1])
if compr["ContentCompAlgo"][1] == 3:
header_removal_headers_for_tracks[n] = compr["ContentCompSettings"][1]
else:
sys.stderr.write("mkvparse: Warning: compression other than " \
"header removal is not supported\n")
except:
sys.stderr.write("mkvparse: Warning: unsuccessfully tried " \
"to handle header removal compression\n")
handler.tracks_available()
# cluster contents:
elif name=="Timecode" and type_ == EET.UNSIGNED:
data=read_fixedlength_number(f, size, False)
current_cluster_timecode = data;
elif name=="SimpleBlock" and type_ == EET.BINARY:
pos = f.tell()
data=f.read(size)
handle_block(data, pos, handler, current_cluster_timecode, timecode_scale, None, header_removal_headers_for_tracks)
elif name=="BlockGroup" and type_ == EET.MASTER:
d2 = dict(tree)
duration=None
raise NotImplementedError
# if 'BlockDuration' in d2:
# duration = d2['BlockDuration'][1]
# duration = duration*0.000000001*timecode_scale
# if 'Block' in d2:
# handle_block(d2['Block'][1], None, handler, current_cluster_timecode, timecode_scale, duration, header_removal_headers_for_tracks)
else:
if type_!=EET.JUST_GO_ON and type_!=EET.MASTER:
data = read_simple_element(f, type_, size)
handler.ebml_top_element(id_, name, type_, data);
if __name__ == '__main__':
print("Run mkvuser.py for the example")