#!/usr/bin/env python
"""Parse the text form of a CD table of contents (TOC) into plain dicts.

NOTE(review): the keywords handled here (CD_DA, CATALOG, CD_TEXT, TRACK,
ISRC, ...) look like the cdrdao TOC dialect -- confirm against the tool
that produces the input files.
"""

import string

try:
    _unichr = unichr  # Python 2: builds a unicode character
except NameError:
    _unichr = chr  # Python 3: chr() already returns a text character

# Disc type keywords; a line consisting of exactly one of these sets
# the 'type' entry of the parse result.
types = frozenset([
    'CD_DA',
    'CD_ROM',
    'CD_ROMXA',
])

# parsetoc() state machine values.
_STATE_HEADER = 0        # top level / inside a track, outside CD_TEXT
_STATE_CD_TEXT = 1       # inside CD_TEXT { ... }
_STATE_LANGUAGE_MAP = 2  # inside LANGUAGE_MAP { ... }
_STATE_LANGUAGE = 3      # inside LANGUAGE n { ... }


def decodestr(i, pos):
    """Return the decoded quoted string whose first character is i[pos].

    Convenience wrapper around decodestrend() that drops the end index.
    """
    return decodestrend(i, pos)[0]


def decodestrend(i, pos):
    """Decode a double-quoted string starting just after its opening quote.

    Args:
        i: the text containing the string.
        pos: index of the first character after the opening '"'.

    Returns:
        A tuple (decoded, dqpos) where dqpos is the index of the closing,
        unescaped '"' in i.

    Supported escapes: backslash + '"' (a literal quote), backslash + 'n'
    (newline) and backslash + three octal digits (that character code).

    Raises:
        ValueError: on an unknown escape character, a malformed octal
            escape, or when no closing quote is found.
    """
    r = []
    while True:
        # Searching again from pos on every pass yields the same positions
        # as caching them: pos only ever moves past the escape just
        # consumed.
        bspos = i.find('\\', pos)
        dqpos = i.index('"', pos)  # ValueError if the string never closes
        if not 0 <= bspos < dqpos:
            # No escape before the next quote, so that quote ends the
            # string.
            r.append(i[pos:dqpos])
            break

        r.append(i[pos:bspos])
        # bspos < dqpos guarantees that i[bspos + 1] exists.
        c = i[bspos + 1]
        if c == '"':
            r.append('"')
            pos = bspos + 2
        elif c in string.digits:
            r.append(_unichr(int(i[bspos + 1:bspos + 4], 8)))
            pos = bspos + 4
        elif c == 'n':
            r.append('\n')
            pos = bspos + 2
        else:
            raise ValueError('unknown escape char: %r' % (c,))
    return ''.join(r), dqpos


def _decode_language_value(line):
    """Decode the value of one field line found inside a LANGUAGE block.

    The value is either a '{ n, n, ... }' list of integer character codes
    or a double-quoted string; whichever delimiter comes first on the line
    decides. A brace list must open and close on the same line.
    """
    curl = line.find('{')
    dquo = line.find('"')
    # A brace list may appear on a line without any quote (dquo == -1);
    # comparing curl < dquo alone would wrongly reject that case.
    if curl != -1 and (dquo == -1 or curl < dquo):
        body = line[curl + 1:line.index('}', curl + 1)]
        return ''.join(chr(int(x)) for x in body.split(','))
    if dquo == -1:
        raise ValueError('no dquote')
    return decodestr(line, dquo + 1)


def parsetoc(toc):
    """Parse TOC text and return its contents as a dict.

    Args:
        toc: the complete TOC text; it is processed line by line.

    Returns:
        A dict with a 'tracks' entry mapping the 1-based track number to a
        per-track dict ('track', and when present 'channels', 'isrc',
        'copy', 'preemphasis'). 'type' and 'catalog' are set on the
        top-level dict when present. Fields found in a CD_TEXT LANGUAGE
        block are stored, under the keyword used in the file, directly in
        the dict that owns the CD_TEXT block: the top-level dict before
        the first TRACK, the track dict afterwards.

    Raises:
        ValueError: on an unrecognised line, a malformed string or a
            malformed LANGUAGE_MAP entry.

    NOTE: a track attribute (e.g. ISRC) that appears before the first
    TRACK line still fails with TypeError, as there is no dict to hold it.
    """
    r = {'tracks': {}}
    # Collected (and validated) but not part of the returned result.
    langmap = {}
    state = _STATE_HEADER
    textobj = None  # dict currently receiving attributes
    langobj = None  # dict receiving fields of the open LANGUAGE block
    track = 0

    for i in toc.split('\n'):
        i = i.strip()
        if not i:
            continue

        items = i.split()
        key = items[0]

        if state == _STATE_HEADER:
            if i in types:
                r['type'] = i
            elif key == 'CATALOG':
                r['catalog'] = decodestr(i, i.index('"') + 1)
            elif key == 'CD_TEXT':
                state = _STATE_CD_TEXT
                if track == 0:
                    # Disc-level CD_TEXT is attached to the result itself.
                    textobj = r
            elif key == 'TRACK':
                track += 1
                textobj = {'track': track}
                r['tracks'][track] = textobj
            elif key == 'TWO_CHANNEL_AUDIO':
                textobj['channels'] = 2
            elif key == 'FOUR_CHANNEL_AUDIO':
                textobj['channels'] = 4
            elif key == 'ISRC':
                textobj['isrc'] = decodestr(i, i.index('"') + 1)
            elif key == 'COPY':
                textobj['copy'] = True
            elif items[:2] == ['NO', 'COPY']:
                textobj['copy'] = False
            elif key == 'PRE_EMPHASIS':
                textobj['preemphasis'] = True
            elif items[:2] == ['NO', 'PRE_EMPHASIS']:
                textobj['preemphasis'] = False
            elif key == 'FILE':
                pass  # XXX
            elif key == 'START':
                pass  # XXX
            elif key.startswith('//'):
                pass  # comment, with or without a space after the slashes
            else:
                raise ValueError('unknown line: %r' % (i,))
        elif state == _STATE_CD_TEXT:
            if key == 'LANGUAGE_MAP':
                state = _STATE_LANGUAGE_MAP
            elif key == 'LANGUAGE':
                state = _STATE_LANGUAGE
                # XXX - don't try to use more than one LANGUAGE block!
                # Fields are written straight into the owning dict, not
                # into a per-language sub-dict, so a second block would
                # overwrite the first.
                langobj = textobj
            elif key == '}':
                # Keep the current track (if any) as the attribute target
                # so that track lines following the CD_TEXT block work.
                textobj = r['tracks'].get(track)
                state = _STATE_HEADER
        elif state == _STATE_LANGUAGE_MAP:
            if key == '}':
                state = _STATE_CD_TEXT
            else:
                key, value = (x.strip() for x in i.split(':'))
                langmap[key] = int(value)
        elif state == _STATE_LANGUAGE:
            if key == '}':
                langobj = None
                state = _STATE_CD_TEXT
            else:
                langobj[key] = _decode_language_value(i)

    return r


if __name__ == '__main__':
    import sys

    for i in sys.argv[1:]:
        # Single-argument print() produces identical output on
        # Python 2 and Python 3.
        print('file: %r' % (i,))
        with open(i) as f:
            print(parsetoc(f.read()))