|
- #!/usr/bin/env python
-
- import string
-
# Keywords that parsetoc accepts as a complete header line and records
# under the 'type' key of its result.
types = frozenset(('CD_DA', 'CD_ROM', 'CD_ROMXA'))
-
def decodestr(i, pos):
    """Return the decoded text of the quoted string whose body starts at *pos*.

    *i* is the line being parsed and *pos* is the index of the first
    character after the opening double quote.  This is decodestrend()
    with the closing-quote index discarded.
    """
    text, _closing = decodestrend(i, pos)
    return text
-
def decodestrend(i, pos):
    """Decode a double-quoted string body and report where it ends.

    *i* is the text being parsed and *pos* is the index of the first
    character after the opening double quote.

    Recognised escapes:
        \\"     a literal double quote
        \\\\    a literal backslash
        \\n     a newline
        \\ooo   the character with the given (three digit) octal code

    Returns a tuple ``(decoded, end)`` where *decoded* is the unescaped
    text and *end* is the index in *i* of the closing double quote.

    Raises ValueError if there is no closing double quote, if an unknown
    escape character follows a backslash, or if an octal escape cannot
    be converted.
    """
    r = []
    bspos = None
    dqpos = None
    while True:
        # Only search again when the cached position is missing or lies
        # behind the current scan position.
        if bspos is None or bspos == -1 or bspos < pos:
            bspos = i.find('\\', pos)
        if dqpos is None or dqpos < pos:
            # str.index raises ValueError when the string is unterminated.
            dqpos = i.index('"', pos)
        if 0 <= bspos < dqpos:
            # An escape sequence comes before the next quote: copy the
            # plain text in front of it, then translate the escape.
            r.append(i[pos:bspos])
            c = i[bspos + 1]
            if c == '"':
                r.append('"')
                pos = bspos + 2
            elif c == '\\':
                # Escaped backslash; without this branch any string
                # containing a literal backslash was rejected.
                r.append('\\')
                pos = bspos + 2
            elif c in string.digits:
                # Octal escapes are taken to be exactly three characters.
                r.append(chr(int(i[bspos + 1:bspos + 4], 8)))
                pos = bspos + 4
            elif c == 'n':
                r.append('\n')
                pos = bspos + 2
            else:
                raise ValueError('unknown escape char: %s' % repr(c))
        else:
            # No escape before the quote, so that quote closes the string.
            r.append(i[pos:dqpos])
            break

    return ''.join(r), dqpos
-
def parsetoc(toc):
    """Parse the text of a TOC file into a dictionary.

    *toc* is the complete file contents as one string.  The result has a
    'tracks' key mapping each track number (counted from 1 in order of
    appearance) to a dict holding 'track' plus whichever of 'channels',
    'isrc', 'copy' and 'preemphasis' were given for it.  The top-level
    dict may also carry 'type' and 'catalog'.  CD_TEXT items are stored
    under their own keyword, in the top-level dict when the CD_TEXT
    block precedes the first TRACK and in the track's dict otherwise.

    Items from every LANGUAGE block of one CD_TEXT block are written to
    the same dict, so with more than one language later values overwrite
    earlier ones.

    FILE and START lines and // comments are accepted but ignored.

    Raises ValueError for an unrecognised header/track line, for a track
    attribute that appears before any TRACK line, and for malformed
    strings or binary items.
    """
    # state machine info:
    #  0: header / track body
    #  1: in CD_TEXT
    #  2: in LANGUAGE_MAP
    #  3: in LANGUAGE
    #  4: in a binary CD_TEXT item that continues over several lines

    def frombinary(text):
        # "1, 2, 3" -> the characters with those code points.
        return ''.join(chr(int(x)) for x in text.split(','))

    r = {'tracks': {}}
    langmap = {}
    state = 0
    textobj = None
    langobj = None
    track = 0
    binkey = None
    binparts = []
    for i in toc.split('\n'):
        i = i.strip()
        if not i:
            continue

        items = i.split()
        key = items[0]

        if state == 0:
            if i in types:
                r['type'] = i
            elif key == 'CATALOG':
                r['catalog'] = decodestr(i, i.index('"') + 1)
            elif key == 'CD_TEXT':
                state = 1
                if track == 0:
                    textobj = r
            elif key == 'TRACK':
                track += 1
                textobj = {'track': track}
                r['tracks'][track] = textobj
            elif key in ('FILE', 'START'):
                pass  # XXX
            elif i.startswith('//'):
                pass
            else:
                # Everything left is a per-track attribute.  "NO" needs a
                # second word; a bare "NO" is reported as an unknown line.
                negated = items[1] if key == 'NO' and len(items) > 1 else None
                if key == 'TWO_CHANNEL_AUDIO':
                    name, value = 'channels', 2
                elif key == 'FOUR_CHANNEL_AUDIO':
                    name, value = 'channels', 4
                elif key == 'ISRC':
                    name, value = 'isrc', decodestr(i, i.index('"') + 1)
                elif key == 'COPY':
                    name, value = 'copy', True
                elif negated == 'COPY':
                    name, value = 'copy', False
                elif key == 'PRE_EMPHASIS':
                    name, value = 'preemphasis', True
                elif negated == 'PRE_EMPHASIS':
                    name, value = 'preemphasis', False
                else:
                    raise ValueError('unknown line: %s' % repr(i))
                if textobj is None:
                    raise ValueError('track attribute before any TRACK: %s'
                                     % repr(i))
                textobj[name] = value
        elif state == 1:
            if key == 'LANGUAGE_MAP':
                state = 2
            elif key == 'LANGUAGE':
                state = 3
                # XXX - don't try to use more than one!  Every language
                # is merged straight into the enclosing object.
                langobj = textobj
            elif key == '}':
                # Back to the enclosing track (None while in the header)
                # so attributes following the CD_TEXT block still have a
                # dict to land in.
                textobj = r['tracks'].get(track)
                state = 0
        elif state == 2:
            if key == '}':
                state = 1
            else:
                mapkey, code = (x.strip() for x in i.split(':'))
                try:
                    code = int(code)
                except ValueError:
                    # Not a number; keep symbolic codes as written.
                    pass
                langmap[mapkey] = code
        elif state == 3:
            if key == '}':
                langobj = None
                state = 1
            else:
                curl = i.find('{')
                dquo = i.find('"')
                # A brace with no quote at all, or a brace ahead of the
                # first quote, introduces a binary item.
                if curl != -1 and (dquo == -1 or curl < dquo):
                    body = i[curl + 1:]
                    close = body.find('}')
                    if close == -1:
                        # The list continues on the following lines.
                        binkey = key
                        binparts = [body]
                        state = 4
                    else:
                        langobj[key] = frombinary(body[:close])
                else:
                    if dquo == -1:
                        raise ValueError('no dquote')
                    langobj[key] = decodestr(i, dquo + 1)
        elif state == 4:
            close = i.find('}')
            if close == -1:
                binparts.append(i)
            else:
                binparts.append(i[:close])
                langobj[binkey] = frombinary(' '.join(binparts))
                binkey = None
                binparts = []
                state = 3

    return r
-
if __name__ == '__main__':
    import sys

    # Parse every file named on the command line and dump the result.
    for i in sys.argv[1:]:
        print('file:', repr(i))
        # Use a context manager so each file is closed before the next
        # one is opened.
        with open(i) as f:
            print(parsetoc(f.read()))
|