#!/usr/bin/env python

'''A Pure Python ASN.1 encoder/decoder w/ a calling interface in the spirit
of pickle.

The default dumps/loads uses a profile of ASN.1 that supports serialization
of key/value pairs.  This is non-standard.  Instantiate the class ASN1Coder
to get a pure ASN.1 serializer/deserializer.

All lengths must be specified.  That is that End-of-contents octets MUST NOT
be used.  The shortest form of length encoding MUST be used.  A longer length
encoding MUST be rejected.'''

__author__ = 'John-Mark Gurney'
__copyright__ = 'Copyright 2016 John-Mark Gurney. All rights reserved.'
__license__ = '2-clause BSD license'

# Copyright 2016, John-Mark Gurney
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of the Project.

import binascii
import datetime
import math
import os
import pdb
import sys
import unittest

try:
    from unittest import mock
except ImportError:
    # Python 2: unittest.mock does not exist, use the external package.
    import mock

__all__ = ['dumps', 'loads', 'ASN1Coder']

try:
    # Python 2: text and arbitrary precision integers are separate types.
    _text_type = unicode
    _long_type = long
except NameError:
    # Python 3: str is the text type and int is unbounded.
    _text_type = str
    _long_type = int


def _bchr(i):
    '''Return the single octet with value i as a byte string.'''

    return bytes(bytearray((i,)))


def _byteat(d, pos):
    '''Return the octet at d[pos] as an int.

    Indexing a byte string gives a length one string on Python 2 and an
    int on Python 3; this hides the difference.  Raises IndexError when
    pos is past the end of d.'''

    v = d[pos]
    if isinstance(v, int):
        return v

    return ord(v)


def _bytestoint(bs):
    '''Interpret the byte string bs as an unsigned big-endian integer.
    Raises ValueError if bs is empty.'''

    return int(binascii.hexlify(bs), 16)


def _numtostr(n):
    '''Return the non-negative integer n as a big-endian byte string
    using the fewest octets possible (one octet for zero).'''

    hs = '%x' % n
    if len(hs) & 1 == 1:
        # unhexlify requires an even number of digits
        hs = '0' + hs

    return binascii.unhexlify(hs)


def _encodelen(l):
    '''Takes l as a length value, and returns a byte string that
    represents l per ASN.1 rules.'''

    if l < 128:
        # short form, the octet is the length
        return _bchr(l)

    # long form, first octet is 0x80 | number of length octets
    bs = _numtostr(l)
    return _bchr(len(bs) | 0x80) + bs


def _decodelen(d, pos=0):
    '''Returns the length, and number of bytes required.

    The length is read from d starting at pos.  ValueError is raised
    for the indefinite form, for a truncated length and for a length
    that is not encoded in the shortest form.  IndexError is raised
    if pos is past the end of d.'''

    odp = _byteat(d, pos)
    if odp < 128:
        return odp, 1

    l = odp & 0x7f
    if l == 0:
        raise ValueError('indefinite length not supported')

    lenbytes = d[pos + 1:pos + 1 + l]
    if len(lenbytes) != l:
        raise ValueError('truncated length')

    val = _bytestoint(lenbytes)
    if val < 128 or _byteat(lenbytes, 0) == 0:
        # would have fit in the short form, or has a leading zero octet
        raise ValueError('length not in shortest form')

    return val, l + 1


class Test_codelen(unittest.TestCase):
    '''Tests for _encodelen and _decodelen.'''

    _testdata = [
        (2, b'\x02'),
        (127, b'\x7f'),
        (128, b'\x81\x80'),
        (255, b'\x81\xff'),
        (256, b'\x82\x01\x00'),
        (65536-1, b'\x82\xff\xff'),
        (65536, b'\x83\x01\x00\x00'),
    ]

    def test_el(self):
        for i, j in self._testdata:
            self.assertEqual(_encodelen(i), j)
            self.assertEqual(_decodelen(j), (i, len(j)))

    def test_nonminimal(self):
        # indefinite, fits in short form, leading zero, truncated
        for bad in (b'\x80', b'\x81\x05', b'\x82\x00\x80', b'\x82\x01'):
            self.assertRaises(ValueError, _decodelen, bad)


def _splitfloat(f):
    '''Split f into a tuple (m, e) such that m is integral and
    f == m * 2**e.  m is returned as a float and carries the sign.'''

    m, e = math.frexp(f)

    # XXX - less than ideal
    while m != math.trunc(m):
        m *= 2
        e -= 1

    return m, e


class TestSplitFloat(unittest.TestCase):
    '''Tests for _splitfloat.'''

    def test_sf(self):
        for a, b in [
            (0x2421, -32),
            (0x5382f, 238),
            (0x1fa8c3b094adf1, 971),
        ]:
            self.assertEqual(_splitfloat(a * 2**b), (a, b))


class ASN1Coder(object):
    '''A class that contains an PASN.1 encoder/decoder.

    Exports two methods, loads and dumps.

    Encoded data is always a byte string (str on Python 2, bytes on
    Python 3).  Each enc_<name> method returns the length octets followed
    by the content octets for the value; dumps prepends the tag.  Each
    dec_<name> method takes the data, the start and the end of the content
    octets and returns a tuple of the decoded value and the end
    position.'''

    def __init__(self, coerce=None):
        '''If the arg coerce is provided, when dumping the object,
        if the type is not found, the coerce function will be called
        with the obj.  It is expected to return a tuple of a string
        and an object that has the method w/ the string as defined:
            'bool': __nonzero__ (__bool__ on Python 3)
            'float': compatible w/ float
            'int': compatible w/ int
            'list': __iter__
            'set': __iter__
            'bytes': __str__ (__bytes__ on Python 3)
            'null': no method needed
            'unicode': encode method returns UTF-8 encoded bytes
            'datetime': strftime and microsecond
        '''

        self.coerce = coerce

    # Maps the exact type of an object to the name used to look up the
    # enc_/dec_ methods.  decimal.Decimal and datetime.timedelta are not
    # supported.
    _typemap = {
        bool: 'bool',
        float: 'float',
        int: 'int',
        list: 'list',
        _long_type: 'int',
        set: 'set',
        bytes: 'bytes',
        type(None): 'null',
        _text_type: 'unicode',
        datetime.datetime: 'datetime',
    }

    # Maps the identifier octet to the name of the type.
    _tagmap = {
        b'\x01': 'bool',
        b'\x02': 'int',
        b'\x04': 'bytes',
        b'\x05': 'null',
        b'\x09': 'float',
        b'\x0c': 'unicode',
        b'\x18': 'datetime',
        b'\x30': 'list',
        b'\x31': 'set',
    }

    # Reverse of _tagmap.
    _typetag = dict((v, k) for k, v in _tagmap.items())

    @staticmethod
    def enc_int(obj):
        '''Encode the integer obj as a twos-complement big-endian
        value.

        NOTE: the most negative value of a width (e.g. -128) is sent with
        one more octet than strictly needed (ff80).  This is kept so that
        the output does not change.'''

        l = obj.bit_length()
        l += 1  # space for sign bit

        l = (l + 7) // 8

        if obj < 0:
            obj += 1 << (l * 8)  # twos-complement conversion

        v = _numtostr(obj)
        if len(v) != l:
            # positive value whose top bit is set, add sign octet
            v = b'\x00' + v

        return _encodelen(l) + v

    @staticmethod
    def dec_int(d, pos, end):
        '''Decode the twos-complement big-endian integer in
        d[pos:end].  Empty content decodes as 0.'''

        if pos == end:
            return 0, end

        v = _bytestoint(d[pos:end])
        av = 1 << ((end - pos) * 8 - 1)  # sign bit
        if v >= av:
            # The sign bit is set.  Must be >=, not >, or the most
            # negative value (only the sign bit set) decodes as positive.
            v -= av * 2  # twos-complement conversion

        return v, end

    @staticmethod
    def enc_bool(obj):
        '''Encode the truth value of obj as a single octet, 0xff for
        true and 0x00 for false.'''

        return b'\x01' + (b'\xff' if obj else b'\x00')

    def dec_bool(self, d, pos, end):
        '''Decode a bool.  Raises ValueError unless the content
        decodes, as an integer, to -1 (0xff) or 0.'''

        v = self.dec_int(d, pos, end)[0]
        if v not in (-1, 0):
            raise ValueError('invalid bool value: %d' % v)

        return bool(v), end

    @staticmethod
    def enc_null(obj):
        '''Encode None, which has no content octets.'''

        return b'\x00'

    @staticmethod
    def dec_null(d, pos, end):
        '''Decode None.  The content octets are not looked at.'''

        return None, end

    def enc_list(self, obj):
        '''Encode each item of the iterable obj, in iteration order,
        using dumps.'''

        r = b''.join(self.dumps(x) for x in obj)
        return _encodelen(len(r)) + r

    def dec_list(self, d, pos, end):
        '''Decode the items in d[pos:end] into a list.  Raises
        ValueError if an item extends past end.'''

        r = []
        vend = pos
        while pos < end:
            v, vend = self._loads(d, pos, end)
            if vend > end:
                raise ValueError('load past end')
            r.append(v)
            pos = vend

        return r, vend

    # A set is encoded the same as a list, only the tag differs.
    enc_set = enc_list

    def dec_set(self, d, pos, end):
        '''Decode the items in d[pos:end] into a set.'''

        r, end = self.dec_list(d, pos, end)
        return set(r), end

    @staticmethod
    def enc_bytes(obj):
        '''Encode the byte string obj.'''

        return _encodelen(len(obj)) + bytes(obj)

    @staticmethod
    def dec_bytes(d, pos, end):
        '''Return d[pos:end] as is.'''

        return d[pos:end], end

    @staticmethod
    def enc_unicode(obj):
        '''Encode the text obj as UTF-8.'''

        encobj = obj.encode('utf-8')

        return _encodelen(len(encobj)) + encobj

    def dec_unicode(self, d, pos, end):
        '''Decode d[pos:end] as UTF-8 text.  Invalid UTF-8 raises
        UnicodeDecodeError, which is a ValueError.'''

        return d[pos:end].decode('utf-8'), end

    @staticmethod
    def enc_float(obj):
        '''Encode the float obj.

        NaN, the infinities and minus zero are each encoded as a single
        special octet, positive zero as empty content.  Everything else
        uses the binary encoding, base 2, with a one or two octet
        exponent.  Raises ValueError if the exponent does not fit.'''

        s = math.copysign(1, obj)
        if math.isnan(obj):
            return _encodelen(1) + _bchr(0b01000010)
        elif math.isinf(obj):
            if s == 1:
                return _encodelen(1) + _bchr(0b01000000)
            else:
                return _encodelen(1) + _bchr(0b01000001)
        elif obj == 0:
            if s == 1:
                return _encodelen(0)
            else:
                return _encodelen(1) + _bchr(0b01000011)

        m, e = _splitfloat(obj)

        # Binary encoding
        val = 0x80
        if m < 0:
            val |= 0x40
            m = -m

        # Base 2
        el = (e.bit_length() + 7 + 1) // 8  # + 1 is sign bit
        if el > 2:
            raise ValueError('exponent too large')

        if e < 0:
            e += 256**el  # convert negative to twos-complement

        # low two bits of the first octet are number of exponent octets - 1
        val |= el - 1

        # _numtostr drops leading zero octets, so a positive exponent
        # whose top bit is set (128-255) comes back one octet short of
        # the el octets announced above.  Pad it, otherwise the decoder
        # treats the first mantissa octet as part of the exponent.
        encexp = _numtostr(e)
        encexp = b'\x00' * (el - len(encexp)) + encexp

        # m is an integral float, '%x' needs a real integer
        r = _bchr(val) + encexp + _numtostr(int(m))

        return _encodelen(len(r)) + r

    def dec_float(self, d, pos, end):
        '''Decode a float as produced by enc_float.

        Raises ValueError for encodings that are not supported (base
        other than 2, non-zero scaling factor, decimal encoding, exponent
        longer than two octets), for a missing mantissa and for values
        too large for a float.'''

        if pos == end:
            return float(0), end

        v = _byteat(d, pos)
        if v == 0b01000000:
            return float('inf'), end
        elif v == 0b01000001:
            return float('-inf'), end
        elif v == 0b01000010:
            return float('nan'), end
        elif v == 0b01000011:
            return float('-0'), end
        elif v & 0b110000:
            raise ValueError('base must be 2')
        elif v & 0b1100:
            raise ValueError('scaling factor must be 0')
        elif v & 0b11000000 == 0:
            raise ValueError('decimal encoding not supported')

        if (v & 3) >= 2:
            raise ValueError('large exponents not supported')

        pexp = pos + 1
        eexp = pos + 1 + (v & 3) + 1

        exp = self.dec_int(d, pexp, eexp)[0]

        try:
            n = float(_bytestoint(d[eexp:end]))
            r = n * 2 ** exp
        except OverflowError:
            # report bad input the same way as every other decode error
            raise ValueError('float value out of range')

        if v & 0b1000000:
            r = -r

        return r, end

    def dumps(self, obj):
        '''Convert obj into an array of bytes.

        The encoder is picked by the exact type of obj.  If the type is
        not known, the coerce function given to the constructor is asked
        for a type name and a replacement object.  Raises TypeError if
        the type is not known and there is no coerce function.'''

        try:
            tf = self._typemap[type(obj)]
        except KeyError:
            if self.coerce is None:
                raise TypeError('unhandled object: %s' % repr(obj))

            tf, obj = self.coerce(obj)

        fun = getattr(self, 'enc_%s' % tf)
        return self._typetag[tf] + fun(obj)

    def _loads(self, data, pos, end):
        '''Decode the single object that starts at data[pos].

        Returns a tuple of the object and the position following it.
        The end argument is not consulted here, the end of the object is
        computed from its own length; containers check the returned
        position against their own end.  Raises ValueError if data is
        too short for the length, and KeyError for an unknown tag.'''

        # slice so that the tag is a byte string on Python 3 as well
        tag = data[pos:pos + 1]
        l, b = _decodelen(data, pos + 1)
        if len(data) < pos + 1 + b + l:
            raise ValueError('string not long enough')

        # XXX - enforce that len(data) == end?
        end = pos + 1 + b + l

        t = self._tagmap[tag]
        fun = getattr(self, 'dec_%s' % t)
        return fun(data, pos + 1 + b, end)

    def enc_datetime(self, obj):
        '''Encode obj as YYYYMMDDHHMMSS[.ffffff]Z.  The fraction is
        only present if microsecond is non-zero, and has trailing zeros
        removed.'''

        ts = obj.strftime('%Y%m%d%H%M%S')
        if obj.microsecond:
            ts += ('.%06d' % obj.microsecond).rstrip('0')
        ts += 'Z'

        # strftime returns text on Python 3
        ts = ts.encode('ascii')

        return _encodelen(len(ts)) + ts

    def dec_datetime(self, data, pos, end):
        '''Decode a datetime as produced by enc_datetime.

        Raises ValueError if the string does not end with Z, contains a
        space, has trailing zeros in the fraction or is otherwise not
        accepted by strptime.'''

        ts = data[pos:end]
        if not isinstance(ts, str):
            # Python 3: strptime wants text.  Non-ASCII octets raise
            # UnicodeDecodeError, which is a ValueError.
            ts = ts.decode('ascii')

        # endswith, unlike ts[-1], also copes with empty content
        if not ts.endswith('Z'):
            raise ValueError('last character must be Z')

        # Real bug is in strptime, but work around it here.  Only look
        # at this value, not at the whole buffer, a space elsewhere in
        # the message is perfectly valid.
        if ' ' in ts:
            raise ValueError('no spaces are allowed')

        if '.' in ts:
            fstr = '%Y%m%d%H%M%S.%fZ'
            if ts.endswith('0Z'):
                raise ValueError('invalid trailing zeros')
        else:
            fstr = '%Y%m%d%H%M%SZ'

        return datetime.datetime.strptime(ts, fstr), end

    def loads(self, data, pos=0, end=None, consume=False):
        '''Load from data, starting at pos (optional), and ending
        at end (optional).  If it is required to consume the
        whole string (not the default), set consume to True, and
        a ValueError will be raised if the string is not
        completely consumed.  The second item in ValueError will
        be the position that was the detected end.'''

        if end is None:
            end = len(data)
        r, e = self._loads(data, pos, end)

        if consume and e != end:
            raise ValueError('entire string not consumed', e)

        return r


class ASN1DictCoder(ASN1Coder):
    '''This adds support for the non-standard dict serialization.

    The coerce method also supports the following type:
        'dict': iteritems (or items)
    '''

    _typemap = ASN1Coder._typemap.copy()
    _typemap[dict] = 'dict'
    _tagmap = ASN1Coder._tagmap.copy()
    _tagmap[b'\xe0'] = 'dict'
    _typetag = dict((v, k) for k, v in _tagmap.items())

    def enc_dict(self, obj):
        '''Encode obj as alternating keys and values, in the
        iteration order of obj.'''

        # Prefer iteritems so that coerced objects written for Python 2
        # keep working, a Python 3 dict only has items.
        items = getattr(obj, 'iteritems', None) or obj.items
        r = b''.join(self.dumps(k) + self.dumps(v) for k, v in items())
        return _encodelen(len(r)) + r

    def dec_dict(self, d, pos, end):
        '''Decode the alternating keys and values in d[pos:end] into
        a dict.  Raises ValueError if a key has no value or if a value
        extends past end.'''

        r = {}
        vend = pos
        while pos < end:
            k, kend = self._loads(d, pos, end)
            if kend >= end:
                # nothing left for the value, do not read it from
                # whatever follows the dict
                raise ValueError('key past end')
            v, vend = self._loads(d, kend, end)
            if vend > end:
                raise ValueError('value past end')

            r[k] = v
            pos = vend

        return r, vend


_coder = ASN1DictCoder()
dumps = _coder.dumps
loads = _coder.loads


def deeptypecmp(obj, o):
    '''Return True if obj and o have the same type, and, for list, set
    and dict, if their items pairwise have the same types as well.

    Only types are compared, never values.  The items of a list or set
    are paired up w/ zip, so extra items of the longer one are ignored.
    The items of a dict are paired up in sorted order.'''

    def sorteditems(d):
        try:
            return sorted(d.items())
        except TypeError:
            # Python 3 refuses to order keys of different types, fall
            # back to an arbitrary but stable order.
            return sorted(d.items(),
                key=lambda kv: (type(kv[0]).__name__, repr(kv[0])))

    if type(obj) != type(o):
        return False

    if type(obj) in (bytes, _text_type):
        return True

    if type(obj) in (list, set):
        for i, j in zip(obj, o):
            if not deeptypecmp(i, j):
                return False

    if type(obj) in (dict,):
        for (k, v), (nk, nv) in zip(sorteditems(obj), sorteditems(o)):
            if not deeptypecmp(k, nk):
                return False
            if not deeptypecmp(v, nv):
                return False

    return True
class Test_deeptypecmp(unittest.TestCase):
    '''Tests for the deeptypecmp helper.'''

    def test_true(self):
        for i in ((1, 1), ('sldkfj', 'sldkfj')):
            self.assertTrue(deeptypecmp(*i))

    def test_false(self):
        # The string literals carry explicit u/b prefixes so that the two
        # sides differ in type on Python 3 as well as on Python 2.
        for i in (
            ([[]], [{}]),
            ([1], ['str']),
            ([], set()),
            ({1: 2, 5: u'sdlkfj'}, {1: 2, 5: b'sdlkfj'}),
            ({1: 2, u'sdlkfj': 5}, {1: 2, b'sdlkfj': 5}),
        ):
            self.assertFalse(deeptypecmp(*i))


def _unhex(hs):
    '''Convert the string of hex digits hs into a byte string.'''

    return bytes(bytearray.fromhex(hs))


def _hex(bs):
    '''Convert the byte string bs into a string of hex digits.'''

    return ''.join('%02x' % c for c in bytearray(bs))


def genfailures(obj):
    '''Encode obj, then replace each byte of the encoding, one at a time,
    w/ every other byte value and make sure that the change is noticed.

    A change counts as noticed if loads raises ValueError, KeyError,
    IndexError or TypeError, or if the result differs from obj in value
    or in type.  Raises AssertionError for a change that is not
    noticed.'''

    s = dumps(obj)
    for i in range(len(s)):
        for x in range(256):
            ts = s[:i] + bytes(bytearray((x,))) + s[i + 1:]
            if ts == s:
                continue
            try:
                o = loads(ts, consume=True)
                if o != obj or not deeptypecmp(o, obj):
                    raise ValueError
            except (ValueError, KeyError, IndexError, TypeError):
                pass
            else:
                raise AssertionError(
                    'uncaught modification: %s, byte %d, orig: %02x' %
                    (_hex(ts), i, bytearray(s)[i]))


class TestCode(unittest.TestCase):
    '''Tests for the module level dumps/loads and for ASN1Coder.'''

    def test_primv(self):
        self.assertEqual(dumps(-257), _unhex('0202feff'))
        self.assertEqual(dumps(-256), _unhex('0202ff00'))
        self.assertEqual(dumps(-255), _unhex('0202ff01'))
        self.assertEqual(dumps(-1), _unhex('0201ff'))
        self.assertEqual(dumps(5), _unhex('020105'))
        self.assertEqual(dumps(128), _unhex('02020080'))
        self.assertEqual(dumps(256), _unhex('02020100'))

        self.assertEqual(dumps(False), _unhex('010100'))
        self.assertEqual(dumps(True), _unhex('0101ff'))

        self.assertEqual(dumps(None), _unhex('0500'))

        self.assertEqual(dumps(.15625), _unhex('090380fb05'))

    def test_fuzzing(self):
        # Make sure that when a failure is detected here, that it
        # gets added to test_invalids, so that this function may be
        # disabled.
        genfailures(float(1))
        genfailures([1, 2, b'sdlkfj'])
        genfailures({1: 2, 5: b'sdlkfj'})
        genfailures(set([1, 2, b'sdlkfj']))
        genfailures(True)
        genfailures(datetime.datetime.utcnow())

    def test_invalids(self):
        # Add tests for base 8, 16 floats among others
        for v in [
            '010101',
            '0903040001',  # float scaling factor
            '0903840001',  # float scaling factor
            '0903100001',  # float base
            '0903900001',  # float base
            '0903000001',  # float decimal encoding
            '0903830001',  # float exponent encoding
            '090b827fffcc0df505d0fa58f7',  # float large exponent
            # list short string still valid
            '3007020101020102040673646c6b666a',
            # dict short value still valid
            'e007020101020102020105040673646c6b666a',
            # datetime w/ trailing zero
            '181632303136303231353038343031362e3539303839305a',
            # datetime w/ lower z
            '181632303136303231373136343034372e3035343433367a',
            # datetime w/ space
            '181632303136313220383031303933302e3931353133385a',
        ]:
            self.assertRaises(ValueError, loads, _unhex(v))

    def test_invalid_floats(self):
        # force an exponent that cannot be encoded in two octets
        with mock.patch('math.frexp', return_value=(.87232, 1 << 23)):
            self.assertRaises(ValueError, dumps, 1.1)

    def test_consume(self):
        b = dumps(5)
        self.assertRaises(ValueError, loads, b + b'398473',
            consume=True)

        # XXX - still possible that an internal data member
        # doesn't consume all

    # XXX - test that sets are ordered properly
    # XXX - test that dicts are ordered properly..

    def test_nan(self):
        s = dumps(float('nan'))
        v = loads(s)
        self.assertTrue(math.isnan(v))

    def test_cryptoutilasn1(self):
        '''Test DER sequences generated by Crypto.Util.asn1.'''

        for s, v in [
            (b'\x02\x03$\x8a\xf9', 2394873),
            (b'\x05\x00', None),
            (b'\x02\x03\x00\x96I', 38473),
            (b'\x04\x81\xc8' + b'\x00' * 200, b'\x00' * 200),
        ]:
            self.assertEqual(loads(s), v)

    def test_longstrings(self):
        for i in (203, 65484):
            s = os.urandom(i)
            v = dumps(s)
            self.assertEqual(loads(v), s)

    def test_invaliddate(self):
        pass
        # XXX - add test to reject datetime w/ tzinfo, or that it
        # handles it properly

    def test_dumps(self):
        for i in [
            None,
            True, False,
            -1, 0, 1, 255, 256, -255, -256,
            23498732498723, -2398729387234,
            (1 << 2383) + 23984734, (-1 << 1983) + 23984723984,
            float(0), float('-0'), float('inf'), float('-inf'),
            float(1.0), float(-1.0), float('353.3487'),
            float('2.38723873e+307'), float('2.387349e-317'),
            sys.float_info.max, sys.float_info.min,
            float('.15625'),
            b'weoifjwef',
            u'\U0001f4a9',
            [], [1, 2, 3],
            {}, {5: 10, b'adfkj': 34},
            set(), set((1, 2, 3)),
            set((1, b'sjlfdkj', None, float('inf'))),
            datetime.datetime.utcnow(),
            datetime.datetime.utcnow().replace(microsecond=0),
            datetime.datetime.utcnow().replace(microsecond=1000),
        ]:
            s = dumps(i)
            o = loads(s)
            self.assertEqual(i, o)

        tobj = {
            1: b'dflkj',
            5: u'sdlkfj',
            b'float': 1,
            b'largeint': 1 << 342,
            b'list': [1, 2, u'str', b'str'],
        }

        out = dumps(tobj)
        self.assertEqual(tobj, loads(out))

    def test_coerce(self):
        class Foo:
            pass

        class Bar:
            pass

        class Baz:
            pass

        def coerce(obj):
            if isinstance(obj, Foo):
                return 'list', obj.lst
            elif isinstance(obj, Baz):
                return 'bytes', obj.s

            raise TypeError('unknown type')

        ac = ASN1Coder(coerce)

        v = [1, 2, 3]
        o = Foo()
        o.lst = v

        self.assertEqual(ac.loads(ac.dumps(o)), v)
        self.assertRaises(TypeError, ac.dumps, Bar())

        # a byte string, text can not be turned into bytes w/o an
        # encoding on Python 3
        v = b'oiejfd'
        o = Baz()
        o.s = v

        es = ac.dumps(o)
        self.assertEqual(ac.loads(es), v)
        self.assertIsInstance(es, bytes)

        # the module level coder has no coerce function
        self.assertRaises(TypeError, dumps, o)

    def test_loads(self):
        self.assertRaises(ValueError, loads, b'\x00\x02\x00')

    def test_nodict(self):
        '''Verify that ASN1Coder does not support dict.'''

        self.assertRaises(KeyError, ASN1Coder().loads, dumps({}))