From 39c16fd527feebf9d16565848e969f60b3bb9843 Mon Sep 17 00:00:00 2001 From: John-Mark Gurney Date: Tue, 8 Oct 2024 17:53:56 -0700 Subject: [PATCH] add support for parent_refs... updates db.. --- ui/fixtures/cmd.parent_obj.json | 53 ++++++ ui/fixtures/test_bb98c5a2e486.sqlite | Bin 0 -> 77824 bytes ...dd_parent_mapping_table_for_parent_refs.py | 57 ++++++ ui/medashare/cli.py | 163 ++++++++++++++++-- ui/medashare/orm.py | 9 + 5 files changed, 263 insertions(+), 19 deletions(-) create mode 100644 ui/fixtures/cmd.parent_obj.json create mode 100644 ui/fixtures/test_bb98c5a2e486.sqlite create mode 100644 ui/medashare/alembic/versions/f514caeb3f39_add_parent_mapping_table_for_parent_refs.py diff --git a/ui/fixtures/cmd.parent_obj.json b/ui/fixtures/cmd.parent_obj.json new file mode 100644 index 0000000..b5d2b0d --- /dev/null +++ b/ui/fixtures/cmd.parent_obj.json @@ -0,0 +1,53 @@ +[ +{ + "title": "gen ident", + "cmd": [ "genident", "name=A Test User" ], + "exit": 0 +}, +{ + "title": "add tag", + "cmd": [ "modify", "+tag=foo", "newfile.txt" ] +}, +{ + "title": "that a new object can be created w/o a file", + "cmd": [ "new", "ms:tag=random", "some=tag" ], + "store": [ "newuuid", "stdout" ], + "stdout_re": "^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}\n$" +}, +{ + "special": "verify store object cnt", + "comment": "and the object was stored", + "count": 3 +}, +{ + "title": "that thew new object can be added as a parent", + "format": [ "cmd" ], + "cmd": [ "modify", "+parent_refs={newuuid}", "newfile.txt" ] +}, +{ + "title": "newfile.txt has parent tags", + "cmd": [ "list", "newfile.txt" ], + "format": [ "stdout_re" ], + "stdout_re": "hashes:\tsha512:90f8342520f0ac57fb5a779f5d331c2fa87aa40f8799940257f9ba619940951e67143a8d746535ed0284924b2b7bc1478f095198800ba96d01847d7b56ca465c\nms:tag:\trandom\nparent_refs:\t{newuuid}\nsig:\t.*\nsig:\t.*\nsome:\ttag\ntag:\tfoo\n" +}, +{ + "title": "search includes newfile.txt ", + "cmd": [ "search", "file", "+ms:tag=random" ], + "stdout_re": "newfile.txt\n$" +}, +{ + "title": "that thew new object can be added as a parent", + "format": [ "cmd" ], + "cmd": [ "modify", "+some=tag", "test.txt" ] +}, +{ + "title": "search excludes newfile.txt ", + "cmd": [ "search", "file", "-ms:tag=random" ], + "stdout_re": "^.*test.txt\n$" +}, +{ + "title": "search excludes newfile.txt ", + "cmd": [ "search", "file", "+some=tag", "-ms:tag=random" ], + "stdout_re": "^.*test.txt\n$" +} +] diff --git a/ui/fixtures/test_bb98c5a2e486.sqlite b/ui/fixtures/test_bb98c5a2e486.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..cdd0e4ac625bf0dcfd63aa7d8bf5578c87f265a9 GIT binary patch literal 77824 zcmeI)?{6D-9l-HBpMS)^a%sajsuFrtn>JCC;opuYO)O2l5-mTdlT@q^u+H|Gn8vZ^ zIHj48n2Qj5i24XkJmTS=3?^+88xq?eFo_3DKn18z$|MkIgCPxR5BT2s2RWy%JET6S z^*%Md``&%;`~7@g-?P)NufB~dE0w0Hmurox(bVJ8ge1$-GrBHGk}Q5E#ZPy!%I)#QP?r-jat2G)H|zVoz^xNFRozExzfBOC9pd+yUqt@_ST-{iX40N?t zs+22cNxvj6#LeZ^tTlMnHL71+S-&`^TbEt;c|jz|w585*S+FlP>fLP>|c+H=#gWOu&z!jAYib>7%-*6ecraG&|;;nH~Uo|O9B zFLG)Am{(goIjFnNOE4epY#a}LSsJrCnO7flTIb@*M>`u6>Zk3~z83Uq8}oyD>mT}j zw0}>irHuKsQ>WzCmOW70wY{cu!|xcP-f?Sutb%h49M|>m5o$LeF7T+|t7T6ON~70l zKHA$1v`%<^+L<$Q>!25}^}Z+GL##TTP2Z8=I|u%s@EyrHUbO8z=5#w_kxtU*)t1jZ zD4k9i?Swjou~x|K(@vk3Tg7%t#@=@OC9~bz@;DRf?YHw0rS9v}5vz4ocMlwSwL23L zP0Z<5eUU}nMjxV_7YSp;^!-$Ra| z>|k26vQN8kLB8d;ZHyhWTBsEBH_XOfrMBBwbU8ZfJ7{&=IW51tUmXauw7#~nnOhX$ z>3uEa>(`6ABg8}P3Am?SUhT?-L2&(zMf-Lr;@!Z0O9+b%S^Y>7e^?Me009ILKmY** z5I_I{1Q0-ALxV|3_qSNdo}{5I_I{1Q0*~0R#|00D+Ex zs!aP2&;Q%^|38z|&mcko0R#|0009ILKmY**5I_KdQ50~=F3Fnzi!c4NAb(fB*srAb009ILKmY**5I_I{1Q6iP2q1s}0tg_000Iag zfB*ucFTnl((H~k<`qCGzR-=(fIu=hxdktO-1?xt`+-+Lt??ukKC(hry zcKd4N()O!!Mfu{o;df8G^ZxbZ*1uNnytez#_g?tIw>}BEn+J7MG;KDGlF>9m?*08r z$vts1{_aoaURk~V`iFn5|MacfjqdT*lFQ25HB~4SMYi!|GFgr%iY3D+r_6G!7)?i` ziKJOh49Yv6xqI*sxhmyaE#xg24GZYOiN`b724&HoZ7+*wWRXSddsDHrnM@j)STYrl zBoe6w!-y5dW;78?lvB}2W>6Nf%-tIgn}rzT#*h?ZgHz}q!fp!I{eMNhEvbK0Z>w)T znv0xP5kLR|1Q0*~0R#|0009ILK%igXtgPS#C1kl^$+`Gg75$L`%^{$ z0R#|0009ILKmY**5I|t`1o-~{=#42EA%Fk^2q1s}0tg_000Iag&@W)!|M#fBmDEqw zkJNkWpTs5$0tg_000IagfB*srAbsF*m@#F))qDUv&4?*y{ z>|G77EY{-T2-f|7kNSb6eysjQ{j>Uk`k%)jK|(|T0R#|0009ILKmY**5I_I{Ng7k0 zkgfT@$EQrnV*c+AC?~|)dI+M@-qmnkWJSpR4`+|Cc5859-V6FCJZj zw2S}(2q1s}0tg_000IagfB*uLG~*G||A(0WE9X68`agXB?>cAI8~XVF&iwz5q<*4) wsNP{D5I_I{1Q0*~0R#|0009ILK;ZEb2q{x?)odCiqiHCI3$CCt None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('parentmap', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('obj', medashare.orm.UUID(length=32), nullable=False), + sa.Column('parentid', medashare.orm.UUID(length=32), nullable=False), + sa.ForeignKeyConstraint(['obj'], ['metadata_objects.uuid'], ), + sa.ForeignKeyConstraint(['parentid'], ['metadata_objects.uuid'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('idx_modified', 'metadata_objects', ['modified'], unique=False) + # ### end Alembic commands ### + + connection = op.get_bind() + + mdo = sa.schema.MetaData() + mdotbl = sa.Table('metadata_objects', mdo, autoload_with=connection.engine) + + stmt = sa.select(mdotbl.c.data).where(mdotbl.c.type == 'metadata') + with Session(connection) as session: + strcache = StringCache(session) + + for (data, ) in connection.execute(stmt): + obj = mdb.MDBase.decode(data) + #print('mig:', repr(obj)) + if 'parent_refs' in obj: + ObjectStore._update_metadata_indexes(session, obj, strcache) + + session.commit() + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index('idx_modified', table_name='metadata_objects') + op.drop_table('parentmap') + # ### end Alembic commands ### diff --git a/ui/medashare/cli.py b/ui/medashare/cli.py index fd689e0..2ceb197 100644 --- a/ui/medashare/cli.py +++ b/ui/medashare/cli.py @@ -21,7 +21,7 @@ if False: from .utils import _debprint, enable_debug, disable_debug -def _getquery(q, objstr): +def _getquery(q, objstr): #pragma: no cover return repr(str(q.compile(objstr._engine, compile_kwargs={"literal_binds": True})).replace('\n', ' ')) @@ -49,11 +49,12 @@ import io import itertools import json import libarchive -import magic +from . import magic import operator import os.path import pathlib import pasn1 +import aiosqlite import re import shutil import socket @@ -89,7 +90,7 @@ def _keyordering(x): return (2**32, k, v) def _iterdictlist(obj, **kwargs): - l = list(sorted(obj.items(**kwargs), key=_keyordering)) + l = sorted(obj.items(**kwargs), key=_keyordering) for k, v in l: if isinstance(v, list): for i in sorted(v): @@ -318,6 +319,9 @@ class ObjectStore(object): config = Config() config.set_main_option("script_location", "medashare:alembic") + #print('f:', repr(command.history(config))) + #print('s:', repr(command.show(config, 'head'))) + with self._engine.begin() as connection: config.attributes['engine'] = self._engine command.upgrade(config, version) @@ -434,12 +438,25 @@ class ObjectStore(object): orm.PropertyMapping.obj == obj.uuid) session.execute(stmt) + try: + # Pre upgrade may not have this. + stmt = delete(orm.ParentMapping).where( + orm.ParentMapping.obj == obj.uuid) + session.execute(stmt) + except sqlalchemy.exc.OperationalError: + pass + props = [ x for x in obj.items() if x[0] not in { 'hashes', 'sig', - 'parent_refs', } ] for k, vids in props: + if k == 'parent_refs': + for v in vids: + #print('adding:', repr(v)) + session.add(orm.ParentMapping(obj=obj.uuid, parentid=v)) + continue + kid = strcache[k] if not isinstance(vids, list): @@ -1005,7 +1022,7 @@ def cmd_modify(options, persona, objstr, cache): sys.exit(1) badtags = list(x[1] for x in props if x[1] in (MDBase._common_names | - MDBase._common_optional)) + MDBase._common_optional - { 'parent_refs' })) if any(badtags): print('ERROR: invalid tag%s: %s.' % ( 's' if len(badtags) > 1 else '', repr(badtags)), file=sys.stderr) @@ -1064,6 +1081,19 @@ def cmd_modify(options, persona, objstr, cache): objstr.loadobj(nobj) +@init_datastructs +def cmd_new(options, persona, objstr, cache): + props = dict(x.split('=', 1) for x in options.tagvalues) + + obj = persona.MetaData(**props) + + nobj = MDBase.create_obj(obj) + + objstr.loadobj(nobj) + + # Print out newly created UUID + print(nobj.uuid) + def printhost(host): print('%s\t%s' % (host.name, host.hostuuid)) @@ -1431,12 +1461,24 @@ def cmd_list(options, persona, objstr, cache): exit = 1 continue - for j in objstr.by_file(i): - if options.json: + objs = objstr.by_file(i) + + # XXX - json encoding includes common properties, and will include + # parent refs as whole objects + if options.json: + for j in objs: print(j.encode('json')) - else: - for k, v in _iterdictlist(j): - print('%s:\t%s' % (k, v)) + else: + alltags = [ _iterdictlist(x) for x in objs ] + + # XXX - includes multiple sig lines when + # including parent object, drop these? + + alltags += [ _iterdictlist(objstr.by_id(y)) for x in objs if + hasattr(x, 'parent_refs') for y in x['parent_refs'] ] + + for k, v in sorted(itertools.chain(*alltags)): + print('%s:\t%s' % (k, v)) if exit: sys.exit(exit) @@ -1660,6 +1702,7 @@ def cmd_search(options, persona, objstr, cache): value = None # handle meta tree + # meta doesn't look at parent objs, should it? if key.startswith('meta:'): mat = _metaopre.match(origvalue) if not mat: @@ -1683,6 +1726,15 @@ def cmd_search(options, persona, objstr, cache): subq = subq.where(propmapsub.valueid == svaluemap.id, svaluemap.str == value) + # subq should have the metadata objs selected, now to walk + # them to the top level MetaData objs. + subq = subq.cte(recursive=True) + + subq = subq.union_all(select(orm.ParentMapping.obj).where( + subq.c.obj == orm.ParentMapping.parentid)) + + subq = select(subq.c.obj) + #subq = subq.subquery() if op == '+': @@ -1693,7 +1745,8 @@ def cmd_search(options, persona, objstr, cache): else: raise ValueError('unhandled op: %s' % repr(op)) - # propobj should have all the ones we need selected, map back to + + # propobj should have all the metadata objs we need selected, map back to # the object we need # base object (file) @@ -1805,13 +1858,19 @@ def main(): # used so that - isn't treated as an option parser_mod = subparsers.add_parser('modify', - help='modify tags on file(s)', prefix_chars='@') + help='modify tags on file(s) or create a new MetaData object', prefix_chars='@') parser_mod.add_argument('modtagvalues', nargs='+', help='add (+) or delete (-) the tag=[value], for the specified files') - parser_mod.add_argument('files', nargs='+', + parser_mod.add_argument('files', nargs='*', help='files to modify') parser_mod.set_defaults(func=cmd_modify) + parser_new = subparsers.add_parser('new', + help='create a new object, outputs the UUID') + parser_new.add_argument('tagvalues', nargs='+', + help='adds tag=value to the new object') + parser_new.set_defaults(func=cmd_new) + parser_auto = subparsers.add_parser('auto', help='automatic detection of file properties') parser_auto.add_argument('files', nargs='+', @@ -1926,10 +1985,12 @@ class _TestCononicalCoder(unittest.TestCase): # they are now encoded the same self.assertEqual(astr, bstr) -class _TestMigrations(unittest.TestCase): +class _TestMigrations(unittest.IsolatedAsyncioTestCase): def setUp(self): + # file:memdb1?mode=memory&cache=shared self._engine = create_engine('sqlite+pysqlite:///:memory:', echo=_sql_verbose, future=True) + #print(repr(self._engine.raw_connection().backup)) def test_f2131(self): # That an object store generated at the start @@ -1964,10 +2025,11 @@ class _TestMigrations(unittest.TestCase): # for i in session.query(orm.MetaDataObject).all(): # _debprint('c:', repr(i)) + #@mock.patch('medashare.orm.MetaDataObject') def test_dff0d(self): - # That an object store generated at the start + # That an object store generated at the previous step + # XXX - this isn't testing what I think it is. objstr = ObjectStore(self._engine, 'dff0d9ed0be1') - pers = Persona() pers.generate_key() objstr.loadobj(pers.get_identity()) @@ -1992,6 +2054,57 @@ class _TestMigrations(unittest.TestCase): session.execute(select(orm.PropertyMapping)) } self.assertEqual(pm, { (obj.uuid, other, baz) }) + async def test_f514c(self): + + tf = pathlib.Path('fixtures/test_bb98c5a2e486.sqlite').absolute() + #print(repr(tf)) + + if False: + # That an object store generated at the previous step + objstr = ObjectStore(self._engine, 'bb98c5a2e486') + + persona = Persona() + parobja = MetaData(foo='bar', created_by_ref=persona.uuid) + parobjb = MetaData(foo='barbaz', created_by_ref=persona.uuid) + obj = MetaData(baz='bleh', parent_refs=[ parobja.uuid, parobjb.uuid ], created_by_ref=persona.uuid) + + objstr.loadobj(parobja) + objstr.loadobj(parobjb) + objstr.loadobj(obj) + + objcon = self._engine.raw_connection() + + # make sure we start clean + tf.unlink(missing_ok=True) + engine_file = sqlalchemy.create_engine('sqlite+pysqlite:///' + str(tf)) + raw_connection_file = engine_file.raw_connection() + objcon.backup(raw_connection_file.connection) + raw_connection_file.close() + engine_file.dispose() + objcon.close() + + self.assertTrue(False) + return + + #restore data + objcon = self._engine.raw_connection() + engine_file = sqlalchemy.create_engine('sqlite+pysqlite:///' + str(tf)) + raw_connection_file = engine_file.raw_connection() + raw_connection_file.connection.backup(objcon.connection) + raw_connection_file.close() + engine_file.dispose() + objcon.close() + + objstr = ObjectStore(self._engine, 'bb98c5a2e486') + + objstr._handle_migration('head') + + # validate upgrade + with objstr._ses() as session: + c = session.query(orm.ParentMapping.id).count() + + self.assertEqual(c, 2) + class _TestCases(unittest.TestCase): def setUp(self): self.fixtures = pathlib.Path('fixtures').resolve() @@ -2277,7 +2390,7 @@ class _TestCases(unittest.TestCase): oobj = objst.by_id(oid) odict = dict(list(oobj.items())) - # that is has the overlays property + # that it has the overlays property self.assertEqual(odict['parent_refs'], [ bid ]) # that it doesn't have a common property @@ -2550,6 +2663,7 @@ class _TestCases(unittest.TestCase): newtestfname = os.path.join(self.tempdir, 'newfile.txt') patches = [] + variables = {} for idx, cmd in enumerate(cmds): try: @@ -2559,11 +2673,12 @@ class _TestCases(unittest.TestCase): pass for i in cmd.get('format', []): + vars = locals().copy() + vars.update(variables) if i in { 'cmd', 'files' }: - vars = locals() cmd[i] = [ x.format(**vars) for x in cmd[i] ] else: - cmd[i] = cmd[i].format(**locals()) + cmd[i] = cmd[i].format(**vars) try: special = cmd['special'] @@ -2692,6 +2807,16 @@ class _TestCases(unittest.TestCase): self.assertEqual(cm.exception.code, cmd.get('exit', 0)) + # any store commands: + if 'store' in cmd: + st = cmd['store'] + if st[1] == 'stdout': + tostore = stdout.getvalue().strip() + else: #pragma: no cover + raise RuntimeError('unknown store: %s' % repr(st[1])) + + variables[st[0]] = tostore + patches.reverse() for i in patches: i.stop() diff --git a/ui/medashare/orm.py b/ui/medashare/orm.py index 09f6e9e..4d19821 100644 --- a/ui/medashare/orm.py +++ b/ui/medashare/orm.py @@ -77,6 +77,7 @@ class MetaDataObject(Base): data = Column(MDBaseType) Index("idx_type", type) + Index("idx_modified", modified) def __repr__(self): return \ @@ -84,6 +85,14 @@ class MetaDataObject(Base): ' data=%s)' % (repr(self.uuid), repr(self.type), repr(self.modified), repr(self.data)) +class ParentMapping(Base): + __tablename__ = 'parentmap' + + id = Column(Integer, primary_key=True) + + obj = Column(UUID, ForeignKey(MetaDataObject.uuid), nullable=False) + parentid = Column(UUID, ForeignKey(MetaDataObject.uuid), nullable=False) + class PropertyMapping(Base): __tablename__ = 'propmap'