diff --git a/ui/fixtures/cmd.parent_obj.json b/ui/fixtures/cmd.parent_obj.json new file mode 100644 index 0000000..b5d2b0d --- /dev/null +++ b/ui/fixtures/cmd.parent_obj.json @@ -0,0 +1,53 @@ +[ +{ + "title": "gen ident", + "cmd": [ "genident", "name=A Test User" ], + "exit": 0 +}, +{ + "title": "add tag", + "cmd": [ "modify", "+tag=foo", "newfile.txt" ] +}, +{ + "title": "that a new object can be created w/o a file", + "cmd": [ "new", "ms:tag=random", "some=tag" ], + "store": [ "newuuid", "stdout" ], + "stdout_re": "^[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}\n$" +}, +{ + "special": "verify store object cnt", + "comment": "and the object was stored", + "count": 3 +}, +{ + "title": "that thew new object can be added as a parent", + "format": [ "cmd" ], + "cmd": [ "modify", "+parent_refs={newuuid}", "newfile.txt" ] +}, +{ + "title": "newfile.txt has parent tags", + "cmd": [ "list", "newfile.txt" ], + "format": [ "stdout_re" ], + "stdout_re": "hashes:\tsha512:90f8342520f0ac57fb5a779f5d331c2fa87aa40f8799940257f9ba619940951e67143a8d746535ed0284924b2b7bc1478f095198800ba96d01847d7b56ca465c\nms:tag:\trandom\nparent_refs:\t{newuuid}\nsig:\t.*\nsig:\t.*\nsome:\ttag\ntag:\tfoo\n" +}, +{ + "title": "search includes newfile.txt ", + "cmd": [ "search", "file", "+ms:tag=random" ], + "stdout_re": "newfile.txt\n$" +}, +{ + "title": "that thew new object can be added as a parent", + "format": [ "cmd" ], + "cmd": [ "modify", "+some=tag", "test.txt" ] +}, +{ + "title": "search excludes newfile.txt ", + "cmd": [ "search", "file", "-ms:tag=random" ], + "stdout_re": "^.*test.txt\n$" +}, +{ + "title": "search excludes newfile.txt ", + "cmd": [ "search", "file", "+some=tag", "-ms:tag=random" ], + "stdout_re": "^.*test.txt\n$" +} +] diff --git a/ui/fixtures/test_bb98c5a2e486.sqlite b/ui/fixtures/test_bb98c5a2e486.sqlite new file mode 100644 index 0000000..cdd0e4a Binary files /dev/null and b/ui/fixtures/test_bb98c5a2e486.sqlite differ diff --git a/ui/medashare/alembic/versions/f514caeb3f39_add_parent_mapping_table_for_parent_refs.py b/ui/medashare/alembic/versions/f514caeb3f39_add_parent_mapping_table_for_parent_refs.py new file mode 100644 index 0000000..18ddc85 --- /dev/null +++ b/ui/medashare/alembic/versions/f514caeb3f39_add_parent_mapping_table_for_parent_refs.py @@ -0,0 +1,57 @@ +"""add parent mapping table for parent_refs + +Revision ID: f514caeb3f39 +Revises: bb98c5a2e486 +Create Date: 2024-10-07 16:55:59.531649 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.orm import Session +import medashare +from medashare import mdb +from medashare.cli import StringCache, ObjectStore + + +# revision identifiers, used by Alembic. +revision = 'f514caeb3f39' +down_revision = 'bb98c5a2e486' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('parentmap', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('obj', medashare.orm.UUID(length=32), nullable=False), + sa.Column('parentid', medashare.orm.UUID(length=32), nullable=False), + sa.ForeignKeyConstraint(['obj'], ['metadata_objects.uuid'], ), + sa.ForeignKeyConstraint(['parentid'], ['metadata_objects.uuid'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('idx_modified', 'metadata_objects', ['modified'], unique=False) + # ### end Alembic commands ### + + connection = op.get_bind() + + mdo = sa.schema.MetaData() + mdotbl = sa.Table('metadata_objects', mdo, autoload_with=connection.engine) + + stmt = sa.select(mdotbl.c.data).where(mdotbl.c.type == 'metadata') + with Session(connection) as session: + strcache = StringCache(session) + + for (data, ) in connection.execute(stmt): + obj = mdb.MDBase.decode(data) + #print('mig:', repr(obj)) + if 'parent_refs' in obj: + ObjectStore._update_metadata_indexes(session, obj, strcache) + + session.commit() + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index('idx_modified', table_name='metadata_objects') + op.drop_table('parentmap') + # ### end Alembic commands ### diff --git a/ui/medashare/cli.py b/ui/medashare/cli.py index fd689e0..2ceb197 100644 --- a/ui/medashare/cli.py +++ b/ui/medashare/cli.py @@ -21,7 +21,7 @@ if False: from .utils import _debprint, enable_debug, disable_debug -def _getquery(q, objstr): +def _getquery(q, objstr): #pragma: no cover return repr(str(q.compile(objstr._engine, compile_kwargs={"literal_binds": True})).replace('\n', ' ')) @@ -49,11 +49,12 @@ import io import itertools import json import libarchive -import magic +from . import magic import operator import os.path import pathlib import pasn1 +import aiosqlite import re import shutil import socket @@ -89,7 +90,7 @@ def _keyordering(x): return (2**32, k, v) def _iterdictlist(obj, **kwargs): - l = list(sorted(obj.items(**kwargs), key=_keyordering)) + l = sorted(obj.items(**kwargs), key=_keyordering) for k, v in l: if isinstance(v, list): for i in sorted(v): @@ -318,6 +319,9 @@ class ObjectStore(object): config = Config() config.set_main_option("script_location", "medashare:alembic") + #print('f:', repr(command.history(config))) + #print('s:', repr(command.show(config, 'head'))) + with self._engine.begin() as connection: config.attributes['engine'] = self._engine command.upgrade(config, version) @@ -434,12 +438,25 @@ class ObjectStore(object): orm.PropertyMapping.obj == obj.uuid) session.execute(stmt) + try: + # Pre upgrade may not have this. + stmt = delete(orm.ParentMapping).where( + orm.ParentMapping.obj == obj.uuid) + session.execute(stmt) + except sqlalchemy.exc.OperationalError: + pass + props = [ x for x in obj.items() if x[0] not in { 'hashes', 'sig', - 'parent_refs', } ] for k, vids in props: + if k == 'parent_refs': + for v in vids: + #print('adding:', repr(v)) + session.add(orm.ParentMapping(obj=obj.uuid, parentid=v)) + continue + kid = strcache[k] if not isinstance(vids, list): @@ -1005,7 +1022,7 @@ def cmd_modify(options, persona, objstr, cache): sys.exit(1) badtags = list(x[1] for x in props if x[1] in (MDBase._common_names | - MDBase._common_optional)) + MDBase._common_optional - { 'parent_refs' })) if any(badtags): print('ERROR: invalid tag%s: %s.' % ( 's' if len(badtags) > 1 else '', repr(badtags)), file=sys.stderr) @@ -1064,6 +1081,19 @@ def cmd_modify(options, persona, objstr, cache): objstr.loadobj(nobj) +@init_datastructs +def cmd_new(options, persona, objstr, cache): + props = dict(x.split('=', 1) for x in options.tagvalues) + + obj = persona.MetaData(**props) + + nobj = MDBase.create_obj(obj) + + objstr.loadobj(nobj) + + # Print out newly created UUID + print(nobj.uuid) + def printhost(host): print('%s\t%s' % (host.name, host.hostuuid)) @@ -1431,12 +1461,24 @@ def cmd_list(options, persona, objstr, cache): exit = 1 continue - for j in objstr.by_file(i): - if options.json: + objs = objstr.by_file(i) + + # XXX - json encoding includes common properties, and will include + # parent refs as whole objects + if options.json: + for j in objs: print(j.encode('json')) - else: - for k, v in _iterdictlist(j): - print('%s:\t%s' % (k, v)) + else: + alltags = [ _iterdictlist(x) for x in objs ] + + # XXX - includes multiple sig lines when + # including parent object, drop these? + + alltags += [ _iterdictlist(objstr.by_id(y)) for x in objs if + hasattr(x, 'parent_refs') for y in x['parent_refs'] ] + + for k, v in sorted(itertools.chain(*alltags)): + print('%s:\t%s' % (k, v)) if exit: sys.exit(exit) @@ -1660,6 +1702,7 @@ def cmd_search(options, persona, objstr, cache): value = None # handle meta tree + # meta doesn't look at parent objs, should it? if key.startswith('meta:'): mat = _metaopre.match(origvalue) if not mat: @@ -1683,6 +1726,15 @@ def cmd_search(options, persona, objstr, cache): subq = subq.where(propmapsub.valueid == svaluemap.id, svaluemap.str == value) + # subq should have the metadata objs selected, now to walk + # them to the top level MetaData objs. + subq = subq.cte(recursive=True) + + subq = subq.union_all(select(orm.ParentMapping.obj).where( + subq.c.obj == orm.ParentMapping.parentid)) + + subq = select(subq.c.obj) + #subq = subq.subquery() if op == '+': @@ -1693,7 +1745,8 @@ def cmd_search(options, persona, objstr, cache): else: raise ValueError('unhandled op: %s' % repr(op)) - # propobj should have all the ones we need selected, map back to + + # propobj should have all the metadata objs we need selected, map back to # the object we need # base object (file) @@ -1805,13 +1858,19 @@ def main(): # used so that - isn't treated as an option parser_mod = subparsers.add_parser('modify', - help='modify tags on file(s)', prefix_chars='@') + help='modify tags on file(s) or create a new MetaData object', prefix_chars='@') parser_mod.add_argument('modtagvalues', nargs='+', help='add (+) or delete (-) the tag=[value], for the specified files') - parser_mod.add_argument('files', nargs='+', + parser_mod.add_argument('files', nargs='*', help='files to modify') parser_mod.set_defaults(func=cmd_modify) + parser_new = subparsers.add_parser('new', + help='create a new object, outputs the UUID') + parser_new.add_argument('tagvalues', nargs='+', + help='adds tag=value to the new object') + parser_new.set_defaults(func=cmd_new) + parser_auto = subparsers.add_parser('auto', help='automatic detection of file properties') parser_auto.add_argument('files', nargs='+', @@ -1926,10 +1985,12 @@ class _TestCononicalCoder(unittest.TestCase): # they are now encoded the same self.assertEqual(astr, bstr) -class _TestMigrations(unittest.TestCase): +class _TestMigrations(unittest.IsolatedAsyncioTestCase): def setUp(self): + # file:memdb1?mode=memory&cache=shared self._engine = create_engine('sqlite+pysqlite:///:memory:', echo=_sql_verbose, future=True) + #print(repr(self._engine.raw_connection().backup)) def test_f2131(self): # That an object store generated at the start @@ -1964,10 +2025,11 @@ class _TestMigrations(unittest.TestCase): # for i in session.query(orm.MetaDataObject).all(): # _debprint('c:', repr(i)) + #@mock.patch('medashare.orm.MetaDataObject') def test_dff0d(self): - # That an object store generated at the start + # That an object store generated at the previous step + # XXX - this isn't testing what I think it is. objstr = ObjectStore(self._engine, 'dff0d9ed0be1') - pers = Persona() pers.generate_key() objstr.loadobj(pers.get_identity()) @@ -1992,6 +2054,57 @@ class _TestMigrations(unittest.TestCase): session.execute(select(orm.PropertyMapping)) } self.assertEqual(pm, { (obj.uuid, other, baz) }) + async def test_f514c(self): + + tf = pathlib.Path('fixtures/test_bb98c5a2e486.sqlite').absolute() + #print(repr(tf)) + + if False: + # That an object store generated at the previous step + objstr = ObjectStore(self._engine, 'bb98c5a2e486') + + persona = Persona() + parobja = MetaData(foo='bar', created_by_ref=persona.uuid) + parobjb = MetaData(foo='barbaz', created_by_ref=persona.uuid) + obj = MetaData(baz='bleh', parent_refs=[ parobja.uuid, parobjb.uuid ], created_by_ref=persona.uuid) + + objstr.loadobj(parobja) + objstr.loadobj(parobjb) + objstr.loadobj(obj) + + objcon = self._engine.raw_connection() + + # make sure we start clean + tf.unlink(missing_ok=True) + engine_file = sqlalchemy.create_engine('sqlite+pysqlite:///' + str(tf)) + raw_connection_file = engine_file.raw_connection() + objcon.backup(raw_connection_file.connection) + raw_connection_file.close() + engine_file.dispose() + objcon.close() + + self.assertTrue(False) + return + + #restore data + objcon = self._engine.raw_connection() + engine_file = sqlalchemy.create_engine('sqlite+pysqlite:///' + str(tf)) + raw_connection_file = engine_file.raw_connection() + raw_connection_file.connection.backup(objcon.connection) + raw_connection_file.close() + engine_file.dispose() + objcon.close() + + objstr = ObjectStore(self._engine, 'bb98c5a2e486') + + objstr._handle_migration('head') + + # validate upgrade + with objstr._ses() as session: + c = session.query(orm.ParentMapping.id).count() + + self.assertEqual(c, 2) + class _TestCases(unittest.TestCase): def setUp(self): self.fixtures = pathlib.Path('fixtures').resolve() @@ -2277,7 +2390,7 @@ class _TestCases(unittest.TestCase): oobj = objst.by_id(oid) odict = dict(list(oobj.items())) - # that is has the overlays property + # that it has the overlays property self.assertEqual(odict['parent_refs'], [ bid ]) # that it doesn't have a common property @@ -2550,6 +2663,7 @@ class _TestCases(unittest.TestCase): newtestfname = os.path.join(self.tempdir, 'newfile.txt') patches = [] + variables = {} for idx, cmd in enumerate(cmds): try: @@ -2559,11 +2673,12 @@ class _TestCases(unittest.TestCase): pass for i in cmd.get('format', []): + vars = locals().copy() + vars.update(variables) if i in { 'cmd', 'files' }: - vars = locals() cmd[i] = [ x.format(**vars) for x in cmd[i] ] else: - cmd[i] = cmd[i].format(**locals()) + cmd[i] = cmd[i].format(**vars) try: special = cmd['special'] @@ -2692,6 +2807,16 @@ class _TestCases(unittest.TestCase): self.assertEqual(cm.exception.code, cmd.get('exit', 0)) + # any store commands: + if 'store' in cmd: + st = cmd['store'] + if st[1] == 'stdout': + tostore = stdout.getvalue().strip() + else: #pragma: no cover + raise RuntimeError('unknown store: %s' % repr(st[1])) + + variables[st[0]] = tostore + patches.reverse() for i in patches: i.stop() diff --git a/ui/medashare/orm.py b/ui/medashare/orm.py index 09f6e9e..4d19821 100644 --- a/ui/medashare/orm.py +++ b/ui/medashare/orm.py @@ -77,6 +77,7 @@ class MetaDataObject(Base): data = Column(MDBaseType) Index("idx_type", type) + Index("idx_modified", modified) def __repr__(self): return \ @@ -84,6 +85,14 @@ class MetaDataObject(Base): ' data=%s)' % (repr(self.uuid), repr(self.type), repr(self.modified), repr(self.data)) +class ParentMapping(Base): + __tablename__ = 'parentmap' + + id = Column(Integer, primary_key=True) + + obj = Column(UUID, ForeignKey(MetaDataObject.uuid), nullable=False) + parentid = Column(UUID, ForeignKey(MetaDataObject.uuid), nullable=False) + class PropertyMapping(Base): __tablename__ = 'propmap'