Browse Source

significantly improve search results, especially in the exclusion case.

The original query applied a complicated test, and sqlite couldn't
tell whether it applied to all.

In the case of any inclusion, it's easy, only search metadata, and match
to files.

If all terms are exclusions, make two parts: the part w/ a metadata
object that doesn't have the exclusions, and the part w/o any metadata
objects.

Both of these latter two queries can be satisfied more simply and with
proper indices.

The old query might have worked fine on a more advanced DB, but this
change was necessary for decent performance.
main
John-Mark Gurney 1 year ago
parent
commit
9279a559f5
1 changed files with 84 additions and 32 deletions
  1. +84
    -32
      ui/medashare/cli.py

+ 84
- 32
ui/medashare/cli.py View File

@@ -1552,26 +1552,17 @@ def cmd_search(options, persona, objstr, cache):
_type = args.pop(0) _type = args.pop(0)


searches = [ (x[0], ) + tuple(x[1:].split('=', 1)) for x in args ] searches = [ (x[0], ) + tuple(x[1:].split('=', 1)) for x in args ]
#print(repr(searches), file=_real_stderr)
#_debprint(repr(searches))


# base object
mdofile = aliased(orm.MetaDataObject)
# hashes of base object
htfile = aliased(orm.HashTable)
propmap = aliased(orm.PropertyMapping)


sel = select(mdofile.data).execution_options(yield_per=10).where(
# we are operating on files
mdofile.type == 'file',
# we get all the hashes for the files
mdofile.uuid == htfile.uuid)
# propobj only returns what can match query
propobj = select(propmap.obj)


for i in searches:
# hashes to metadata objects
htmd = aliased(orm.HashTable)
# metadataobjects
mdomd = aliased(orm.MetaDataObject)
onlyexclusions = True


propmap = aliased(orm.PropertyMapping)
for i in searches:
propmapsub = aliased(orm.PropertyMapping)
skeymap = aliased(orm.StringTable) skeymap = aliased(orm.StringTable)
svaluemap = aliased(orm.StringTable) svaluemap = aliased(orm.StringTable)


@@ -1581,32 +1572,79 @@ def cmd_search(options, persona, objstr, cache):
op, key = i op, key = i
value = None value = None


subq = select(func.count(propmap.id)).where(
# match to metadata objects
htfile.hash == htmd.hash,
# we get all the metadata for those hashes
htmd.uuid == mdomd.uuid,
mdomd.type == 'metadata',

# find their properties
mdomd.uuid == propmap.obj,

subq = select(propmapsub.obj).where(
# that match the key # that match the key
propmap.keyid == skeymap.id, skeymap.str == key)
propmapsub.keyid == skeymap.id, skeymap.str == key)


if value is not None: if value is not None:
subq = subq.where(propmap.valueid == svaluemap.id,
subq = subq.where(propmapsub.valueid == svaluemap.id,
svaluemap.str == value) svaluemap.str == value)


subq = subq.scalar_subquery()
#subq = subq.subquery()


if op == '+': if op == '+':
sel = sel.where(subq != 0)
onlyexclusions = False
propobj = propobj.where(propmap.obj.in_(subq))
elif op == '-': elif op == '-':
sel = sel.where(subq == 0)
propobj = propobj.where(propmap.obj.notin_(subq))
else: else:
raise ValueError('unhandled op: %s' % repr(op)) raise ValueError('unhandled op: %s' % repr(op))


# propobj should have all the ones we need selected, map back to
# the object we need

# base object (file)
mdofile = aliased(orm.MetaDataObject)
# hashes of base object
htfile = aliased(orm.HashTable)

# hashes to metadata objects
htmd = aliased(orm.HashTable)
# metadataobjects
mdomd = aliased(orm.MetaDataObject)

sel = select(mdofile.data).where(
# we are operating on files
mdofile.type == 'file',
# we get all the hashes for the files
mdofile.uuid == htfile.uuid,
htfile.hash == htmd.hash,
mdomd.uuid == htmd.uuid,
mdomd.type == 'metadata',
)

if onlyexclusions:
sel = sel.where(mdomd.uuid.in_(propobj))

# base object (file)
mdofile = aliased(orm.MetaDataObject)
# hashes of base object
htfile = aliased(orm.HashTable)

# hashes to metadata objects
htmd = aliased(orm.HashTable)
# metadataobjects
mdomd = aliased(orm.MetaDataObject)

# query for hashes w/o metadata
selwomd = select(mdofile.data).where(
# we are operating on files
mdofile.type == 'file',
# we get all the hashes for the files
mdofile.uuid == htfile.uuid,
htfile.hash.notin_(
select(htmd.hash).where(
htmd.uuid == mdomd.uuid, mdomd.type == 'metadata'
)
)
)

sel = sel.union(selwomd)
else:
sel = sel.where(mdomd.uuid.in_(propobj))

sel = sel.execution_options(yield_per=10)

#_debprint('sel:', _getquery(sel, objstr)) #_debprint('sel:', _getquery(sel, objstr))


with objstr._ses() as session: with objstr._ses() as session:
@@ -2348,12 +2386,26 @@ class _TestCases(unittest.TestCase):
_debprint('remaining objs:', repr(fullobjs)) _debprint('remaining objs:', repr(fullobjs))
self.fail('Unable to find objects %s in dump' % missing) self.fail('Unable to find objects %s in dump' % missing)


def save_db(f): # pragma: no cover
    """Wrap a test method so the object store is kept when it fails.

    If the wrapped call raises, the file named by ``self.storefname``
    is copied to ``/tmp/failure.sqlite3`` for later inspection, and
    the original exception is then re-raised unchanged.

    The wrapped callable must be a method: the wrapper reads
    ``storefname`` from its first positional argument (``self``).
    """

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        try:
            return f(self, *args, **kwargs)
        except BaseException:
            # Saving the db is best effort.  The store may not exist
            # yet, or storefname may not have been assigned, and a
            # failed copy must never replace the real failure.
            try:
                shutil.copyfile(self.storefname,
                    '/tmp/failure.sqlite3')
            except (OSError, AttributeError):
                pass
            raise

    return wrapper

#@save_db
def run_command_file(self, f): def run_command_file(self, f):
with open(f) as fp: with open(f) as fp:
cmds = json.load(fp) cmds = json.load(fp)


# setup object store # setup object store
storefname = self.tempdir / 'storefname' storefname = self.tempdir / 'storefname'
self.storefname = storefname
identfname = self.tempdir / 'identfname' identfname = self.tempdir / 'identfname'
cachefname = self.tempdir / 'cachefname' cachefname = self.tempdir / 'cachefname'


@@ -2474,7 +2526,7 @@ class _TestCases(unittest.TestCase):
with self.assertRaises(SystemExit) as cm: with self.assertRaises(SystemExit) as cm:
try: try:
main() main()
except AssertionError:
except AssertionError: # pragma: no cover
# used to nab a copy of the # used to nab a copy of the
# failed sqlite3 db # failed sqlite3 db
if False: if False:


Loading…
Cancel
Save