Browse Source

optimize searching for files... takes a 300s query down to 15s.

SQLite3 does not do joins in a sane manner, so we emulate them
w/ subqueries for a large boost.  Not sure whether adding distinct
improves things or not: the query plan does not change between the
two (though the lower-level ops may), and in a quick test it didn't
seem to make a difference (not evaluated statistically)...
main
John-Mark Gurney 3 months ago
parent
commit
bb2209ff2e
1 changed files with 16 additions and 8 deletions
  1. +16
    -8
      ui/medashare/cli.py

+ 16
- 8
ui/medashare/cli.py View File

@@ -1622,7 +1622,7 @@ def cmd_search(options, persona, objstr, cache):
propmap = aliased(orm.PropertyMapping) propmap = aliased(orm.PropertyMapping)


# propobj only returns what can match query # propobj only returns what can match query
propobj = select(propmap.obj)
propobj = select(propmap.obj).distinct()


onlyexclusions = True onlyexclusions = True


@@ -1668,18 +1668,28 @@ def cmd_search(options, persona, objstr, cache):
# metadataobjects # metadataobjects
mdomd = aliased(orm.MetaDataObject) mdomd = aliased(orm.MetaDataObject)


# Don't know if distinct is needed/warranted for the
# in_ subqueries
sel = select(mdofile.data).where( sel = select(mdofile.data).where(
# we are operating on files # we are operating on files
mdofile.type == 'file', mdofile.type == 'file',
# we get all the hashes for the files # we get all the hashes for the files
mdofile.uuid == htfile.uuid,
htfile.hash == htmd.hash,
mdomd.uuid == htmd.uuid,
mdomd.type == 'metadata',
mdofile.uuid.in_(
select(htfile.uuid).where(
htfile.hash == htmd.hash,
# we get all the hashes for selected metadata
htmd.uuid.in_(
select(mdomd.uuid).where(
mdomd.type == 'metadata',
mdomd.uuid.in_(propobj)
)
)
)
)
) )


if onlyexclusions: if onlyexclusions:
sel = sel.where(mdomd.uuid.in_(propobj))
# add in all the files that don't have metadata


# base object (file) # base object (file)
mdofile = aliased(orm.MetaDataObject) mdofile = aliased(orm.MetaDataObject)
@@ -1705,8 +1715,6 @@ def cmd_search(options, persona, objstr, cache):
) )


sel = sel.union(selwomd) sel = sel.union(selwomd)
else:
sel = sel.where(mdomd.uuid.in_(propobj))


sel = sel.execution_options(yield_per=10) sel = sel.execution_options(yield_per=10)




Loading…
Cancel
Save