Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/db.py
Show First 20 Lines • Show All 275 Lines • ▼ Show 20 Lines | def content_metadata_missing_from_list(self, metadata, cur=None): | ||||
"""List missing metadata. | """List missing metadata. | ||||
""" | """ | ||||
yield from self._missing_from_list( | yield from self._missing_from_list( | ||||
'content_metadata', metadata, self.content_metadata_hash_keys, | 'content_metadata', metadata, self.content_metadata_hash_keys, | ||||
cur=cur) | cur=cur) | ||||
content_metadata_cols = [ | content_metadata_cols = [ | ||||
'id', 'translated_metadata', | 'id', 'metadata', | ||||
'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] | 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] | ||||
@stored_procedure('swh_mktemp_content_metadata') | @stored_procedure('swh_mktemp_content_metadata') | ||||
def mktemp_content_metadata(self, cur=None): pass | def mktemp_content_metadata(self, cur=None): pass | ||||
def content_metadata_add_from_temp(self, conflict_update, cur=None): | def content_metadata_add_from_temp(self, conflict_update, cur=None): | ||||
self._cursor(cur).execute("SELECT swh_content_metadata_add(%s)", | self._cursor(cur).execute("SELECT swh_content_metadata_add(%s)", | ||||
(conflict_update, )) | (conflict_update, )) | ||||
def content_metadata_get_from_list(self, ids, cur=None): | def content_metadata_get_from_list(self, ids, cur=None): | ||||
yield from self._get_from_list( | yield from self._get_from_list( | ||||
'content_metadata', ids, self.content_metadata_cols, cur=cur) | 'content_metadata', ids, self.content_metadata_cols, cur=cur) | ||||
revision_metadata_hash_keys = [ | revision_intrinsic_metadata_hash_keys = [ | ||||
'id', 'indexer_configuration_id'] | 'id', 'indexer_configuration_id'] | ||||
def revision_metadata_missing_from_list(self, metadata, cur=None): | def revision_intrinsic_metadata_missing_from_list( | ||||
self, metadata, cur=None): | |||||
"""List missing metadata. | """List missing metadata. | ||||
""" | """ | ||||
yield from self._missing_from_list( | yield from self._missing_from_list( | ||||
'revision_metadata', metadata, self.revision_metadata_hash_keys, | 'revision_intrinsic_metadata', metadata, | ||||
cur=cur) | self.revision_intrinsic_metadata_hash_keys, cur=cur) | ||||
revision_metadata_cols = [ | revision_intrinsic_metadata_cols = [ | ||||
'id', 'translated_metadata', 'mappings', | 'id', 'metadata', 'mappings', | ||||
'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] | 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] | ||||
@stored_procedure('swh_mktemp_revision_metadata') | @stored_procedure('swh_mktemp_revision_intrinsic_metadata') | ||||
def mktemp_revision_metadata(self, cur=None): pass | def mktemp_revision_intrinsic_metadata(self, cur=None): pass | ||||
def revision_metadata_add_from_temp(self, conflict_update, cur=None): | def revision_intrinsic_metadata_add_from_temp( | ||||
self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)", | self, conflict_update, cur=None): | ||||
self._cursor(cur).execute( | |||||
"SELECT swh_revision_intrinsic_metadata_add(%s)", | |||||
(conflict_update, )) | (conflict_update, )) | ||||
def revision_metadata_delete( | def revision_intrinsic_metadata_delete( | ||||
self, entries, cur=None): | self, entries, cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cur.execute( | cur.execute( | ||||
"DELETE from revision_metadata " | "DELETE from revision_intrinsic_metadata " | ||||
"WHERE (id, indexer_configuration_id) IN " | "WHERE (id, indexer_configuration_id) IN " | ||||
" (VALUES %s)" % (', '.join('%s' for _ in entries)), | " (VALUES %s)" % (', '.join('%s' for _ in entries)), | ||||
tuple((e['id'], e['indexer_configuration_id']) | tuple((e['id'], e['indexer_configuration_id']) | ||||
for e in entries),) | for e in entries),) | ||||
def revision_metadata_get_from_list(self, ids, cur=None): | def revision_intrinsic_metadata_get_from_list(self, ids, cur=None): | ||||
yield from self._get_from_list( | yield from self._get_from_list( | ||||
'revision_metadata', ids, self.revision_metadata_cols, cur=cur) | 'revision_intrinsic_metadata', ids, | ||||
self.revision_intrinsic_metadata_cols, cur=cur) | |||||
origin_intrinsic_metadata_cols = [ | origin_intrinsic_metadata_cols = [ | ||||
'origin_id', 'metadata', 'from_revision', 'mappings', | 'id', 'metadata', 'from_revision', 'mappings', | ||||
'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] | 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] | ||||
origin_intrinsic_metadata_regconfig = 'pg_catalog.simple' | origin_intrinsic_metadata_regconfig = 'pg_catalog.simple' | ||||
"""The dictionary used to normalize 'metadata' and queries. | """The dictionary used to normalize 'metadata' and queries. | ||||
'pg_catalog.simple' provides no stopword, so it should be suitable | 'pg_catalog.simple' provides no stopword, so it should be suitable | ||||
for proper names and non-English content. | for proper names and non-English content. | ||||
When updating this value, make sure to add a new index on | When updating this value, make sure to add a new index on | ||||
origin_intrinsic_metadata.metadata.""" | origin_intrinsic_metadata.metadata.""" | ||||
@stored_procedure('swh_mktemp_origin_intrinsic_metadata') | @stored_procedure('swh_mktemp_origin_intrinsic_metadata') | ||||
def mktemp_origin_intrinsic_metadata(self, cur=None): pass | def mktemp_origin_intrinsic_metadata(self, cur=None): pass | ||||
def origin_intrinsic_metadata_add_from_temp( | def origin_intrinsic_metadata_add_from_temp( | ||||
self, conflict_update, cur=None): | self, conflict_update, cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cur.execute( | cur.execute( | ||||
"SELECT swh_origin_intrinsic_metadata_add(%s)", | "SELECT swh_origin_intrinsic_metadata_add(%s)", | ||||
(conflict_update, )) | (conflict_update, )) | ||||
def origin_intrinsic_metadata_delete( | def origin_intrinsic_metadata_delete( | ||||
self, entries, cur=None): | self, entries, cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cur.execute( | cur.execute( | ||||
"DELETE from origin_intrinsic_metadata " | "DELETE from origin_intrinsic_metadata " | ||||
"WHERE (origin_id, indexer_configuration_id) IN" | "WHERE (id, indexer_configuration_id) IN" | ||||
" (VALUES %s)" % (', '.join('%s' for _ in entries)), | " (VALUES %s)" % (', '.join('%s' for _ in entries)), | ||||
tuple((e['origin_id'], e['indexer_configuration_id']) | tuple((e['id'], e['indexer_configuration_id']) | ||||
for e in entries),) | for e in entries),) | ||||
def origin_intrinsic_metadata_get_from_list(self, orig_ids, cur=None): | def origin_intrinsic_metadata_get_from_list(self, orig_ids, cur=None): | ||||
yield from self._get_from_list( | yield from self._get_from_list( | ||||
'origin_intrinsic_metadata', orig_ids, | 'origin_intrinsic_metadata', orig_ids, | ||||
self.origin_intrinsic_metadata_cols, cur=cur, | self.origin_intrinsic_metadata_cols, cur=cur, | ||||
id_col='origin_id') | id_col='id') | ||||
def origin_intrinsic_metadata_search_fulltext(self, terms, *, limit, cur): | def origin_intrinsic_metadata_search_fulltext(self, terms, *, limit, cur): | ||||
regconfig = self.origin_intrinsic_metadata_regconfig | regconfig = self.origin_intrinsic_metadata_regconfig | ||||
tsquery_template = ' && '.join("plainto_tsquery('%s', %%s)" % regconfig | tsquery_template = ' && '.join("plainto_tsquery('%s', %%s)" % regconfig | ||||
for _ in terms) | for _ in terms) | ||||
tsquery_args = [(term,) for term in terms] | tsquery_args = [(term,) for term in terms] | ||||
keys = map(self._convert_key, self.origin_intrinsic_metadata_cols) | keys = (self._convert_key(col, 'oim') for col in | ||||
self.origin_intrinsic_metadata_cols) | |||||
vlorentz: debugging leftover? ^^ | |||||
query = ("SELECT {keys} FROM origin_intrinsic_metadata AS oim " | query = ("SELECT {keys} FROM origin_intrinsic_metadata AS oim " | ||||
Done · Inline Actions · vlorentz: Can be rewritten as a generator expression, which I find more readable: `keys = (convert_key(col, 'oim') for col in self.origin_intrinsic_metadata_cols)` (same below). | |||||
"INNER JOIN indexer_configuration AS i " | "INNER JOIN indexer_configuration AS i " | ||||
"ON oim.indexer_configuration_id=i.id " | "ON oim.indexer_configuration_id=i.id " | ||||
"JOIN LATERAL (SELECT {tsquery_template}) AS s(tsq) ON true " | "JOIN LATERAL (SELECT {tsquery_template}) AS s(tsq) ON true " | ||||
"WHERE oim.metadata_tsvector @@ tsq " | "WHERE oim.metadata_tsvector @@ tsq " | ||||
"ORDER BY ts_rank(oim.metadata_tsvector, tsq, 1) DESC " | "ORDER BY ts_rank(oim.metadata_tsvector, tsq, 1) DESC " | ||||
"LIMIT %s;" | "LIMIT %s;" | ||||
).format(keys=', '.join(keys), | ).format(keys=', '.join(keys), | ||||
regconfig=regconfig, | regconfig=regconfig, | ||||
tsquery_template=tsquery_template) | tsquery_template=tsquery_template) | ||||
cur.execute(query, tsquery_args + [limit]) | cur.execute(query, tsquery_args + [limit]) | ||||
yield from cur | yield from cur | ||||
def origin_intrinsic_metadata_search_by_producer( | def origin_intrinsic_metadata_search_by_producer( | ||||
self, start, end, limit, ids_only, mappings, tool_ids, cur): | self, start, end, limit, ids_only, mappings, tool_ids, cur): | ||||
if ids_only: | if ids_only: | ||||
keys = 'oim.origin_id' | keys = 'oim.id' | ||||
else: | else: | ||||
keys = ', '.join(map(self._convert_key, | keys = ', '.join((self._convert_key(col, 'oim') for col in | ||||
self.origin_intrinsic_metadata_cols)) | self.origin_intrinsic_metadata_cols)) | ||||
query_parts = [ | query_parts = [ | ||||
"SELECT %s" % keys, | "SELECT %s" % keys, | ||||
"FROM origin_intrinsic_metadata AS oim", | "FROM origin_intrinsic_metadata AS oim", | ||||
"INNER JOIN indexer_configuration AS i", | "INNER JOIN indexer_configuration AS i", | ||||
"ON oim.indexer_configuration_id=i.id", | "ON oim.indexer_configuration_id=i.id", | ||||
] | ] | ||||
args = [] | args = [] | ||||
where = [] | where = [] | ||||
if start: | if start: | ||||
where.append('oim.origin_id >= %s') | where.append('oim.id >= %s') | ||||
args.append(start) | args.append(start) | ||||
if end: | if end: | ||||
where.append('oim.origin_id <= %s') | where.append('oim.id <= %s') | ||||
args.append(end) | args.append(end) | ||||
if mappings is not None: | if mappings is not None: | ||||
where.append('oim.mappings && %s') | where.append('oim.mappings && %s') | ||||
args.append(mappings) | args.append(mappings) | ||||
if tool_ids is not None: | if tool_ids is not None: | ||||
where.append('oim.indexer_configuration_id = ANY(%s)') | where.append('oim.indexer_configuration_id = ANY(%s)') | ||||
args.append(tool_ids) | args.append(tool_ids) | ||||
if where: | if where: | ||||
Show All 35 Lines |
debugging leftover? ^^