Page MenuHomeSoftware Heritage

D1226.id3907.diff
No OneTemporary

D1226.id3907.diff

diff --git a/sql/upgrades/123.sql b/sql/upgrades/123.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/123.sql
@@ -0,0 +1,103 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 122
+-- to_version: 123
+-- description: fix heterogeneity of names in metadata tables
+
+insert into dbversion(version, release, description)
+values(123, now(), 'fix heterogeneity of names in metadata tables');
+
+create or replace function swh_content_metadata_add(conflict_update boolean)
+ returns void
+ language plpgsql
+as $$
+begin
+ if conflict_update then
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
+ from tmp_content_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do update set metadata = excluded.metadata;
+
+ else
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
+ from tmp_content_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do nothing;
+ end if;
+ return;
+end
+$$;
+
+alter function swh_revision_metadata_add(boolean) rename to swh_revision_intrinsic_metadata_add;
+create or replace function swh_revision_intrinsic_metadata_add(conflict_update boolean)
+ returns void
+ language plpgsql
+as $$
+begin
+ if conflict_update then
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do update set
+ metadata = excluded.metadata,
+ mappings = excluded.mappings;
+
+ else
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do nothing;
+ end if;
+ return;
+end
+$$;
+
+alter function swh_mktemp_revision_metadata() rename to swh_mktemp_revision_intrinsic_metadata;
+create or replace function swh_mktemp_revision_intrinsic_metadata()
+ returns void
+ language sql
+as $$
+ create temporary table tmp_revision_intrinsic_metadata (
+ like revision_intrinsic_metadata including defaults
+ ) on commit drop;
+$$;
+
+create or replace function swh_origin_intrinsic_metadata_add(
+ conflict_update boolean)
+ returns void
+ language plpgsql
+as $$
+begin
+ perform swh_origin_intrinsic_metadata_compute_tsvector();
+ if conflict_update then
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
+ metadata_tsvector, mappings
+ from tmp_origin_intrinsic_metadata
+ on conflict(id, indexer_configuration_id)
+ do update set
+ metadata = excluded.metadata,
+ mappings = excluded.mappings;
+
+ else
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
+ metadata_tsvector, mappings
+ from tmp_origin_intrinsic_metadata
+ on conflict(id, indexer_configuration_id)
+ do nothing;
+ end if;
+ return;
+end
+$$;
+
+alter index revision_metadata_pkey rename to revision_intrinsic_metadata_pkey;
+
+alter table revision_metadata rename column translated_metadata to metadata;
+alter table content_metadata rename column translated_metadata to metadata;
+alter table origin_intrinsic_metadata rename column origin_id to id;
+
+alter table revision_metadata rename to revision_intrinsic_metadata;
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -22,7 +22,7 @@
- filtering out content already indexed in content_metadata
- reading content from objstorage with the content's id sha1
- - computing translated_metadata by given context
+ - computing metadata by given context
- using the metadata_dictionary as the 'swh-metadata-translator' tool
- store result in content_metadata table
@@ -46,25 +46,25 @@
Returns:
dict: dictionary representing a content_metadata. If the
- translation wasn't successful the translated_metadata keys will
+ translation wasn't successful the metadata keys will
be returned as None
"""
result = {
'id': id,
'indexer_configuration_id': self.tool['id'],
- 'translated_metadata': None
+ 'metadata': None
}
try:
mapping_name = self.tool['tool_configuration']['context']
log_suffix += ', content_id=%s' % hashutil.hash_to_hex(id)
- result['translated_metadata'] = \
+ result['metadata'] = \
MAPPINGS[mapping_name](log_suffix).translate(data)
except Exception:
self.log.exception(
"Problem during metadata translation "
"for content %s" % hashutil.hash_to_hex(id))
- if result['translated_metadata'] is None:
+ if result['metadata'] is None:
return None
return result
@@ -75,7 +75,7 @@
results ([dict]): list of content_metadata, dict with the
following keys:
- id (bytes): content's identifier (sha1)
- - translated_metadata (jsonb): detected metadata
+ - metadata (jsonb): detected metadata
policy_update ([str]): either 'update-dups' or 'ignore-dups' to
respectively update duplicates or ignore them
@@ -89,8 +89,8 @@
This indexer is in charge of:
- - filtering revisions already indexed in revision_metadata table with
- defined computation tool
+ - filtering revisions already indexed in revision_intrinsic_metadata table
+ with defined computation tool
- retrieve all entry_files in root directory
- use metadata_detector for file_names containing metadata
- compute metadata translation if necessary and possible (depends on tool)
@@ -111,7 +111,7 @@
"""Filter out known sha1s and return only missing ones.
"""
- yield from self.idx_storage.revision_metadata_missing((
+ yield from self.idx_storage.revision_intrinsic_metadata_missing((
{
'id': sha1_git,
'indexer_configuration_id': self.tool['id'],
@@ -130,18 +130,19 @@
rev (dict): revision artifact from storage
Returns:
- dict: dictionary representing a revision_metadata, with keys:
+ dict: dictionary representing a revision_intrinsic_metadata, with
+ keys:
- id (str): rev's identifier (sha1_git)
- indexer_configuration_id (bytes): tool used
- - translated_metadata: dict of retrieved metadata
+ - metadata: dict of retrieved metadata
"""
result = {
'id': rev['id'],
'indexer_configuration_id': self.tool['id'],
'mappings': None,
- 'translated_metadata': None
+ 'metadata': None
}
try:
@@ -149,11 +150,11 @@
dir_ls = self.storage.directory_ls(root_dir, recursive=False)
files = [entry for entry in dir_ls if entry['type'] == 'file']
detected_files = detect_metadata(files)
- (mappings, metadata) = self.translate_revision_metadata(
+ (mappings, metadata) = self.translate_revision_intrinsic_metadata(
detected_files,
log_suffix='revision=%s' % hashutil.hash_to_hex(rev['id']))
result['mappings'] = mappings
- result['translated_metadata'] = metadata
+ result['metadata'] = metadata
except Exception as e:
self.log.exception(
'Problem when indexing rev: %r', e)
@@ -172,11 +173,13 @@
respectively update duplicates or ignore them
"""
- # TODO: add functions in storage to keep data in revision_metadata
- self.idx_storage.revision_metadata_add(
+ # TODO: add functions in storage to keep data in
+ # revision_intrinsic_metadata
+ self.idx_storage.revision_intrinsic_metadata_add(
results, conflict_update=(policy_update == 'update-dups'))
- def translate_revision_metadata(self, detected_files, log_suffix):
+ def translate_revision_intrinsic_metadata(
+ self, detected_files, log_suffix):
"""
Determine plan of action to translate metadata when containing
one or multiple detected files:
@@ -191,7 +194,7 @@
"""
used_mappings = [MAPPINGS[context].name for context in detected_files]
- translated_metadata = []
+ metadata = []
tool = {
'name': 'swh-metadata-translator',
'version': '0.0.2',
@@ -215,13 +218,13 @@
metadata_generator = self.idx_storage.content_metadata_get(
detected_files[context])
for c in metadata_generator:
- # extracting translated_metadata
+ # extracting metadata
sha1 = c['id']
sha1s_in_storage.append(sha1)
- local_metadata = c['translated_metadata']
+ local_metadata = c['metadata']
# local metadata is aggregated
if local_metadata:
- translated_metadata.append(local_metadata)
+ metadata.append(local_metadata)
sha1s_filtered = [item for item in detected_files[context]
if item not in sha1s_in_storage]
@@ -234,15 +237,15 @@
log_suffix=log_suffix)
# on the fly possibility:
for result in c_metadata_indexer.results:
- local_metadata = result['translated_metadata']
- translated_metadata.append(local_metadata)
+ local_metadata = result['metadata']
+ metadata.append(local_metadata)
except Exception:
self.log.exception(
"Exception while indexing metadata on contents")
- # transform translated_metadata into min set with swh-metadata-detector
- min_metadata = extract_minimal_metadata_dict(translated_metadata)
+ # transform metadata into min set with swh-metadata-detector
+ min_metadata = extract_minimal_metadata_dict(metadata)
return (used_mappings, min_metadata)
@@ -278,8 +281,8 @@
rev_metadata = self.revision_metadata_indexer.index(rev)
orig_metadata = {
'from_revision': rev_metadata['id'],
- 'origin_id': origin['id'],
- 'metadata': rev_metadata['translated_metadata'],
+ 'id': origin['id'],
+ 'metadata': rev_metadata['metadata'],
'mappings': rev_metadata['mappings'],
'indexer_configuration_id':
rev_metadata['indexer_configuration_id'],
@@ -311,7 +314,7 @@
origs_to_delete.append(orig_item)
if rev_metadata:
- self.idx_storage.revision_metadata_add(
+ self.idx_storage.revision_intrinsic_metadata_add(
rev_metadata, conflict_update=conflict_update)
if orig_metadata:
self.idx_storage.origin_intrinsic_metadata_add(
@@ -324,4 +327,4 @@
if origs_to_delete:
self.idx_storage.origin_intrinsic_metadata_delete(origs_to_delete)
if revs_to_delete:
- self.idx_storage.revision_metadata_delete(revs_to_delete)
+ self.idx_storage.revision_intrinsic_metadata_delete(revs_to_delete)
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -103,33 +103,33 @@
-- identified as potentially containning metadata with a translation tool (indexer_configuration_id)
create table content_metadata(
id sha1 not null,
- translated_metadata jsonb not null,
+ metadata jsonb not null,
indexer_configuration_id bigint not null
);
comment on table content_metadata is 'metadata semantically translated from a content file';
comment on column content_metadata.id is 'sha1 of content file';
-comment on column content_metadata.translated_metadata is 'result of translation with defined format';
+comment on column content_metadata.metadata is 'result of translation with defined format';
comment on column content_metadata.indexer_configuration_id is 'tool used for translation';
--- The table revision_metadata provides a minimal set of intrinsic metadata
--- detected with the detection tool (indexer_configuration_id) and aggregated
--- from the content_metadata translation.
-create table revision_metadata(
+-- The table revision_intrinsic_metadata provides a minimal set of intrinsic
+-- metadata detected with the detection tool (indexer_configuration_id) and
+-- aggregated from the content_metadata translation.
+create table revision_intrinsic_metadata(
id sha1_git not null,
- translated_metadata jsonb not null,
+ metadata jsonb not null,
indexer_configuration_id bigint not null,
mappings text array not null
);
-comment on table revision_metadata is 'metadata semantically detected and translated in a revision';
-comment on column revision_metadata.id is 'sha1_git of revision';
-comment on column revision_metadata.translated_metadata is 'result of detection and translation with defined format';
-comment on column revision_metadata.indexer_configuration_id is 'tool used for detection';
-comment on column revision_metadata.mappings is 'type of metadata files used to obtain this metadata (eg. pkg-info, npm)';
+comment on table revision_intrinsic_metadata is 'metadata semantically detected and translated in a revision';
+comment on column revision_intrinsic_metadata.id is 'sha1_git of revision';
+comment on column revision_intrinsic_metadata.metadata is 'result of detection and translation with defined format';
+comment on column revision_intrinsic_metadata.indexer_configuration_id is 'tool used for detection';
+comment on column revision_intrinsic_metadata.mappings is 'type of metadata files used to obtain this metadata (eg. pkg-info, npm)';
create table origin_intrinsic_metadata(
- origin_id bigserial not null,
+ id bigserial not null,
metadata jsonb,
indexer_configuration_id bigint not null,
from_revision sha1_git not null,
@@ -138,7 +138,7 @@
);
comment on table origin_intrinsic_metadata is 'keeps intrinsic metadata for an origin';
-comment on column origin_intrinsic_metadata.origin_id is 'the entry id in origin';
+comment on column origin_intrinsic_metadata.id is 'the entry id in origin';
comment on column origin_intrinsic_metadata.metadata is 'metadata extracted from a revision';
comment on column origin_intrinsic_metadata.indexer_configuration_id is 'tool used to generate this metadata';
comment on column origin_intrinsic_metadata.from_revision is 'sha1 of the revision this metadata was copied from.';
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -267,15 +267,15 @@
as $$
begin
if conflict_update then
- insert into content_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
from tmp_content_metadata tcm
on conflict(id, indexer_configuration_id)
- do update set translated_metadata = excluded.translated_metadata;
+ do update set metadata = excluded.metadata;
else
- insert into content_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
from tmp_content_metadata tcm
on conflict(id, indexer_configuration_id)
do nothing;
@@ -300,33 +300,34 @@
-- end content_metadata functions
--- add tmp_revision_metadata entries to revision_metadata, overwriting
--- duplicates if conflict_update is true, skipping duplicates otherwise.
+-- add tmp_revision_intrinsic_metadata entries to revision_intrinsic_metadata,
+-- overwriting duplicates if conflict_update is true, skipping duplicates
+-- otherwise.
--
-- If filtering duplicates is in order, the call to
--- swh_revision_metadata_missing must take place before calling this
+-- swh_revision_intrinsic_metadata_missing must take place before calling this
-- function.
--
-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
--- tmp_revision_metadata, 2. call this function
-create or replace function swh_revision_metadata_add(conflict_update boolean)
+-- tmp_revision_intrinsic_metadata, 2. call this function
+create or replace function swh_revision_intrinsic_metadata_add(conflict_update boolean)
returns void
language plpgsql
as $$
begin
if conflict_update then
- insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
- select id, translated_metadata, mappings, indexer_configuration_id
- from tmp_revision_metadata tcm
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
on conflict(id, indexer_configuration_id)
do update set
- translated_metadata = excluded.translated_metadata,
+ metadata = excluded.metadata,
mappings = excluded.mappings;
else
- insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
- select id, translated_metadata, mappings, indexer_configuration_id
- from tmp_revision_metadata tcm
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
on conflict(id, indexer_configuration_id)
do nothing;
end if;
@@ -334,19 +335,19 @@
end
$$;
-comment on function swh_revision_metadata_add(boolean) IS 'Add new revision metadata';
+comment on function swh_revision_intrinsic_metadata_add(boolean) IS 'Add new revision intrinsic metadata';
--- create a temporary table for retrieving revision_metadata
-create or replace function swh_mktemp_revision_metadata()
+-- create a temporary table for retrieving revision_intrinsic_metadata
+create or replace function swh_mktemp_revision_intrinsic_metadata()
returns void
language sql
as $$
- create temporary table tmp_revision_metadata (
- like revision_metadata including defaults
+ create temporary table tmp_revision_intrinsic_metadata (
+ like revision_intrinsic_metadata including defaults
) on commit drop;
$$;
-comment on function swh_mktemp_revision_metadata() is 'Helper table to add revision metadata';
+comment on function swh_mktemp_revision_intrinsic_metadata() is 'Helper table to add revision intrinsic metadata';
-- create a temporary table for retrieving origin_intrinsic_metadata
create or replace function swh_mktemp_origin_intrinsic_metadata()
@@ -412,21 +413,21 @@
begin
perform swh_origin_intrinsic_metadata_compute_tsvector();
if conflict_update then
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
- select origin_id, metadata, indexer_configuration_id, from_revision,
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
- on conflict(origin_id, indexer_configuration_id)
+ on conflict(id, indexer_configuration_id)
do update set
metadata = excluded.metadata,
mappings = excluded.mappings;
else
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
- select origin_id, metadata, indexer_configuration_id, from_revision,
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
- on conflict(origin_id, indexer_configuration_id)
+ on conflict(id, indexer_configuration_id)
do nothing;
end if;
return;
diff --git a/swh/indexer/sql/60-swh-indexes.sql b/swh/indexer/sql/60-swh-indexes.sql
--- a/swh/indexer/sql/60-swh-indexes.sql
+++ b/swh/indexer/sql/60-swh-indexes.sql
@@ -25,12 +25,12 @@
alter table content_metadata add constraint content_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
alter table content_metadata validate constraint content_metadata_indexer_configuration_id_fkey;
--- revision_metadata
-create unique index revision_metadata_pkey on revision_metadata(id, indexer_configuration_id);
-alter table revision_metadata add primary key using index revision_metadata_pkey;
+-- revision_intrinsic_metadata
+create unique index revision_intrinsic_metadata_pkey on revision_intrinsic_metadata(id, indexer_configuration_id);
+alter table revision_intrinsic_metadata add primary key using index revision_intrinsic_metadata_pkey;
-alter table revision_metadata add constraint revision_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
-alter table revision_metadata validate constraint revision_metadata_indexer_configuration_id_fkey;
+alter table revision_intrinsic_metadata add constraint revision_intrinsic_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
+alter table revision_intrinsic_metadata validate constraint revision_intrinsic_metadata_indexer_configuration_id_fkey;
-- content_mimetype
create unique index content_mimetype_pkey on content_mimetype(id, indexer_configuration_id);
@@ -57,12 +57,12 @@
alter table content_fossology_license validate constraint content_fossology_license_indexer_configuration_id_fkey;
-- origin_intrinsic_metadata
-create unique index origin_intrinsic_metadata_pkey on origin_intrinsic_metadata(origin_id, indexer_configuration_id);
+create unique index origin_intrinsic_metadata_pkey on origin_intrinsic_metadata(id, indexer_configuration_id);
alter table origin_intrinsic_metadata add primary key using index origin_intrinsic_metadata_pkey;
alter table origin_intrinsic_metadata add constraint origin_intrinsic_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
alter table origin_intrinsic_metadata validate constraint origin_intrinsic_metadata_indexer_configuration_id_fkey;
-alter table origin_intrinsic_metadata add constraint origin_intrinsic_metadata_revision_metadata_fkey foreign key (from_revision, indexer_configuration_id) references revision_metadata(id, indexer_configuration_id) not valid;
+alter table origin_intrinsic_metadata add constraint origin_intrinsic_metadata_revision_metadata_fkey foreign key (from_revision, indexer_configuration_id) references revision_intrinsic_metadata(id, indexer_configuration_id) not valid;
alter table origin_intrinsic_metadata validate constraint origin_intrinsic_metadata_revision_metadata_fkey;
create index origin_intrinsic_metadata_fulltext_idx on origin_intrinsic_metadata using gin (metadata_tsvector);
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -51,32 +51,30 @@
return IndexerStorage(**args)
-def _check_duplicates(data, key):
+def _check_id_duplicates(data):
"""
- If any two dictionaries in `data` have the same value for the
- key, raises a `ValueError`.
+ If any two dictionaries in `data` have the same id, raises
+ a `ValueError`.
Values associated to the key must be hashable.
Args:
data (List[dict]): List of dictionaries to be inserted
- key (str): Name of the key that acts as id.
- >>> _check_duplicates([
+ >>> _check_id_duplicates([
... {'id': 'foo', 'data': 'spam'},
... {'id': 'bar', 'data': 'egg'},
- ... ], 'id')
- >>> _check_duplicates([
+ ... ])
+ >>> _check_id_duplicates([
... {'id': 'foo', 'data': 'spam'},
... {'id': 'foo', 'data': 'egg'},
- ... ], 'id')
+ ... ])
Traceback (most recent call last):
...
ValueError: The same id is present more than once.
"""
- if len({item[key] for item in data}) < len(data):
- raise ValueError(
- 'The same {} is present more than once.'.format(key))
+ if len({item['id'] for item in data}) < len(data):
+ raise ValueError('The same id is present more than once.')
class IndexerStorage:
@@ -246,7 +244,7 @@
default)
"""
- _check_duplicates(mimetypes, 'id')
+ _check_id_duplicates(mimetypes)
mimetypes.sort(key=lambda m: m['id'])
db.mktemp_content_mimetype(cur)
db.copy_to(mimetypes, 'tmp_content_mimetype',
@@ -332,7 +330,7 @@
default)
"""
- _check_duplicates(languages, 'id')
+ _check_id_duplicates(languages)
languages.sort(key=lambda m: m['id'])
db.mktemp_content_language(cur)
# empty language is mapped to 'unknown'
@@ -403,7 +401,7 @@
line, lang
"""
- _check_duplicates(ctags, 'id')
+ _check_id_duplicates(ctags)
ctags.sort(key=lambda m: m['id'])
def _convert_ctags(__ctags):
@@ -487,7 +485,7 @@
list: content_license entries which failed due to unknown licenses
"""
- _check_duplicates(licenses, 'id')
+ _check_id_duplicates(licenses)
licenses.sort(key=lambda m: m['id'])
db.mktemp_content_fossology_license(cur)
db.copy_to(
@@ -562,7 +560,7 @@
dictionaries with the following keys:
id (bytes)
- translated_metadata (str): associated metadata
+ metadata (str): associated metadata
tool (dict): tool used to compute metadata
"""
@@ -580,25 +578,25 @@
metadata (iterable): dictionaries with keys:
- **id**: sha1
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
conflict_update: Flag to determine if we want to overwrite (true)
or skip duplicates (false, the default)
"""
- _check_duplicates(metadata, 'id')
+ _check_id_duplicates(metadata)
metadata.sort(key=lambda m: m['id'])
db.mktemp_content_metadata(cur)
db.copy_to(metadata, 'tmp_content_metadata',
- ['id', 'translated_metadata', 'indexer_configuration_id'],
+ ['id', 'metadata', 'indexer_configuration_id'],
cur)
db.content_metadata_add_from_temp(conflict_update, cur)
- @remote_api_endpoint('revision_metadata/missing')
+ @remote_api_endpoint('revision_intrinsic_metadata/missing')
@db_transaction_generator()
- def revision_metadata_missing(self, metadata, db=None, cur=None):
+ def revision_intrinsic_metadata_missing(self, metadata, db=None, cur=None):
"""List metadata missing from storage.
Args:
@@ -612,12 +610,13 @@
missing ids
"""
- for obj in db.revision_metadata_missing_from_list(metadata, cur):
+ for obj in db.revision_intrinsic_metadata_missing_from_list(
+ metadata, cur):
yield obj[0]
- @remote_api_endpoint('revision_metadata')
+ @remote_api_endpoint('revision_intrinsic_metadata')
@db_transaction_generator()
- def revision_metadata_get(self, ids, db=None, cur=None):
+ def revision_intrinsic_metadata_get(self, ids, db=None, cur=None):
"""Retrieve revision metadata per id.
Args:
@@ -627,27 +626,27 @@
dictionaries with the following keys:
- **id** (bytes)
- - **translated_metadata** (str): associated metadata
+ - **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
"""
- for c in db.revision_metadata_get_from_list(ids, cur):
+ for c in db.revision_intrinsic_metadata_get_from_list(ids, cur):
yield converters.db_to_metadata(
- dict(zip(db.revision_metadata_cols, c)))
+ dict(zip(db.revision_intrinsic_metadata_cols, c)))
- @remote_api_endpoint('revision_metadata/add')
+ @remote_api_endpoint('revision_intrinsic_metadata/add')
@db_transaction()
- def revision_metadata_add(self, metadata, conflict_update=False, db=None,
- cur=None):
+ def revision_intrinsic_metadata_add(self, metadata, conflict_update=False,
+ db=None, cur=None):
"""Add metadata not present in storage.
Args:
metadata (iterable): dictionaries with keys:
- **id**: sha1_git of revision
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
@@ -656,20 +655,20 @@
or skip duplicates (false, the default)
"""
- _check_duplicates(metadata, 'id')
+ _check_id_duplicates(metadata)
metadata.sort(key=lambda m: m['id'])
- db.mktemp_revision_metadata(cur)
+ db.mktemp_revision_intrinsic_metadata(cur)
- db.copy_to(metadata, 'tmp_revision_metadata',
- ['id', 'translated_metadata', 'mappings',
+ db.copy_to(metadata, 'tmp_revision_intrinsic_metadata',
+ ['id', 'metadata', 'mappings',
'indexer_configuration_id'],
cur)
- db.revision_metadata_add_from_temp(conflict_update, cur)
+ db.revision_intrinsic_metadata_add_from_temp(conflict_update, cur)
- @remote_api_endpoint('revision_metadata/delete')
+ @remote_api_endpoint('revision_intrinsic_metadata/delete')
@db_transaction()
- def revision_metadata_delete(self, entries, db=None, cur=None):
+ def revision_intrinsic_metadata_delete(self, entries, db=None, cur=None):
"""Remove revision metadata from the storage.
Args:
@@ -678,7 +677,7 @@
- **indexer_configuration_id** (int): tool used to compute
metadata
"""
- db.revision_metadata_delete(entries, cur)
+ db.revision_intrinsic_metadata_delete(entries, cur)
@remote_api_endpoint('origin_intrinsic_metadata')
@db_transaction_generator()
@@ -691,7 +690,7 @@
Yields:
list: dictionaries with the following keys:
- - **origin_id** (int)
+ - **id** (int)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
@@ -712,7 +711,7 @@
Args:
metadata (iterable): dictionaries with keys:
- - **origin_id**: origin identifier
+ - **id**: origin identifier
- **from_revision**: sha1 id of the revision used to generate
these metadata.
- **metadata**: arbitrary dict
@@ -724,13 +723,13 @@
or skip duplicates (false, the default)
"""
- _check_duplicates(metadata, 'origin_id')
- metadata.sort(key=lambda m: m['origin_id'])
+ _check_id_duplicates(metadata)
+ metadata.sort(key=lambda m: m['id'])
db.mktemp_origin_intrinsic_metadata(cur)
db.copy_to(metadata, 'tmp_origin_intrinsic_metadata',
- ['origin_id', 'metadata', 'indexer_configuration_id',
+ ['id', 'metadata', 'indexer_configuration_id',
'from_revision', 'mappings'],
cur)
db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -281,7 +281,7 @@
cur=cur)
content_metadata_cols = [
- 'id', 'translated_metadata',
+ 'id', 'metadata',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
@stored_procedure('swh_mktemp_content_metadata')
@@ -295,44 +295,48 @@
yield from self._get_from_list(
'content_metadata', ids, self.content_metadata_cols, cur=cur)
- revision_metadata_hash_keys = [
+ revision_intrinsic_metadata_hash_keys = [
'id', 'indexer_configuration_id']
- def revision_metadata_missing_from_list(self, metadata, cur=None):
+ def revision_intrinsic_metadata_missing_from_list(
+ self, metadata, cur=None):
"""List missing metadata.
"""
yield from self._missing_from_list(
- 'revision_metadata', metadata, self.revision_metadata_hash_keys,
- cur=cur)
+ 'revision_intrinsic_metadata', metadata,
+ self.revision_intrinsic_metadata_hash_keys, cur=cur)
- revision_metadata_cols = [
- 'id', 'translated_metadata', 'mappings',
+ revision_intrinsic_metadata_cols = [
+ 'id', 'metadata', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
- @stored_procedure('swh_mktemp_revision_metadata')
- def mktemp_revision_metadata(self, cur=None): pass
+ @stored_procedure('swh_mktemp_revision_intrinsic_metadata')
+ def mktemp_revision_intrinsic_metadata(self, cur=None): pass
- def revision_metadata_add_from_temp(self, conflict_update, cur=None):
- self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)",
- (conflict_update, ))
+ def revision_intrinsic_metadata_add_from_temp(
+ self, conflict_update, cur=None):
+ self._cursor(cur).execute(
+ "SELECT swh_revision_intrinsic_metadata_add(%s)",
+ (conflict_update, ))
- def revision_metadata_delete(
+ def revision_intrinsic_metadata_delete(
self, entries, cur=None):
cur = self._cursor(cur)
cur.execute(
- "DELETE from revision_metadata "
+ "DELETE from revision_intrinsic_metadata "
"WHERE (id, indexer_configuration_id) IN "
" (VALUES %s)" % (', '.join('%s' for _ in entries)),
tuple((e['id'], e['indexer_configuration_id'])
for e in entries),)
- def revision_metadata_get_from_list(self, ids, cur=None):
+ def revision_intrinsic_metadata_get_from_list(self, ids, cur=None):
yield from self._get_from_list(
- 'revision_metadata', ids, self.revision_metadata_cols, cur=cur)
+ 'revision_intrinsic_metadata', ids,
+ self.revision_intrinsic_metadata_cols, cur=cur)
origin_intrinsic_metadata_cols = [
- 'origin_id', 'metadata', 'from_revision', 'mappings',
+ 'id', 'metadata', 'from_revision', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
origin_intrinsic_metadata_regconfig = 'pg_catalog.simple'
@@ -357,23 +361,25 @@
cur = self._cursor(cur)
cur.execute(
"DELETE from origin_intrinsic_metadata "
- "WHERE (origin_id, indexer_configuration_id) IN"
+ "WHERE (id, indexer_configuration_id) IN"
" (VALUES %s)" % (', '.join('%s' for _ in entries)),
- tuple((e['origin_id'], e['indexer_configuration_id'])
+ tuple((e['id'], e['indexer_configuration_id'])
for e in entries),)
def origin_intrinsic_metadata_get_from_list(self, orig_ids, cur=None):
yield from self._get_from_list(
'origin_intrinsic_metadata', orig_ids,
self.origin_intrinsic_metadata_cols, cur=cur,
- id_col='origin_id')
+ id_col='id')
def origin_intrinsic_metadata_search_fulltext(self, terms, *, limit, cur):
regconfig = self.origin_intrinsic_metadata_regconfig
tsquery_template = ' && '.join("plainto_tsquery('%s', %%s)" % regconfig
for _ in terms)
tsquery_args = [(term,) for term in terms]
- keys = map(self._convert_key, self.origin_intrinsic_metadata_cols)
+ keys = (self._convert_key(col, 'oim') for col in
+ self.origin_intrinsic_metadata_cols)
+
query = ("SELECT {keys} FROM origin_intrinsic_metadata AS oim "
"INNER JOIN indexer_configuration AS i "
"ON oim.indexer_configuration_id=i.id "
@@ -390,10 +396,10 @@
def origin_intrinsic_metadata_search_by_producer(
self, start, end, limit, ids_only, mappings, tool_ids, cur):
if ids_only:
- keys = 'oim.origin_id'
+ keys = 'oim.id'
else:
- keys = ', '.join(map(self._convert_key,
- self.origin_intrinsic_metadata_cols))
+ keys = ', '.join((self._convert_key(col, 'oim') for col in
+ self.origin_intrinsic_metadata_cols))
query_parts = [
"SELECT %s" % keys,
"FROM origin_intrinsic_metadata AS oim",
@@ -404,10 +410,10 @@
where = []
if start:
- where.append('oim.origin_id >= %s')
+ where.append('oim.id >= %s')
args.append(start)
if end:
- where.append('oim.origin_id <= %s')
+ where.append('oim.id <= %s')
args.append(end)
if mappings is not None:
where.append('oim.mappings && %s')
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -196,7 +196,7 @@
self._content_ctags = SubStorage(self._tools)
self._licenses = SubStorage(self._tools)
self._content_metadata = SubStorage(self._tools)
- self._revision_metadata = SubStorage(self._tools)
+ self._revision_intrinsic_metadata = SubStorage(self._tools)
self._origin_intrinsic_metadata = SubStorage(self._tools)
def content_mimetype_missing(self, mimetypes):
@@ -513,7 +513,7 @@
dictionaries with the following keys:
- **id** (bytes)
- - **translated_metadata** (str): associated metadata
+ - **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
"""
@@ -526,7 +526,7 @@
metadata (iterable): dictionaries with keys:
- **id**: sha1
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute the
results
@@ -538,7 +538,7 @@
raise TypeError('identifiers must be bytes.')
self._content_metadata.add(metadata, conflict_update)
- def revision_metadata_missing(self, metadata):
+ def revision_intrinsic_metadata_missing(self, metadata):
"""List metadata missing from storage.
Args:
@@ -552,9 +552,9 @@
missing ids
"""
- yield from self._revision_metadata.missing(metadata)
+ yield from self._revision_intrinsic_metadata.missing(metadata)
- def revision_metadata_get(self, ids):
+ def revision_intrinsic_metadata_get(self, ids):
"""Retrieve revision metadata per id.
Args:
@@ -564,22 +564,22 @@
dictionaries with the following keys:
- **id** (bytes)
- - **translated_metadata** (str): associated metadata
+ - **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
"""
- yield from self._revision_metadata.get(ids)
+ yield from self._revision_intrinsic_metadata.get(ids)
- def revision_metadata_add(self, metadata, conflict_update=False):
+ def revision_intrinsic_metadata_add(self, metadata, conflict_update=False):
"""Add metadata not present in storage.
Args:
metadata (iterable): dictionaries with keys:
- **id**: sha1_git of revision
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
@@ -590,9 +590,9 @@
"""
if not all(isinstance(x['id'], bytes) for x in metadata):
raise TypeError('identifiers must be bytes.')
- self._revision_metadata.add(metadata, conflict_update)
+ self._revision_intrinsic_metadata.add(metadata, conflict_update)
- def revision_metadata_delete(self, entries):
+ def revision_intrinsic_metadata_delete(self, entries):
"""Remove revision metadata from the storage.
Args:
@@ -600,7 +600,7 @@
- **revision** (int): origin identifier
- **id** (int): tool used to compute metadata
"""
- self._revision_metadata.delete(entries)
+ self._revision_intrinsic_metadata.delete(entries)
def origin_intrinsic_metadata_get(self, ids):
"""Retrieve origin metadata per id.
@@ -611,16 +611,14 @@
Yields:
list: dictionaries with the following keys:
- - **origin_id** (int)
- - **translated_metadata** (str): associated metadata
+ - **id** (int)
+ - **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
"""
- for item in self._origin_intrinsic_metadata.get(ids):
- item['origin_id'] = item.pop('id')
- yield item
+ yield from self._origin_intrinsic_metadata.get(ids)
def origin_intrinsic_metadata_add(self, metadata,
conflict_update=False):
@@ -629,7 +627,7 @@
Args:
metadata (iterable): dictionaries with keys:
- - **origin_id**: origin identifier
+ - **id**: origin identifier
- **from_revision**: sha1 id of the revision used to generate
these metadata.
- **metadata**: arbitrary dict
@@ -641,29 +639,18 @@
or skip duplicates (false, the default)
"""
-
- items = []
- for item in metadata:
- item = item.copy()
- item['id'] = item.pop('origin_id')
- items.append(item)
- self._origin_intrinsic_metadata.add(items, conflict_update)
+ self._origin_intrinsic_metadata.add(metadata, conflict_update)
def origin_intrinsic_metadata_delete(self, entries):
"""Remove origin metadata from the storage.
Args:
entries (dict): dictionaries with the following keys:
- - **origin_id** (int): origin identifier
+ - **id** (int): origin identifier
- **indexer_configuration_id** (int): tool used to compute
metadata
"""
- items = []
- for entry in entries:
- item = entry.copy()
- item['id'] = item.pop('origin_id')
- items.append(item)
- self._origin_intrinsic_metadata.delete(items)
+ self._origin_intrinsic_metadata.delete(entries)
def origin_intrinsic_metadata_search_fulltext(
self, conjunction, limit=100):
@@ -712,8 +699,6 @@
results.sort(key=operator.itemgetter(0), # Don't try to order 'data'
reverse=True)
for (rank_, result) in results[:limit]:
- result = result.copy()
- result['origin_id'] = result.pop('id')
yield result
def origin_intrinsic_metadata_search_by_producer(
@@ -759,8 +744,6 @@
if ids_only:
yield entry['id']
else:
- entry = entry.copy()
- entry['origin_id'] = entry.pop('id')
yield entry
nb_results += 1
diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py
--- a/swh/indexer/tests/conftest.py
+++ b/swh/indexer/tests/conftest.py
@@ -12,7 +12,7 @@
from .utils import fill_storage, fill_obj_storage
-TASK_NAMES = ['revision_metadata', 'origin_intrinsic_metadata']
+TASK_NAMES = ['revision_intrinsic_metadata', 'origin_intrinsic_metadata']
@pytest.fixture
diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py
--- a/swh/indexer/tests/storage/test_converters.py
+++ b/swh/indexer/tests/storage/test_converters.py
@@ -169,12 +169,12 @@
'tool_name': 'some-toolname',
'tool_version': 'some-toolversion',
'tool_configuration': {},
- 'translated_metadata': b'translated_metadata',
+ 'metadata': b'metadata',
}
expected_metadata = {
'id': b'some-id',
- 'translated_metadata': b'translated_metadata',
+ 'metadata': b'metadata',
'tool': {
'id': 20,
'name': 'some-toolname',
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -931,7 +931,7 @@
endpoint_type='content_metadata',
tool_name='swh-metadata-detector',
example_data1={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'codeRepository': {
'type': 'git',
@@ -943,7 +943,7 @@
},
},
example_data2={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'name': 'test_metadata',
'version': '0.0.1'
@@ -951,21 +951,21 @@
},
)
- # revision_metadata tests
+ # revision_intrinsic_metadata tests
(
- test_revision_metadata_missing,
- test_revision_metadata_add__drop_duplicate,
- test_revision_metadata_add__update_in_place_duplicate,
- test_revision_metadata_add__update_in_place_deadlock,
- test_revision_metadata_add__duplicate_twice,
- test_revision_metadata_get,
- test_revision_metadata_delete,
- test_revision_metadata_delete_nonexisting,
+ test_revision_intrinsic_metadata_missing,
+ test_revision_intrinsic_metadata_add__drop_duplicate,
+ test_revision_intrinsic_metadata_add__update_in_place_duplicate,
+ test_revision_intrinsic_metadata_add__update_in_place_deadlock,
+ test_revision_intrinsic_metadata_add__duplicate_twice,
+ test_revision_intrinsic_metadata_get,
+ test_revision_intrinsic_metadata_delete,
+ test_revision_intrinsic_metadata_delete_nonexisting,
) = gen_generic_endpoint_tests(
- endpoint_type='revision_metadata',
+ endpoint_type='revision_intrinsic_metadata',
tool_name='swh-metadata-detector',
example_data1={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'codeRepository': {
'type': 'git',
@@ -978,7 +978,7 @@
'mappings': ['mapping1'],
},
example_data2={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'name': 'test_metadata',
'version': '0.0.1'
@@ -997,12 +997,12 @@
}
metadata_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata,
+ 'metadata': metadata,
'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
'mappings': ['mapping1'],
@@ -1010,7 +1010,7 @@
}
# when
- self.storage.revision_metadata_add([metadata_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev])
self.storage.origin_intrinsic_metadata_add([metadata_origin])
# then
@@ -1018,7 +1018,7 @@
[self.origin_id_1, 42]))
expected_metadata = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
@@ -1037,28 +1037,28 @@
}
metadata_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata,
+ 'metadata': metadata,
'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
'mappings': ['mapping1'],
'from_revision': self.revision_id_2,
}
metadata_origin2 = metadata_origin.copy()
- metadata_origin2['origin_id'] = self.origin_id_2
+ metadata_origin2['id'] = self.origin_id_2
# when
- self.storage.revision_metadata_add([metadata_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev])
self.storage.origin_intrinsic_metadata_add([
metadata_origin, metadata_origin2])
self.storage.origin_intrinsic_metadata_delete([
{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'indexer_configuration_id': tool_id
}
])
@@ -1074,7 +1074,7 @@
tool_id = self.tools['swh-metadata-detector']['id']
self.storage.origin_intrinsic_metadata_delete([
{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'indexer_configuration_id': tool_id
}
])
@@ -1089,12 +1089,12 @@
}
metadata_rev_v1 = {
'id': self.revision_id_1,
- 'translated_metadata': metadata_v1.copy(),
+ 'metadata': metadata_v1.copy(),
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
'mappings': [],
@@ -1102,7 +1102,7 @@
}
# given
- self.storage.revision_metadata_add([metadata_rev_v1])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v1])
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1])
# when
@@ -1110,7 +1110,7 @@
[self.origin_id_1, 42]))
expected_metadata_v1 = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_1,
@@ -1127,10 +1127,10 @@
})
metadata_rev_v2 = metadata_rev_v1.copy()
metadata_origin_v2 = metadata_origin_v1.copy()
- metadata_rev_v2['translated_metadata'] = metadata_v2
- metadata_origin_v2['translated_metadata'] = metadata_v2
+ metadata_rev_v2['metadata'] = metadata_v2
+ metadata_origin_v2['metadata'] = metadata_v2
- self.storage.revision_metadata_add([metadata_rev_v2])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v2])
self.storage.origin_intrinsic_metadata_add([metadata_origin_v2])
# then
@@ -1150,12 +1150,12 @@
}
metadata_rev_v1 = {
'id': self.revision_id_2,
- 'translated_metadata': metadata_v1,
+ 'metadata': metadata_v1,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
'mappings': [],
@@ -1163,7 +1163,7 @@
}
# given
- self.storage.revision_metadata_add([metadata_rev_v1])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v1])
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1])
# when
@@ -1172,7 +1172,7 @@
# then
expected_metadata_v1 = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
@@ -1188,19 +1188,19 @@
})
metadata_rev_v2 = metadata_rev_v1.copy()
metadata_origin_v2 = metadata_origin_v1.copy()
- metadata_rev_v2['translated_metadata'] = metadata_v2
+ metadata_rev_v2['metadata'] = metadata_v2
metadata_origin_v2['metadata'] = metadata_v2
- self.storage.revision_metadata_add([metadata_rev_v2],
- conflict_update=True)
- self.storage.origin_intrinsic_metadata_add([metadata_origin_v2],
- conflict_update=True)
+ self.storage.revision_intrinsic_metadata_add(
+ [metadata_rev_v2], conflict_update=True)
+ self.storage.origin_intrinsic_metadata_add(
+ [metadata_origin_v2], conflict_update=True)
actual_metadata = list(self.storage.origin_intrinsic_metadata_get(
[self.origin_id_1]))
expected_metadata_v2 = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v2,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
@@ -1233,7 +1233,7 @@
metadata_rev_v1 = {
'id': self.revision_id_2,
- 'translated_metadata': {
+ 'metadata': {
'version': None,
'name': None,
},
@@ -1243,7 +1243,7 @@
data_v1 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data1,
'indexer_configuration_id': tool_id,
@@ -1252,7 +1252,7 @@
]
data_v2 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data2,
'indexer_configuration_id': tool_id,
@@ -1266,7 +1266,7 @@
data_v2b = list(reversed(data_v2[0:-1]))
# given
- self.storage.revision_metadata_add([metadata_rev_v1])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v1])
self.storage.origin_intrinsic_metadata_add(data_v1)
# when
@@ -1274,7 +1274,7 @@
expected_data_v1 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data1,
'tool': self.tools['swh-metadata-detector'],
@@ -1306,7 +1306,7 @@
expected_data_v2 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data2,
'tool': self.tools['swh-metadata-detector'],
@@ -1327,12 +1327,12 @@
}
metadata_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata,
+ 'metadata': metadata,
'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
'mappings': ['mapping1'],
@@ -1340,7 +1340,7 @@
}
# when
- self.storage.revision_metadata_add([metadata_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev])
with self.assertRaises(ValueError):
self.storage.origin_intrinsic_metadata_add([
@@ -1355,12 +1355,12 @@
}
metadata1_rev = {
'id': self.revision_id_1,
- 'translated_metadata': metadata1,
+ 'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1371,12 +1371,12 @@
}
metadata2_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata2,
+ 'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1384,24 +1384,24 @@
}
# when
- self.storage.revision_metadata_add([metadata1_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata1_rev])
self.storage.origin_intrinsic_metadata_add([metadata1_origin])
- self.storage.revision_metadata_add([metadata2_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata2_rev])
self.storage.origin_intrinsic_metadata_add([metadata2_origin])
# then
search = self.storage.origin_intrinsic_metadata_search_fulltext
self.assertCountEqual(
- [res['origin_id'] for res in search(['Doe'])],
+ [res['id'] for res in search(['Doe'])],
[self.origin_id_1, self.origin_id_2])
self.assertEqual(
- [res['origin_id'] for res in search(['John', 'Doe'])],
+ [res['id'] for res in search(['John', 'Doe'])],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John'])],
+ [res['id'] for res in search(['John'])],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John', 'Jane'])],
+ [res['id'] for res in search(['John', 'Jane'])],
[])
def test_origin_intrinsic_metadata_search_fulltext_rank(self):
@@ -1421,12 +1421,12 @@
}
metadata1_rev = {
'id': self.revision_id_1,
- 'translated_metadata': metadata1,
+ 'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1440,12 +1440,12 @@
}
metadata2_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata2,
+ 'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1453,27 +1453,27 @@
}
# when
- self.storage.revision_metadata_add([metadata1_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata1_rev])
self.storage.origin_intrinsic_metadata_add([metadata1_origin])
- self.storage.revision_metadata_add([metadata2_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata2_rev])
self.storage.origin_intrinsic_metadata_add([metadata2_origin])
# then
search = self.storage.origin_intrinsic_metadata_search_fulltext
self.assertEqual(
- [res['origin_id'] for res in search(['Doe'])],
+ [res['id'] for res in search(['Doe'])],
[self.origin_id_1, self.origin_id_2])
self.assertEqual(
- [res['origin_id'] for res in search(['Doe'], limit=1)],
+ [res['id'] for res in search(['Doe'], limit=1)],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John'])],
+ [res['id'] for res in search(['John'])],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['Jane'])],
+ [res['id'] for res in search(['Jane'])],
[self.origin_id_2, self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John', 'Jane'])],
+ [res['id'] for res in search(['John', 'Jane'])],
[self.origin_id_1])
def _fill_origin_intrinsic_metadata(self):
@@ -1486,12 +1486,12 @@
}
metadata1_rev = {
'id': self.revision_id_1,
- 'translated_metadata': metadata1,
+ 'metadata': metadata1,
'mappings': ['npm'],
'indexer_configuration_id': tool1_id,
}
metadata1_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata1,
'mappings': ['npm'],
'indexer_configuration_id': tool1_id,
@@ -1503,12 +1503,12 @@
}
metadata2_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata2,
+ 'metadata': metadata2,
'mappings': ['npm', 'gemspec'],
'indexer_configuration_id': tool2_id,
}
metadata2_origin = {
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': metadata2,
'mappings': ['npm', 'gemspec'],
'indexer_configuration_id': tool2_id,
@@ -1519,23 +1519,23 @@
}
metadata3_rev = {
'id': self.revision_id_3,
- 'translated_metadata': metadata3,
+ 'metadata': metadata3,
'mappings': ['npm', 'gemspec'],
'indexer_configuration_id': tool2_id,
}
metadata3_origin = {
- 'origin_id': self.origin_id_3,
+ 'id': self.origin_id_3,
'metadata': metadata3,
'mappings': ['pkg-info'],
'indexer_configuration_id': tool2_id,
'from_revision': self.revision_id_3,
}
- self.storage.revision_metadata_add([metadata1_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata1_rev])
self.storage.origin_intrinsic_metadata_add([metadata1_origin])
- self.storage.revision_metadata_add([metadata2_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata2_rev])
self.storage.origin_intrinsic_metadata_add([metadata2_origin])
- self.storage.revision_metadata_add([metadata3_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata3_rev])
self.storage.origin_intrinsic_metadata_add([metadata3_origin])
def test_origin_intrinsic_metadata_search_by_producer(self):
@@ -1597,7 +1597,7 @@
# test ids_only=False
self.assertEqual(list(endpoint(mappings=['gemspec'])), [{
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': {
'@context': 'foo',
'author': 'Jane Doe',
diff --git a/swh/indexer/tests/tasks.py b/swh/indexer/tests/tasks.py
--- a/swh/indexer/tests/tasks.py
+++ b/swh/indexer/tests/tasks.py
@@ -41,7 +41,7 @@
@app.task
-def revision_metadata(*args, **kwargs):
+def revision_intrinsic_metadata(*args, **kwargs):
indexer = RevisionMetadataTestIndexer()
indexer.run(*args, **kwargs)
print('REV RESULT=', indexer.results)
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -41,7 +41,7 @@
origin_metadata = [
{
- 'origin_id': origin_id,
+ 'id': origin_id,
'from_revision': hash_to_bytes('abcd{:0>4}'.format(origin_id)),
'indexer_configuration_id': tools[origin_id % 2]['id'],
'metadata': {'name': 'origin %d' % origin_id},
@@ -59,7 +59,7 @@
for origin_id in range(nb_rows)
]
- idx_storage.revision_metadata_add(revision_metadata)
+ idx_storage.revision_intrinsic_metadata_add(revision_metadata)
idx_storage.origin_intrinsic_metadata_add(origin_metadata)
return [tool['id'] for tool in tools]
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -251,7 +251,7 @@
sha1s))
expected_results = [{
- 'translated_metadata': {
+ 'metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'codeRepository':
@@ -262,7 +262,7 @@
},
'id': hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'),
}, {
- 'translated_metadata': {
+ 'metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'issueTracker':
@@ -1111,7 +1111,7 @@
metadata_indexer.idx_storage.content_metadata_add([{
'indexer_configuration_id': tool['id'],
'id': b'cde',
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
}])
sha1_gits = [
@@ -1119,13 +1119,14 @@
]
metadata_indexer.run(sha1_gits, 'update-dups')
- results = list(metadata_indexer.idx_storage.revision_metadata_get(
- sha1_gits))
+ results = list(
+ metadata_indexer.idx_storage.
+ revision_intrinsic_metadata_get(sha1_gits))
expected_results = [{
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
'tool': TRANSLATOR_TOOL,
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}]
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py
--- a/swh/indexer/tests/test_origin_head.py
+++ b/swh/indexer/tests/test_origin_head.py
@@ -18,7 +18,7 @@
'configuration': {},
},
'tasks': {
- 'revision_metadata': None,
+ 'revision_intrinsic_metadata': None,
'origin_intrinsic_metadata': None,
}
}
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -26,17 +26,18 @@
rev_metadata = {
'id': rev_id,
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
origin_metadata = {
- 'origin_id': origin['id'],
+ 'id': origin['id'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
for result in results:
del result['tool']
assert results == [rev_metadata]
@@ -62,7 +63,8 @@
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert len(results) == 1
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -112,17 +114,18 @@
rev_metadata = {
'id': rev_id,
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
origin_metadata = {
- 'origin_id': origin2['id'],
+ 'id': origin2['id'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
for result in results:
del result['tool']
assert results == [rev_metadata]
@@ -151,7 +154,8 @@
assert origin1['id'] != origin2['id']
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert len(results) == 1
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -172,7 +176,8 @@
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert results == []
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -191,7 +196,8 @@
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert results != []
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -202,7 +208,8 @@
b'foo.json'):
indexer.run(["git+https://github.com/librariesio/yarn-parser"])
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert results == []
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 9:41 AM (22 h, 17 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216178

Event Timeline