Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163509
D1226.id3907.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
72 KB
Subscribers
None
D1226.id3907.diff
View Options
diff --git a/sql/upgrades/123.sql b/sql/upgrades/123.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/123.sql
@@ -0,0 +1,103 @@
+-- SWH Indexer DB schema upgrade
+-- from_version: 122
+-- to_version: 123
+-- description: fix heterogeneity of names in metadata tables
+
+insert into dbversion(version, release, description)
+values(123, now(), 'fix heterogeneity of names in metadata tables');
+
+create or replace function swh_content_metadata_add(conflict_update boolean)
+ returns void
+ language plpgsql
+as $$
+begin
+ if conflict_update then
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
+ from tmp_content_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do update set metadata = excluded.metadata;
+
+ else
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
+ from tmp_content_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do nothing;
+ end if;
+ return;
+end
+$$;
+
+alter function swh_revision_metadata_add(boolean) rename to swh_revision_intrinsic_metadata_add;
+create or replace function swh_revision_intrinsic_metadata_add(conflict_update boolean)
+ returns void
+ language plpgsql
+as $$
+begin
+ if conflict_update then
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do update set
+ metadata = excluded.metadata,
+ mappings = excluded.mappings;
+
+ else
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
+ on conflict(id, indexer_configuration_id)
+ do nothing;
+ end if;
+ return;
+end
+$$;
+
+alter function swh_mktemp_revision_metadata() rename to swh_mktemp_revision_intrinsic_metadata;
+create or replace function swh_mktemp_revision_intrinsic_metadata()
+ returns void
+ language sql
+as $$
+ create temporary table tmp_revision_intrinsic_metadata (
+ like revision_intrinsic_metadata including defaults
+ ) on commit drop;
+$$;
+
+create or replace function swh_origin_intrinsic_metadata_add(
+ conflict_update boolean)
+ returns void
+ language plpgsql
+as $$
+begin
+ perform swh_origin_intrinsic_metadata_compute_tsvector();
+ if conflict_update then
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
+ metadata_tsvector, mappings
+ from tmp_origin_intrinsic_metadata
+ on conflict(id, indexer_configuration_id)
+ do update set
+ metadata = excluded.metadata,
+ mappings = excluded.mappings;
+
+ else
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
+ metadata_tsvector, mappings
+ from tmp_origin_intrinsic_metadata
+ on conflict(id, indexer_configuration_id)
+ do nothing;
+ end if;
+ return;
+end
+$$;
+
+alter index revision_metadata_pkey rename to revision_intrinsic_metadata_pkey;
+
+alter table revision_metadata rename column translated_metadata to metadata;
+alter table content_metadata rename column translated_metadata to metadata;
+alter table origin_intrinsic_metadata rename column origin_id to id;
+
+alter table revision_metadata rename to revision_intrinsic_metadata;
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -22,7 +22,7 @@
- filtering out content already indexed in content_metadata
- reading content from objstorage with the content's id sha1
- - computing translated_metadata by given context
+ - computing metadata by given context
- using the metadata_dictionary as the 'swh-metadata-translator' tool
- store result in content_metadata table
@@ -46,25 +46,25 @@
Returns:
dict: dictionary representing a content_metadata. If the
- translation wasn't successful the translated_metadata keys will
+ translation wasn't successful the metadata keys will
be returned as None
"""
result = {
'id': id,
'indexer_configuration_id': self.tool['id'],
- 'translated_metadata': None
+ 'metadata': None
}
try:
mapping_name = self.tool['tool_configuration']['context']
log_suffix += ', content_id=%s' % hashutil.hash_to_hex(id)
- result['translated_metadata'] = \
+ result['metadata'] = \
MAPPINGS[mapping_name](log_suffix).translate(data)
except Exception:
self.log.exception(
"Problem during metadata translation "
"for content %s" % hashutil.hash_to_hex(id))
- if result['translated_metadata'] is None:
+ if result['metadata'] is None:
return None
return result
@@ -75,7 +75,7 @@
results ([dict]): list of content_metadata, dict with the
following keys:
- id (bytes): content's identifier (sha1)
- - translated_metadata (jsonb): detected metadata
+ - metadata (jsonb): detected metadata
policy_update ([str]): either 'update-dups' or 'ignore-dups' to
respectively update duplicates or ignore them
@@ -89,8 +89,8 @@
This indexer is in charge of:
- - filtering revisions already indexed in revision_metadata table with
- defined computation tool
+ - filtering revisions already indexed in revision_intrinsic_metadata table
+ with defined computation tool
- retrieve all entry_files in root directory
- use metadata_detector for file_names containing metadata
- compute metadata translation if necessary and possible (depends on tool)
@@ -111,7 +111,7 @@
"""Filter out known sha1s and return only missing ones.
"""
- yield from self.idx_storage.revision_metadata_missing((
+ yield from self.idx_storage.revision_intrinsic_metadata_missing((
{
'id': sha1_git,
'indexer_configuration_id': self.tool['id'],
@@ -130,18 +130,19 @@
rev (dict): revision artifact from storage
Returns:
- dict: dictionary representing a revision_metadata, with keys:
+ dict: dictionary representing a revision_intrinsic_metadata, with
+ keys:
- id (str): rev's identifier (sha1_git)
- indexer_configuration_id (bytes): tool used
- - translated_metadata: dict of retrieved metadata
+ - metadata: dict of retrieved metadata
"""
result = {
'id': rev['id'],
'indexer_configuration_id': self.tool['id'],
'mappings': None,
- 'translated_metadata': None
+ 'metadata': None
}
try:
@@ -149,11 +150,11 @@
dir_ls = self.storage.directory_ls(root_dir, recursive=False)
files = [entry for entry in dir_ls if entry['type'] == 'file']
detected_files = detect_metadata(files)
- (mappings, metadata) = self.translate_revision_metadata(
+ (mappings, metadata) = self.translate_revision_intrinsic_metadata(
detected_files,
log_suffix='revision=%s' % hashutil.hash_to_hex(rev['id']))
result['mappings'] = mappings
- result['translated_metadata'] = metadata
+ result['metadata'] = metadata
except Exception as e:
self.log.exception(
'Problem when indexing rev: %r', e)
@@ -172,11 +173,13 @@
respectively update duplicates or ignore them
"""
- # TODO: add functions in storage to keep data in revision_metadata
- self.idx_storage.revision_metadata_add(
+ # TODO: add functions in storage to keep data in
+ # revision_intrinsic_metadata
+ self.idx_storage.revision_intrinsic_metadata_add(
results, conflict_update=(policy_update == 'update-dups'))
- def translate_revision_metadata(self, detected_files, log_suffix):
+ def translate_revision_intrinsic_metadata(
+ self, detected_files, log_suffix):
"""
Determine plan of action to translate metadata when containing
one or multiple detected files:
@@ -191,7 +194,7 @@
"""
used_mappings = [MAPPINGS[context].name for context in detected_files]
- translated_metadata = []
+ metadata = []
tool = {
'name': 'swh-metadata-translator',
'version': '0.0.2',
@@ -215,13 +218,13 @@
metadata_generator = self.idx_storage.content_metadata_get(
detected_files[context])
for c in metadata_generator:
- # extracting translated_metadata
+ # extracting metadata
sha1 = c['id']
sha1s_in_storage.append(sha1)
- local_metadata = c['translated_metadata']
+ local_metadata = c['metadata']
# local metadata is aggregated
if local_metadata:
- translated_metadata.append(local_metadata)
+ metadata.append(local_metadata)
sha1s_filtered = [item for item in detected_files[context]
if item not in sha1s_in_storage]
@@ -234,15 +237,15 @@
log_suffix=log_suffix)
# on the fly possibility:
for result in c_metadata_indexer.results:
- local_metadata = result['translated_metadata']
- translated_metadata.append(local_metadata)
+ local_metadata = result['metadata']
+ metadata.append(local_metadata)
except Exception:
self.log.exception(
"Exception while indexing metadata on contents")
- # transform translated_metadata into min set with swh-metadata-detector
- min_metadata = extract_minimal_metadata_dict(translated_metadata)
+ # transform metadata into min set with swh-metadata-detector
+ min_metadata = extract_minimal_metadata_dict(metadata)
return (used_mappings, min_metadata)
@@ -278,8 +281,8 @@
rev_metadata = self.revision_metadata_indexer.index(rev)
orig_metadata = {
'from_revision': rev_metadata['id'],
- 'origin_id': origin['id'],
- 'metadata': rev_metadata['translated_metadata'],
+ 'id': origin['id'],
+ 'metadata': rev_metadata['metadata'],
'mappings': rev_metadata['mappings'],
'indexer_configuration_id':
rev_metadata['indexer_configuration_id'],
@@ -311,7 +314,7 @@
origs_to_delete.append(orig_item)
if rev_metadata:
- self.idx_storage.revision_metadata_add(
+ self.idx_storage.revision_intrinsic_metadata_add(
rev_metadata, conflict_update=conflict_update)
if orig_metadata:
self.idx_storage.origin_intrinsic_metadata_add(
@@ -324,4 +327,4 @@
if origs_to_delete:
self.idx_storage.origin_intrinsic_metadata_delete(origs_to_delete)
if revs_to_delete:
- self.idx_storage.revision_metadata_delete(revs_to_delete)
+ self.idx_storage.revision_intrinsic_metadata_delete(revs_to_delete)
diff --git a/swh/indexer/sql/30-swh-schema.sql b/swh/indexer/sql/30-swh-schema.sql
--- a/swh/indexer/sql/30-swh-schema.sql
+++ b/swh/indexer/sql/30-swh-schema.sql
@@ -103,33 +103,33 @@
-- identified as potentially containning metadata with a translation tool (indexer_configuration_id)
create table content_metadata(
id sha1 not null,
- translated_metadata jsonb not null,
+ metadata jsonb not null,
indexer_configuration_id bigint not null
);
comment on table content_metadata is 'metadata semantically translated from a content file';
comment on column content_metadata.id is 'sha1 of content file';
-comment on column content_metadata.translated_metadata is 'result of translation with defined format';
+comment on column content_metadata.metadata is 'result of translation with defined format';
comment on column content_metadata.indexer_configuration_id is 'tool used for translation';
--- The table revision_metadata provides a minimal set of intrinsic metadata
--- detected with the detection tool (indexer_configuration_id) and aggregated
--- from the content_metadata translation.
-create table revision_metadata(
+-- The table revision_intrinsic_metadata provides a minimal set of intrinsic
+-- metadata detected with the detection tool (indexer_configuration_id) and
+-- aggregated from the content_metadata translation.
+create table revision_intrinsic_metadata(
id sha1_git not null,
- translated_metadata jsonb not null,
+ metadata jsonb not null,
indexer_configuration_id bigint not null,
mappings text array not null
);
-comment on table revision_metadata is 'metadata semantically detected and translated in a revision';
-comment on column revision_metadata.id is 'sha1_git of revision';
-comment on column revision_metadata.translated_metadata is 'result of detection and translation with defined format';
-comment on column revision_metadata.indexer_configuration_id is 'tool used for detection';
-comment on column revision_metadata.mappings is 'type of metadata files used to obtain this metadata (eg. pkg-info, npm)';
+comment on table revision_intrinsic_metadata is 'metadata semantically detected and translated in a revision';
+comment on column revision_intrinsic_metadata.id is 'sha1_git of revision';
+comment on column revision_intrinsic_metadata.metadata is 'result of detection and translation with defined format';
+comment on column revision_intrinsic_metadata.indexer_configuration_id is 'tool used for detection';
+comment on column revision_intrinsic_metadata.mappings is 'type of metadata files used to obtain this metadata (eg. pkg-info, npm)';
create table origin_intrinsic_metadata(
- origin_id bigserial not null,
+ id bigserial not null,
metadata jsonb,
indexer_configuration_id bigint not null,
from_revision sha1_git not null,
@@ -138,7 +138,7 @@
);
comment on table origin_intrinsic_metadata is 'keeps intrinsic metadata for an origin';
-comment on column origin_intrinsic_metadata.origin_id is 'the entry id in origin';
+comment on column origin_intrinsic_metadata.id is 'the entry id in origin';
comment on column origin_intrinsic_metadata.metadata is 'metadata extracted from a revision';
comment on column origin_intrinsic_metadata.indexer_configuration_id is 'tool used to generate this metadata';
comment on column origin_intrinsic_metadata.from_revision is 'sha1 of the revision this metadata was copied from.';
diff --git a/swh/indexer/sql/40-swh-func.sql b/swh/indexer/sql/40-swh-func.sql
--- a/swh/indexer/sql/40-swh-func.sql
+++ b/swh/indexer/sql/40-swh-func.sql
@@ -267,15 +267,15 @@
as $$
begin
if conflict_update then
- insert into content_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
from tmp_content_metadata tcm
on conflict(id, indexer_configuration_id)
- do update set translated_metadata = excluded.translated_metadata;
+ do update set metadata = excluded.metadata;
else
- insert into content_metadata (id, translated_metadata, indexer_configuration_id)
- select id, translated_metadata, indexer_configuration_id
+ insert into content_metadata (id, metadata, indexer_configuration_id)
+ select id, metadata, indexer_configuration_id
from tmp_content_metadata tcm
on conflict(id, indexer_configuration_id)
do nothing;
@@ -300,33 +300,34 @@
-- end content_metadata functions
--- add tmp_revision_metadata entries to revision_metadata, overwriting
--- duplicates if conflict_update is true, skipping duplicates otherwise.
+-- add tmp_revision_intrinsic_metadata entries to revision_intrinsic_metadata,
+-- overwriting duplicates if conflict_update is true, skipping duplicates
+-- otherwise.
--
-- If filtering duplicates is in order, the call to
--- swh_revision_metadata_missing must take place before calling this
+-- swh_revision_intrinsic_metadata_missing must take place before calling this
-- function.
--
-- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
--- tmp_revision_metadata, 2. call this function
-create or replace function swh_revision_metadata_add(conflict_update boolean)
+-- tmp_revision_intrinsic_metadata, 2. call this function
+create or replace function swh_revision_intrinsic_metadata_add(conflict_update boolean)
returns void
language plpgsql
as $$
begin
if conflict_update then
- insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
- select id, translated_metadata, mappings, indexer_configuration_id
- from tmp_revision_metadata tcm
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
on conflict(id, indexer_configuration_id)
do update set
- translated_metadata = excluded.translated_metadata,
+ metadata = excluded.metadata,
mappings = excluded.mappings;
else
- insert into revision_metadata (id, translated_metadata, mappings, indexer_configuration_id)
- select id, translated_metadata, mappings, indexer_configuration_id
- from tmp_revision_metadata tcm
+ insert into revision_intrinsic_metadata (id, metadata, mappings, indexer_configuration_id)
+ select id, metadata, mappings, indexer_configuration_id
+ from tmp_revision_intrinsic_metadata tcm
on conflict(id, indexer_configuration_id)
do nothing;
end if;
@@ -334,19 +335,19 @@
end
$$;
-comment on function swh_revision_metadata_add(boolean) IS 'Add new revision metadata';
+comment on function swh_revision_intrinsic_metadata_add(boolean) IS 'Add new revision intrinsic metadata';
--- create a temporary table for retrieving revision_metadata
-create or replace function swh_mktemp_revision_metadata()
+-- create a temporary table for retrieving revision_intrinsic_metadata
+create or replace function swh_mktemp_revision_intrinsic_metadata()
returns void
language sql
as $$
- create temporary table tmp_revision_metadata (
- like revision_metadata including defaults
+ create temporary table tmp_revision_intrinsic_metadata (
+ like revision_intrinsic_metadata including defaults
) on commit drop;
$$;
-comment on function swh_mktemp_revision_metadata() is 'Helper table to add revision metadata';
+comment on function swh_mktemp_revision_intrinsic_metadata() is 'Helper table to add revision intrinsic metadata';
-- create a temporary table for retrieving origin_intrinsic_metadata
create or replace function swh_mktemp_origin_intrinsic_metadata()
@@ -412,21 +413,21 @@
begin
perform swh_origin_intrinsic_metadata_compute_tsvector();
if conflict_update then
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
- select origin_id, metadata, indexer_configuration_id, from_revision,
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
- on conflict(origin_id, indexer_configuration_id)
+ on conflict(id, indexer_configuration_id)
do update set
metadata = excluded.metadata,
mappings = excluded.mappings;
else
- insert into origin_intrinsic_metadata (origin_id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
- select origin_id, metadata, indexer_configuration_id, from_revision,
+ insert into origin_intrinsic_metadata (id, metadata, indexer_configuration_id, from_revision, metadata_tsvector, mappings)
+ select id, metadata, indexer_configuration_id, from_revision,
metadata_tsvector, mappings
from tmp_origin_intrinsic_metadata
- on conflict(origin_id, indexer_configuration_id)
+ on conflict(id, indexer_configuration_id)
do nothing;
end if;
return;
diff --git a/swh/indexer/sql/60-swh-indexes.sql b/swh/indexer/sql/60-swh-indexes.sql
--- a/swh/indexer/sql/60-swh-indexes.sql
+++ b/swh/indexer/sql/60-swh-indexes.sql
@@ -25,12 +25,12 @@
alter table content_metadata add constraint content_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
alter table content_metadata validate constraint content_metadata_indexer_configuration_id_fkey;
--- revision_metadata
-create unique index revision_metadata_pkey on revision_metadata(id, indexer_configuration_id);
-alter table revision_metadata add primary key using index revision_metadata_pkey;
+-- revision_intrinsic_metadata
+create unique index revision_intrinsic_metadata_pkey on revision_intrinsic_metadata(id, indexer_configuration_id);
+alter table revision_intrinsic_metadata add primary key using index revision_intrinsic_metadata_pkey;
-alter table revision_metadata add constraint revision_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
-alter table revision_metadata validate constraint revision_metadata_indexer_configuration_id_fkey;
+alter table revision_intrinsic_metadata add constraint revision_intrinsic_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
+alter table revision_intrinsic_metadata validate constraint revision_intrinsic_metadata_indexer_configuration_id_fkey;
-- content_mimetype
create unique index content_mimetype_pkey on content_mimetype(id, indexer_configuration_id);
@@ -57,12 +57,12 @@
alter table content_fossology_license validate constraint content_fossology_license_indexer_configuration_id_fkey;
-- origin_intrinsic_metadata
-create unique index origin_intrinsic_metadata_pkey on origin_intrinsic_metadata(origin_id, indexer_configuration_id);
+create unique index origin_intrinsic_metadata_pkey on origin_intrinsic_metadata(id, indexer_configuration_id);
alter table origin_intrinsic_metadata add primary key using index origin_intrinsic_metadata_pkey;
alter table origin_intrinsic_metadata add constraint origin_intrinsic_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
alter table origin_intrinsic_metadata validate constraint origin_intrinsic_metadata_indexer_configuration_id_fkey;
-alter table origin_intrinsic_metadata add constraint origin_intrinsic_metadata_revision_metadata_fkey foreign key (from_revision, indexer_configuration_id) references revision_metadata(id, indexer_configuration_id) not valid;
+alter table origin_intrinsic_metadata add constraint origin_intrinsic_metadata_revision_metadata_fkey foreign key (from_revision, indexer_configuration_id) references revision_intrinsic_metadata(id, indexer_configuration_id) not valid;
alter table origin_intrinsic_metadata validate constraint origin_intrinsic_metadata_revision_metadata_fkey;
create index origin_intrinsic_metadata_fulltext_idx on origin_intrinsic_metadata using gin (metadata_tsvector);
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -51,32 +51,30 @@
return IndexerStorage(**args)
-def _check_duplicates(data, key):
+def _check_id_duplicates(data):
"""
- If any two dictionaries in `data` have the same value for the
- key, raises a `ValueError`.
+ If any two dictionaries in `data` have the same id, raises
+ a `ValueError`.
Values associated to the key must be hashable.
Args:
data (List[dict]): List of dictionaries to be inserted
- key (str): Name of the key that acts as id.
- >>> _check_duplicates([
+ >>> _check_id_duplicates([
... {'id': 'foo', 'data': 'spam'},
... {'id': 'bar', 'data': 'egg'},
- ... ], 'id')
- >>> _check_duplicates([
+ ... ])
+ >>> _check_id_duplicates([
... {'id': 'foo', 'data': 'spam'},
... {'id': 'foo', 'data': 'egg'},
- ... ], 'id')
+ ... ])
Traceback (most recent call last):
...
ValueError: The same id is present more than once.
"""
- if len({item[key] for item in data}) < len(data):
- raise ValueError(
- 'The same {} is present more than once.'.format(key))
+ if len({item['id'] for item in data}) < len(data):
+ raise ValueError('The same id is present more than once.')
class IndexerStorage:
@@ -246,7 +244,7 @@
default)
"""
- _check_duplicates(mimetypes, 'id')
+ _check_id_duplicates(mimetypes)
mimetypes.sort(key=lambda m: m['id'])
db.mktemp_content_mimetype(cur)
db.copy_to(mimetypes, 'tmp_content_mimetype',
@@ -332,7 +330,7 @@
default)
"""
- _check_duplicates(languages, 'id')
+ _check_id_duplicates(languages)
languages.sort(key=lambda m: m['id'])
db.mktemp_content_language(cur)
# empty language is mapped to 'unknown'
@@ -403,7 +401,7 @@
line, lang
"""
- _check_duplicates(ctags, 'id')
+ _check_id_duplicates(ctags)
ctags.sort(key=lambda m: m['id'])
def _convert_ctags(__ctags):
@@ -487,7 +485,7 @@
list: content_license entries which failed due to unknown licenses
"""
- _check_duplicates(licenses, 'id')
+ _check_id_duplicates(licenses)
licenses.sort(key=lambda m: m['id'])
db.mktemp_content_fossology_license(cur)
db.copy_to(
@@ -562,7 +560,7 @@
dictionaries with the following keys:
id (bytes)
- translated_metadata (str): associated metadata
+ metadata (str): associated metadata
tool (dict): tool used to compute metadata
"""
@@ -580,25 +578,25 @@
metadata (iterable): dictionaries with keys:
- **id**: sha1
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
conflict_update: Flag to determine if we want to overwrite (true)
or skip duplicates (false, the default)
"""
- _check_duplicates(metadata, 'id')
+ _check_id_duplicates(metadata)
metadata.sort(key=lambda m: m['id'])
db.mktemp_content_metadata(cur)
db.copy_to(metadata, 'tmp_content_metadata',
- ['id', 'translated_metadata', 'indexer_configuration_id'],
+ ['id', 'metadata', 'indexer_configuration_id'],
cur)
db.content_metadata_add_from_temp(conflict_update, cur)
- @remote_api_endpoint('revision_metadata/missing')
+ @remote_api_endpoint('revision_intrinsic_metadata/missing')
@db_transaction_generator()
- def revision_metadata_missing(self, metadata, db=None, cur=None):
+ def revision_intrinsic_metadata_missing(self, metadata, db=None, cur=None):
"""List metadata missing from storage.
Args:
@@ -612,12 +610,13 @@
missing ids
"""
- for obj in db.revision_metadata_missing_from_list(metadata, cur):
+ for obj in db.revision_intrinsic_metadata_missing_from_list(
+ metadata, cur):
yield obj[0]
- @remote_api_endpoint('revision_metadata')
+ @remote_api_endpoint('revision_intrinsic_metadata')
@db_transaction_generator()
- def revision_metadata_get(self, ids, db=None, cur=None):
+ def revision_intrinsic_metadata_get(self, ids, db=None, cur=None):
"""Retrieve revision metadata per id.
Args:
@@ -627,27 +626,27 @@
dictionaries with the following keys:
- **id** (bytes)
- - **translated_metadata** (str): associated metadata
+ - **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
"""
- for c in db.revision_metadata_get_from_list(ids, cur):
+ for c in db.revision_intrinsic_metadata_get_from_list(ids, cur):
yield converters.db_to_metadata(
- dict(zip(db.revision_metadata_cols, c)))
+ dict(zip(db.revision_intrinsic_metadata_cols, c)))
- @remote_api_endpoint('revision_metadata/add')
+ @remote_api_endpoint('revision_intrinsic_metadata/add')
@db_transaction()
- def revision_metadata_add(self, metadata, conflict_update=False, db=None,
- cur=None):
+ def revision_intrinsic_metadata_add(self, metadata, conflict_update=False,
+ db=None, cur=None):
"""Add metadata not present in storage.
Args:
metadata (iterable): dictionaries with keys:
- **id**: sha1_git of revision
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
@@ -656,20 +655,20 @@
or skip duplicates (false, the default)
"""
- _check_duplicates(metadata, 'id')
+ _check_id_duplicates(metadata)
metadata.sort(key=lambda m: m['id'])
- db.mktemp_revision_metadata(cur)
+ db.mktemp_revision_intrinsic_metadata(cur)
- db.copy_to(metadata, 'tmp_revision_metadata',
- ['id', 'translated_metadata', 'mappings',
+ db.copy_to(metadata, 'tmp_revision_intrinsic_metadata',
+ ['id', 'metadata', 'mappings',
'indexer_configuration_id'],
cur)
- db.revision_metadata_add_from_temp(conflict_update, cur)
+ db.revision_intrinsic_metadata_add_from_temp(conflict_update, cur)
- @remote_api_endpoint('revision_metadata/delete')
+ @remote_api_endpoint('revision_intrinsic_metadata/delete')
@db_transaction()
- def revision_metadata_delete(self, entries, db=None, cur=None):
+ def revision_intrinsic_metadata_delete(self, entries, db=None, cur=None):
"""Remove revision metadata from the storage.
Args:
@@ -678,7 +677,7 @@
- **indexer_configuration_id** (int): tool used to compute
metadata
"""
- db.revision_metadata_delete(entries, cur)
+ db.revision_intrinsic_metadata_delete(entries, cur)
@remote_api_endpoint('origin_intrinsic_metadata')
@db_transaction_generator()
@@ -691,7 +690,7 @@
Yields:
list: dictionaries with the following keys:
- - **origin_id** (int)
+ - **id** (int)
- **metadata** (str): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
@@ -712,7 +711,7 @@
Args:
metadata (iterable): dictionaries with keys:
- - **origin_id**: origin identifier
+ - **id**: origin identifier
- **from_revision**: sha1 id of the revision used to generate
these metadata.
- **metadata**: arbitrary dict
@@ -724,13 +723,13 @@
or skip duplicates (false, the default)
"""
- _check_duplicates(metadata, 'origin_id')
- metadata.sort(key=lambda m: m['origin_id'])
+ _check_id_duplicates(metadata)
+ metadata.sort(key=lambda m: m['id'])
db.mktemp_origin_intrinsic_metadata(cur)
db.copy_to(metadata, 'tmp_origin_intrinsic_metadata',
- ['origin_id', 'metadata', 'indexer_configuration_id',
+ ['id', 'metadata', 'indexer_configuration_id',
'from_revision', 'mappings'],
cur)
db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -281,7 +281,7 @@
cur=cur)
content_metadata_cols = [
- 'id', 'translated_metadata',
+ 'id', 'metadata',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
@stored_procedure('swh_mktemp_content_metadata')
@@ -295,44 +295,48 @@
yield from self._get_from_list(
'content_metadata', ids, self.content_metadata_cols, cur=cur)
- revision_metadata_hash_keys = [
+ revision_intrinsic_metadata_hash_keys = [
'id', 'indexer_configuration_id']
- def revision_metadata_missing_from_list(self, metadata, cur=None):
+ def revision_intrinsic_metadata_missing_from_list(
+ self, metadata, cur=None):
"""List missing metadata.
"""
yield from self._missing_from_list(
- 'revision_metadata', metadata, self.revision_metadata_hash_keys,
- cur=cur)
+ 'revision_intrinsic_metadata', metadata,
+ self.revision_intrinsic_metadata_hash_keys, cur=cur)
- revision_metadata_cols = [
- 'id', 'translated_metadata', 'mappings',
+ revision_intrinsic_metadata_cols = [
+ 'id', 'metadata', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
- @stored_procedure('swh_mktemp_revision_metadata')
- def mktemp_revision_metadata(self, cur=None): pass
+ @stored_procedure('swh_mktemp_revision_intrinsic_metadata')
+ def mktemp_revision_intrinsic_metadata(self, cur=None): pass
- def revision_metadata_add_from_temp(self, conflict_update, cur=None):
- self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)",
- (conflict_update, ))
+ def revision_intrinsic_metadata_add_from_temp(
+ self, conflict_update, cur=None):
+ self._cursor(cur).execute(
+ "SELECT swh_revision_intrinsic_metadata_add(%s)",
+ (conflict_update, ))
- def revision_metadata_delete(
+ def revision_intrinsic_metadata_delete(
self, entries, cur=None):
cur = self._cursor(cur)
cur.execute(
- "DELETE from revision_metadata "
+ "DELETE from revision_intrinsic_metadata "
"WHERE (id, indexer_configuration_id) IN "
" (VALUES %s)" % (', '.join('%s' for _ in entries)),
tuple((e['id'], e['indexer_configuration_id'])
for e in entries),)
- def revision_metadata_get_from_list(self, ids, cur=None):
+ def revision_intrinsic_metadata_get_from_list(self, ids, cur=None):
yield from self._get_from_list(
- 'revision_metadata', ids, self.revision_metadata_cols, cur=cur)
+ 'revision_intrinsic_metadata', ids,
+ self.revision_intrinsic_metadata_cols, cur=cur)
origin_intrinsic_metadata_cols = [
- 'origin_id', 'metadata', 'from_revision', 'mappings',
+ 'id', 'metadata', 'from_revision', 'mappings',
'tool_id', 'tool_name', 'tool_version', 'tool_configuration']
origin_intrinsic_metadata_regconfig = 'pg_catalog.simple'
@@ -357,23 +361,25 @@
cur = self._cursor(cur)
cur.execute(
"DELETE from origin_intrinsic_metadata "
- "WHERE (origin_id, indexer_configuration_id) IN"
+ "WHERE (id, indexer_configuration_id) IN"
" (VALUES %s)" % (', '.join('%s' for _ in entries)),
- tuple((e['origin_id'], e['indexer_configuration_id'])
+ tuple((e['id'], e['indexer_configuration_id'])
for e in entries),)
def origin_intrinsic_metadata_get_from_list(self, orig_ids, cur=None):
yield from self._get_from_list(
'origin_intrinsic_metadata', orig_ids,
self.origin_intrinsic_metadata_cols, cur=cur,
- id_col='origin_id')
+ id_col='id')
def origin_intrinsic_metadata_search_fulltext(self, terms, *, limit, cur):
regconfig = self.origin_intrinsic_metadata_regconfig
tsquery_template = ' && '.join("plainto_tsquery('%s', %%s)" % regconfig
for _ in terms)
tsquery_args = [(term,) for term in terms]
- keys = map(self._convert_key, self.origin_intrinsic_metadata_cols)
+ keys = (self._convert_key(col, 'oim') for col in
+ self.origin_intrinsic_metadata_cols)
+
query = ("SELECT {keys} FROM origin_intrinsic_metadata AS oim "
"INNER JOIN indexer_configuration AS i "
"ON oim.indexer_configuration_id=i.id "
@@ -390,10 +396,10 @@
def origin_intrinsic_metadata_search_by_producer(
self, start, end, limit, ids_only, mappings, tool_ids, cur):
if ids_only:
- keys = 'oim.origin_id'
+ keys = 'oim.id'
else:
- keys = ', '.join(map(self._convert_key,
- self.origin_intrinsic_metadata_cols))
+ keys = ', '.join((self._convert_key(col, 'oim') for col in
+ self.origin_intrinsic_metadata_cols))
query_parts = [
"SELECT %s" % keys,
"FROM origin_intrinsic_metadata AS oim",
@@ -404,10 +410,10 @@
where = []
if start:
- where.append('oim.origin_id >= %s')
+ where.append('oim.id >= %s')
args.append(start)
if end:
- where.append('oim.origin_id <= %s')
+ where.append('oim.id <= %s')
args.append(end)
if mappings is not None:
where.append('oim.mappings && %s')
diff --git a/swh/indexer/storage/in_memory.py b/swh/indexer/storage/in_memory.py
--- a/swh/indexer/storage/in_memory.py
+++ b/swh/indexer/storage/in_memory.py
@@ -196,7 +196,7 @@
self._content_ctags = SubStorage(self._tools)
self._licenses = SubStorage(self._tools)
self._content_metadata = SubStorage(self._tools)
- self._revision_metadata = SubStorage(self._tools)
+ self._revision_intrinsic_metadata = SubStorage(self._tools)
self._origin_intrinsic_metadata = SubStorage(self._tools)
def content_mimetype_missing(self, mimetypes):
@@ -513,7 +513,7 @@
dictionaries with the following keys:
- **id** (bytes)
- - **translated_metadata** (str): associated metadata
+ - **metadata** (dict): associated metadata
- **tool** (dict): tool used to compute metadata
"""
@@ -526,7 +526,7 @@
metadata (iterable): dictionaries with keys:
- **id**: sha1
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute the
results
@@ -538,7 +538,7 @@
raise TypeError('identifiers must be bytes.')
self._content_metadata.add(metadata, conflict_update)
- def revision_metadata_missing(self, metadata):
+ def revision_intrinsic_metadata_missing(self, metadata):
"""List metadata missing from storage.
Args:
@@ -552,9 +552,9 @@
missing ids
"""
- yield from self._revision_metadata.missing(metadata)
+ yield from self._revision_intrinsic_metadata.missing(metadata)
- def revision_metadata_get(self, ids):
+ def revision_intrinsic_metadata_get(self, ids):
"""Retrieve revision metadata per id.
Args:
@@ -564,22 +564,22 @@
dictionaries with the following keys:
- **id** (bytes)
- - **translated_metadata** (str): associated metadata
+ - **metadata** (dict): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
"""
- yield from self._revision_metadata.get(ids)
+ yield from self._revision_intrinsic_metadata.get(ids)
- def revision_metadata_add(self, metadata, conflict_update=False):
+ def revision_intrinsic_metadata_add(self, metadata, conflict_update=False):
"""Add metadata not present in storage.
Args:
metadata (iterable): dictionaries with keys:
- **id**: sha1_git of revision
- - **translated_metadata**: arbitrary dict
+ - **metadata**: arbitrary dict
- **indexer_configuration_id**: tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
@@ -590,9 +590,9 @@
"""
if not all(isinstance(x['id'], bytes) for x in metadata):
raise TypeError('identifiers must be bytes.')
- self._revision_metadata.add(metadata, conflict_update)
+ self._revision_intrinsic_metadata.add(metadata, conflict_update)
- def revision_metadata_delete(self, entries):
+ def revision_intrinsic_metadata_delete(self, entries):
"""Remove revision metadata from the storage.
Args:
@@ -600,7 +600,7 @@
- **revision** (int): origin identifier
- **id** (int): tool used to compute metadata
"""
- self._revision_metadata.delete(entries)
+ self._revision_intrinsic_metadata.delete(entries)
def origin_intrinsic_metadata_get(self, ids):
"""Retrieve origin metadata per id.
@@ -611,16 +611,14 @@
Yields:
list: dictionaries with the following keys:
- - **origin_id** (int)
- - **translated_metadata** (str): associated metadata
+ - **id** (int)
+ - **metadata** (dict): associated metadata
- **tool** (dict): tool used to compute metadata
- **mappings** (List[str]): list of mappings used to translate
these metadata
"""
- for item in self._origin_intrinsic_metadata.get(ids):
- item['origin_id'] = item.pop('id')
- yield item
+ yield from self._origin_intrinsic_metadata.get(ids)
def origin_intrinsic_metadata_add(self, metadata,
conflict_update=False):
@@ -629,7 +627,7 @@
Args:
metadata (iterable): dictionaries with keys:
- - **origin_id**: origin identifier
+ - **id**: origin identifier
- **from_revision**: sha1 id of the revision used to generate
these metadata.
- **metadata**: arbitrary dict
@@ -641,29 +639,18 @@
or skip duplicates (false, the default)
"""
-
- items = []
- for item in metadata:
- item = item.copy()
- item['id'] = item.pop('origin_id')
- items.append(item)
- self._origin_intrinsic_metadata.add(items, conflict_update)
+ self._origin_intrinsic_metadata.add(metadata, conflict_update)
def origin_intrinsic_metadata_delete(self, entries):
"""Remove origin metadata from the storage.
Args:
entries (dict): dictionaries with the following keys:
- - **origin_id** (int): origin identifier
+ - **id** (int): origin identifier
- **indexer_configuration_id** (int): tool used to compute
metadata
"""
- items = []
- for entry in entries:
- item = entry.copy()
- item['id'] = item.pop('origin_id')
- items.append(item)
- self._origin_intrinsic_metadata.delete(items)
+ self._origin_intrinsic_metadata.delete(entries)
def origin_intrinsic_metadata_search_fulltext(
self, conjunction, limit=100):
@@ -712,8 +699,6 @@
results.sort(key=operator.itemgetter(0), # Don't try to order 'data'
reverse=True)
for (rank_, result) in results[:limit]:
- result = result.copy()
- result['origin_id'] = result.pop('id')
yield result
def origin_intrinsic_metadata_search_by_producer(
@@ -759,8 +744,6 @@
if ids_only:
yield entry['id']
else:
- entry = entry.copy()
- entry['origin_id'] = entry.pop('id')
yield entry
nb_results += 1
diff --git a/swh/indexer/tests/conftest.py b/swh/indexer/tests/conftest.py
--- a/swh/indexer/tests/conftest.py
+++ b/swh/indexer/tests/conftest.py
@@ -12,7 +12,7 @@
from .utils import fill_storage, fill_obj_storage
-TASK_NAMES = ['revision_metadata', 'origin_intrinsic_metadata']
+TASK_NAMES = ['revision_intrinsic_metadata', 'origin_intrinsic_metadata']
@pytest.fixture
diff --git a/swh/indexer/tests/storage/test_converters.py b/swh/indexer/tests/storage/test_converters.py
--- a/swh/indexer/tests/storage/test_converters.py
+++ b/swh/indexer/tests/storage/test_converters.py
@@ -169,12 +169,12 @@
'tool_name': 'some-toolname',
'tool_version': 'some-toolversion',
'tool_configuration': {},
- 'translated_metadata': b'translated_metadata',
+ 'metadata': b'metadata',
}
expected_metadata = {
'id': b'some-id',
- 'translated_metadata': b'translated_metadata',
+ 'metadata': b'metadata',
'tool': {
'id': 20,
'name': 'some-toolname',
diff --git a/swh/indexer/tests/storage/test_storage.py b/swh/indexer/tests/storage/test_storage.py
--- a/swh/indexer/tests/storage/test_storage.py
+++ b/swh/indexer/tests/storage/test_storage.py
@@ -931,7 +931,7 @@
endpoint_type='content_metadata',
tool_name='swh-metadata-detector',
example_data1={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'codeRepository': {
'type': 'git',
@@ -943,7 +943,7 @@
},
},
example_data2={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'name': 'test_metadata',
'version': '0.0.1'
@@ -951,21 +951,21 @@
},
)
- # revision_metadata tests
+ # revision_intrinsic_metadata tests
(
- test_revision_metadata_missing,
- test_revision_metadata_add__drop_duplicate,
- test_revision_metadata_add__update_in_place_duplicate,
- test_revision_metadata_add__update_in_place_deadlock,
- test_revision_metadata_add__duplicate_twice,
- test_revision_metadata_get,
- test_revision_metadata_delete,
- test_revision_metadata_delete_nonexisting,
+ test_revision_intrinsic_metadata_missing,
+ test_revision_intrinsic_metadata_add__drop_duplicate,
+ test_revision_intrinsic_metadata_add__update_in_place_duplicate,
+ test_revision_intrinsic_metadata_add__update_in_place_deadlock,
+ test_revision_intrinsic_metadata_add__duplicate_twice,
+ test_revision_intrinsic_metadata_get,
+ test_revision_intrinsic_metadata_delete,
+ test_revision_intrinsic_metadata_delete_nonexisting,
) = gen_generic_endpoint_tests(
- endpoint_type='revision_metadata',
+ endpoint_type='revision_intrinsic_metadata',
tool_name='swh-metadata-detector',
example_data1={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'codeRepository': {
'type': 'git',
@@ -978,7 +978,7 @@
'mappings': ['mapping1'],
},
example_data2={
- 'translated_metadata': {
+ 'metadata': {
'other': {},
'name': 'test_metadata',
'version': '0.0.1'
@@ -997,12 +997,12 @@
}
metadata_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata,
+ 'metadata': metadata,
'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
'mappings': ['mapping1'],
@@ -1010,7 +1010,7 @@
}
# when
- self.storage.revision_metadata_add([metadata_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev])
self.storage.origin_intrinsic_metadata_add([metadata_origin])
# then
@@ -1018,7 +1018,7 @@
[self.origin_id_1, 42]))
expected_metadata = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
@@ -1037,28 +1037,28 @@
}
metadata_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata,
+ 'metadata': metadata,
'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
'mappings': ['mapping1'],
'from_revision': self.revision_id_2,
}
metadata_origin2 = metadata_origin.copy()
- metadata_origin2['origin_id'] = self.origin_id_2
+ metadata_origin2['id'] = self.origin_id_2
# when
- self.storage.revision_metadata_add([metadata_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev])
self.storage.origin_intrinsic_metadata_add([
metadata_origin, metadata_origin2])
self.storage.origin_intrinsic_metadata_delete([
{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'indexer_configuration_id': tool_id
}
])
@@ -1074,7 +1074,7 @@
tool_id = self.tools['swh-metadata-detector']['id']
self.storage.origin_intrinsic_metadata_delete([
{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'indexer_configuration_id': tool_id
}
])
@@ -1089,12 +1089,12 @@
}
metadata_rev_v1 = {
'id': self.revision_id_1,
- 'translated_metadata': metadata_v1.copy(),
+ 'metadata': metadata_v1.copy(),
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
'mappings': [],
@@ -1102,7 +1102,7 @@
}
# given
- self.storage.revision_metadata_add([metadata_rev_v1])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v1])
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1])
# when
@@ -1110,7 +1110,7 @@
[self.origin_id_1, 42]))
expected_metadata_v1 = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_1,
@@ -1127,10 +1127,10 @@
})
metadata_rev_v2 = metadata_rev_v1.copy()
metadata_origin_v2 = metadata_origin_v1.copy()
- metadata_rev_v2['translated_metadata'] = metadata_v2
- metadata_origin_v2['translated_metadata'] = metadata_v2
+ metadata_rev_v2['metadata'] = metadata_v2
+ metadata_origin_v2['metadata'] = metadata_v2
- self.storage.revision_metadata_add([metadata_rev_v2])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v2])
self.storage.origin_intrinsic_metadata_add([metadata_origin_v2])
# then
@@ -1150,12 +1150,12 @@
}
metadata_rev_v1 = {
'id': self.revision_id_2,
- 'translated_metadata': metadata_v1,
+ 'metadata': metadata_v1,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata_origin_v1 = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1.copy(),
'indexer_configuration_id': tool_id,
'mappings': [],
@@ -1163,7 +1163,7 @@
}
# given
- self.storage.revision_metadata_add([metadata_rev_v1])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v1])
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1])
# when
@@ -1172,7 +1172,7 @@
# then
expected_metadata_v1 = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v1,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
@@ -1188,19 +1188,19 @@
})
metadata_rev_v2 = metadata_rev_v1.copy()
metadata_origin_v2 = metadata_origin_v1.copy()
- metadata_rev_v2['translated_metadata'] = metadata_v2
+ metadata_rev_v2['metadata'] = metadata_v2
metadata_origin_v2['metadata'] = metadata_v2
- self.storage.revision_metadata_add([metadata_rev_v2],
- conflict_update=True)
- self.storage.origin_intrinsic_metadata_add([metadata_origin_v2],
- conflict_update=True)
+ self.storage.revision_intrinsic_metadata_add(
+ [metadata_rev_v2], conflict_update=True)
+ self.storage.origin_intrinsic_metadata_add(
+ [metadata_origin_v2], conflict_update=True)
actual_metadata = list(self.storage.origin_intrinsic_metadata_get(
[self.origin_id_1]))
expected_metadata_v2 = [{
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata_v2,
'tool': self.tools['swh-metadata-detector'],
'from_revision': self.revision_id_2,
@@ -1233,7 +1233,7 @@
metadata_rev_v1 = {
'id': self.revision_id_2,
- 'translated_metadata': {
+ 'metadata': {
'version': None,
'name': None,
},
@@ -1243,7 +1243,7 @@
data_v1 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data1,
'indexer_configuration_id': tool_id,
@@ -1252,7 +1252,7 @@
]
data_v2 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data2,
'indexer_configuration_id': tool_id,
@@ -1266,7 +1266,7 @@
data_v2b = list(reversed(data_v2[0:-1]))
# given
- self.storage.revision_metadata_add([metadata_rev_v1])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev_v1])
self.storage.origin_intrinsic_metadata_add(data_v1)
# when
@@ -1274,7 +1274,7 @@
expected_data_v1 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data1,
'tool': self.tools['swh-metadata-detector'],
@@ -1306,7 +1306,7 @@
expected_data_v2 = [
{
- 'origin_id': id_,
+ 'id': id_,
'from_revision': self.revision_id_2,
**example_data2,
'tool': self.tools['swh-metadata-detector'],
@@ -1327,12 +1327,12 @@
}
metadata_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata,
+ 'metadata': metadata,
'mappings': ['mapping1'],
'indexer_configuration_id': tool_id,
}
metadata_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata,
'indexer_configuration_id': tool_id,
'mappings': ['mapping1'],
@@ -1340,7 +1340,7 @@
}
# when
- self.storage.revision_metadata_add([metadata_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata_rev])
with self.assertRaises(ValueError):
self.storage.origin_intrinsic_metadata_add([
@@ -1355,12 +1355,12 @@
}
metadata1_rev = {
'id': self.revision_id_1,
- 'translated_metadata': metadata1,
+ 'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1371,12 +1371,12 @@
}
metadata2_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata2,
+ 'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1384,24 +1384,24 @@
}
# when
- self.storage.revision_metadata_add([metadata1_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata1_rev])
self.storage.origin_intrinsic_metadata_add([metadata1_origin])
- self.storage.revision_metadata_add([metadata2_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata2_rev])
self.storage.origin_intrinsic_metadata_add([metadata2_origin])
# then
search = self.storage.origin_intrinsic_metadata_search_fulltext
self.assertCountEqual(
- [res['origin_id'] for res in search(['Doe'])],
+ [res['id'] for res in search(['Doe'])],
[self.origin_id_1, self.origin_id_2])
self.assertEqual(
- [res['origin_id'] for res in search(['John', 'Doe'])],
+ [res['id'] for res in search(['John', 'Doe'])],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John'])],
+ [res['id'] for res in search(['John'])],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John', 'Jane'])],
+ [res['id'] for res in search(['John', 'Jane'])],
[])
def test_origin_intrinsic_metadata_search_fulltext_rank(self):
@@ -1421,12 +1421,12 @@
}
metadata1_rev = {
'id': self.revision_id_1,
- 'translated_metadata': metadata1,
+ 'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata1_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata1,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1440,12 +1440,12 @@
}
metadata2_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata2,
+ 'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
}
metadata2_origin = {
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': metadata2,
'mappings': [],
'indexer_configuration_id': tool_id,
@@ -1453,27 +1453,27 @@
}
# when
- self.storage.revision_metadata_add([metadata1_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata1_rev])
self.storage.origin_intrinsic_metadata_add([metadata1_origin])
- self.storage.revision_metadata_add([metadata2_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata2_rev])
self.storage.origin_intrinsic_metadata_add([metadata2_origin])
# then
search = self.storage.origin_intrinsic_metadata_search_fulltext
self.assertEqual(
- [res['origin_id'] for res in search(['Doe'])],
+ [res['id'] for res in search(['Doe'])],
[self.origin_id_1, self.origin_id_2])
self.assertEqual(
- [res['origin_id'] for res in search(['Doe'], limit=1)],
+ [res['id'] for res in search(['Doe'], limit=1)],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John'])],
+ [res['id'] for res in search(['John'])],
[self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['Jane'])],
+ [res['id'] for res in search(['Jane'])],
[self.origin_id_2, self.origin_id_1])
self.assertEqual(
- [res['origin_id'] for res in search(['John', 'Jane'])],
+ [res['id'] for res in search(['John', 'Jane'])],
[self.origin_id_1])
def _fill_origin_intrinsic_metadata(self):
@@ -1486,12 +1486,12 @@
}
metadata1_rev = {
'id': self.revision_id_1,
- 'translated_metadata': metadata1,
+ 'metadata': metadata1,
'mappings': ['npm'],
'indexer_configuration_id': tool1_id,
}
metadata1_origin = {
- 'origin_id': self.origin_id_1,
+ 'id': self.origin_id_1,
'metadata': metadata1,
'mappings': ['npm'],
'indexer_configuration_id': tool1_id,
@@ -1503,12 +1503,12 @@
}
metadata2_rev = {
'id': self.revision_id_2,
- 'translated_metadata': metadata2,
+ 'metadata': metadata2,
'mappings': ['npm', 'gemspec'],
'indexer_configuration_id': tool2_id,
}
metadata2_origin = {
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': metadata2,
'mappings': ['npm', 'gemspec'],
'indexer_configuration_id': tool2_id,
@@ -1519,23 +1519,23 @@
}
metadata3_rev = {
'id': self.revision_id_3,
- 'translated_metadata': metadata3,
+ 'metadata': metadata3,
'mappings': ['npm', 'gemspec'],
'indexer_configuration_id': tool2_id,
}
metadata3_origin = {
- 'origin_id': self.origin_id_3,
+ 'id': self.origin_id_3,
'metadata': metadata3,
'mappings': ['pkg-info'],
'indexer_configuration_id': tool2_id,
'from_revision': self.revision_id_3,
}
- self.storage.revision_metadata_add([metadata1_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata1_rev])
self.storage.origin_intrinsic_metadata_add([metadata1_origin])
- self.storage.revision_metadata_add([metadata2_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata2_rev])
self.storage.origin_intrinsic_metadata_add([metadata2_origin])
- self.storage.revision_metadata_add([metadata3_rev])
+ self.storage.revision_intrinsic_metadata_add([metadata3_rev])
self.storage.origin_intrinsic_metadata_add([metadata3_origin])
def test_origin_intrinsic_metadata_search_by_producer(self):
@@ -1597,7 +1597,7 @@
# test ids_only=False
self.assertEqual(list(endpoint(mappings=['gemspec'])), [{
- 'origin_id': self.origin_id_2,
+ 'id': self.origin_id_2,
'metadata': {
'@context': 'foo',
'author': 'Jane Doe',
diff --git a/swh/indexer/tests/tasks.py b/swh/indexer/tests/tasks.py
--- a/swh/indexer/tests/tasks.py
+++ b/swh/indexer/tests/tasks.py
@@ -41,7 +41,7 @@
@app.task
-def revision_metadata(*args, **kwargs):
+def revision_intrinsic_metadata(*args, **kwargs):
indexer = RevisionMetadataTestIndexer()
indexer.run(*args, **kwargs)
print('REV RESULT=', indexer.results)
diff --git a/swh/indexer/tests/test_cli.py b/swh/indexer/tests/test_cli.py
--- a/swh/indexer/tests/test_cli.py
+++ b/swh/indexer/tests/test_cli.py
@@ -41,7 +41,7 @@
origin_metadata = [
{
- 'origin_id': origin_id,
+ 'id': origin_id,
'from_revision': hash_to_bytes('abcd{:0>4}'.format(origin_id)),
'indexer_configuration_id': tools[origin_id % 2]['id'],
'metadata': {'name': 'origin %d' % origin_id},
@@ -59,7 +59,7 @@
for origin_id in range(nb_rows)
]
- idx_storage.revision_metadata_add(revision_metadata)
+ idx_storage.revision_intrinsic_metadata_add(revision_metadata)
idx_storage.origin_intrinsic_metadata_add(origin_metadata)
return [tool['id'] for tool in tools]
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -251,7 +251,7 @@
sha1s))
expected_results = [{
- 'translated_metadata': {
+ 'metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'codeRepository':
@@ -262,7 +262,7 @@
},
'id': hash_to_bytes('26a9f72a7c87cc9205725cfd879f514ff4f3d8d5'),
}, {
- 'translated_metadata': {
+ 'metadata': {
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
'type': 'SoftwareSourceCode',
'issueTracker':
@@ -1111,7 +1111,7 @@
metadata_indexer.idx_storage.content_metadata_add([{
'indexer_configuration_id': tool['id'],
'id': b'cde',
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
}])
sha1_gits = [
@@ -1119,13 +1119,14 @@
]
metadata_indexer.run(sha1_gits, 'update-dups')
- results = list(metadata_indexer.idx_storage.revision_metadata_get(
- sha1_gits))
+ results = list(
+ metadata_indexer.idx_storage.
+ revision_intrinsic_metadata_get(sha1_gits))
expected_results = [{
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
'tool': TRANSLATOR_TOOL,
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}]
diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py
--- a/swh/indexer/tests/test_origin_head.py
+++ b/swh/indexer/tests/test_origin_head.py
@@ -18,7 +18,7 @@
'configuration': {},
},
'tasks': {
- 'revision_metadata': None,
+ 'revision_intrinsic_metadata': None,
'origin_intrinsic_metadata': None,
}
}
diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py
--- a/swh/indexer/tests/test_origin_metadata.py
+++ b/swh/indexer/tests/test_origin_metadata.py
@@ -26,17 +26,18 @@
rev_metadata = {
'id': rev_id,
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
origin_metadata = {
- 'origin_id': origin['id'],
+ 'id': origin['id'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
for result in results:
del result['tool']
assert results == [rev_metadata]
@@ -62,7 +63,8 @@
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert len(results) == 1
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -112,17 +114,18 @@
rev_metadata = {
'id': rev_id,
- 'translated_metadata': YARN_PARSER_METADATA,
+ 'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
origin_metadata = {
- 'origin_id': origin2['id'],
+ 'id': origin2['id'],
'from_revision': rev_id,
'metadata': YARN_PARSER_METADATA,
'mappings': ['npm'],
}
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
for result in results:
del result['tool']
assert results == [rev_metadata]
@@ -151,7 +154,8 @@
assert origin1['id'] != origin2['id']
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert len(results) == 1
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -172,7 +176,8 @@
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert results == []
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -191,7 +196,8 @@
'url': 'https://github.com/librariesio/yarn-parser'})
rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert results != []
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
@@ -202,7 +208,8 @@
b'foo.json'):
indexer.run(["git+https://github.com/librariesio/yarn-parser"])
- results = list(indexer.idx_storage.revision_metadata_get([rev_id]))
+ results = list(
+ indexer.idx_storage.revision_intrinsic_metadata_get([rev_id]))
assert results == []
results = list(indexer.idx_storage.origin_intrinsic_metadata_get([
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 9:41 AM (19 h, 36 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216178
Attached To
D1226: Fix heterogeneity of names in metadata tables
Event Timeline
Log In to Comment