Changeset View
Standalone View
swh/indexer/storage/__init__.py
Show First 20 Lines • Show All 422 Lines • ▼ Show 20 Lines | class IndexerStorage: | ||||
def content_metadata_add(self, metadata, conflict_update=False, db=None,
                         cur=None):
    """Add content metadata not present in storage.

    The rows are first copied into a temporary table, then merged from
    there by ``db.content_metadata_add_from_temp``.

    Args:
        metadata (iterable): dictionaries with keys:

            - id: sha1
            - translated_metadata: arbitrary dict
            - indexer_configuration_id: copied alongside the two keys
              above (presumably the identifier of the tool
              configuration -- confirm against callers)
        conflict_update: Flag to determine if we want to overwrite (true)
            or skip duplicates (false, the default)

    """
    staging_table = 'tmp_content_metadata'
    columns = ['id', 'translated_metadata', 'indexer_configuration_id']

    # The temporary table must exist before anything is copied into it,
    # and must be filled before the final merge step reads from it.
    db.mktemp_content_metadata(cur)
    db.copy_to(metadata, staging_table, columns, cur)
    db.content_metadata_add_from_temp(conflict_update, cur)
@remote_api_endpoint('revision_metadata/missing') | @remote_api_endpoint('revision_metadata/missing') | ||||
@db_transaction_generator() | @db_transaction_generator() | ||||
Show All 39 Lines | class IndexerStorage: | ||||
def revision_metadata_add(self, metadata, conflict_update=False, db=None,
                          cur=None):
    """Add revision metadata not present in storage.

    The rows are first copied into a temporary table, then merged from
    there by ``db.revision_metadata_add_from_temp``.

    Args:
        metadata (iterable): dictionaries with keys:

            - id: sha1_git of revision
            - translated_metadata: arbitrary dict
            - indexer_configuration_id: copied alongside the two keys
              above (presumably the identifier of the tool
              configuration -- confirm against callers)
        conflict_update: Flag to determine if we want to overwrite (true)
            or skip duplicates (false, the default)

    """
    staging_table = 'tmp_revision_metadata'
    columns = ['id', 'translated_metadata', 'indexer_configuration_id']

    # The temporary table must exist before anything is copied into it,
    # and must be filled before the final merge step reads from it.
    db.mktemp_revision_metadata(cur)
    db.copy_to(metadata, staging_table, columns, cur)
    db.revision_metadata_add_from_temp(conflict_update, cur)
@remote_api_endpoint('origin_intrinsic_metadata')
@db_transaction_generator()
def origin_intrinsic_metadata_get(self, ids, db=None, cur=None):
    """Retrieve origin intrinsic metadata per origin id.

    Each database row is paired with ``db.origin_intrinsic_metadata_cols``
    to build a dictionary, which is then passed through
    ``converters.db_to_metadata`` before being yielded.

    Args:
        ids (iterable): origin identifiers

    Yields:
        dict: one dictionary per row, documented so far with the keys:

            - id (int)
            - translated_metadata (str): associated metadata
            - tool (dict): tool used to compute metadata

        NOTE(review): these key names look inherited from the sibling
        ``*_metadata_get`` methods, whereas
        ``origin_intrinsic_metadata_add`` works with ``origin_id``,
        ``metadata`` and ``from_revision`` -- confirm against
        ``db.origin_intrinsic_metadata_cols``.

    """
    for row in db.origin_intrinsic_metadata_get_from_list(ids, cur):
        record = dict(zip(db.origin_intrinsic_metadata_cols, row))
        yield converters.db_to_metadata(record)
@remote_api_endpoint('origin_intrinsic_metadata/add')
@db_transaction()
def origin_intrinsic_metadata_add(self, metadata,
                                  conflict_update=False, db=None,
                                  cur=None):
    """Add origin intrinsic metadata not present in storage.

    "Intrinsic" is understood here as metadata detected in, and
    retrieved from, the content itself (wording taken from the review
    of this change -- confirm).

    The rows are first copied into a temporary table, then merged from
    there by ``db.origin_intrinsic_metadata_add_from_temp``.

    Args:
        metadata (iterable): dictionaries with keys:

            - origin_id: origin identifier
            - from_revision: sha1 id of the revision used to generate
              these metadata.
            - metadata: arbitrary dict
            - indexer_configuration_id: copied alongside the keys
              above (presumably the identifier of the tool
              configuration -- confirm against callers)
        conflict_update: Flag to determine if we want to overwrite (true)
            or skip duplicates (false, the default)

    """
    staging_table = 'tmp_origin_intrinsic_metadata'
    columns = ['origin_id', 'metadata', 'indexer_configuration_id',
               'from_revision']

    # The temporary table must exist before anything is copied into it,
    # and must be filled before the final merge step reads from it.
    db.mktemp_origin_intrinsic_metadata(cur)
    db.copy_to(metadata, staging_table, columns, cur)
    db.origin_intrinsic_metadata_add_from_temp(conflict_update, cur)
@remote_api_endpoint('indexer_configuration/add') | @remote_api_endpoint('indexer_configuration/add') | ||||
@db_transaction_generator() | @db_transaction_generator() | ||||
def indexer_configuration_add(self, tools, db=None, cur=None): | def indexer_configuration_add(self, tools, db=None, cur=None): | ||||
"""Add new tools to the storage. | """Add new tools to the storage. | ||||
Args: | Args: | ||||
tools ([dict]): List of dictionary representing tool to | tools ([dict]): List of dictionary representing tool to | ||||
insert in the db. Dictionary with the following keys:: | insert in the db. Dictionary with the following keys:: | ||||
▲ Show 20 Lines • Show All 48 Lines • Show Last 20 Lines |
this docstring should probably mention metadata somewhere