Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show All 19 Lines | |||||
# Max block size of contents to return | # Max block size of contents to return | ||||
BULK_BLOCK_CONTENT_LEN_MAX = 10000 | BULK_BLOCK_CONTENT_LEN_MAX = 10000 | ||||
def now():
    """Return the current date and time as a timezone-aware UTC datetime."""
    utc = datetime.timezone.utc
    return datetime.datetime.now(tz=utc)
OriginVisitKey = collections.namedtuple('OriginVisitKey', 'origin date') | |||||
class Storage: | class Storage: | ||||
def __init__(self): | def __init__(self): | ||||
self._contents = {} | self._contents = {} | ||||
self._contents_data = {} | self._contents_data = {} | ||||
self._content_indexes = defaultdict(lambda: defaultdict(set)) | self._content_indexes = defaultdict(lambda: defaultdict(set)) | ||||
self._directories = {} | self._directories = {} | ||||
self._revisions = {} | self._revisions = {} | ||||
self._releases = {} | self._releases = {} | ||||
self._snapshots = {} | self._snapshots = {} | ||||
self._origins = {} | self._origins = [] | ||||
self._origin_visits = {} | self._origin_visits = [] | ||||
self._origin_metadata = defaultdict(list) | self._origin_metadata = defaultdict(list) | ||||
self._tools = {} | self._tools = {} | ||||
self._metadata_providers = {} | self._metadata_providers = {} | ||||
self._objects = defaultdict(list) | self._objects = defaultdict(list) | ||||
# ideally we would want a skip list for both fast inserts and searches | # ideally we would want a skip list for both fast inserts and searches | ||||
self._sorted_sha1s = [] | self._sorted_sha1s = [] | ||||
▲ Show 20 Lines • Show All 496 Lines • ▼ Show 20 Lines | def snapshot_add(self, origin, visit, snapshot): | ||||
- **target_type** (:class:`str`): one of ``content``, | - **target_type** (:class:`str`): one of ``content``, | ||||
``directory``, ``revision``, ``release``, | ``directory``, ``revision``, ``release``, | ||||
``snapshot``, ``alias`` | ``snapshot``, ``alias`` | ||||
- **target** (:class:`bytes`): identifier of the target | - **target** (:class:`bytes`): identifier of the target | ||||
(currently a ``sha1_git`` for all object kinds, or the name | (currently a ``sha1_git`` for all object kinds, or the name | ||||
of the target branch for aliases) | of the target branch for aliases) | ||||
Raises: | Raises: | ||||
ValueError: if the origin or visit id does not exist. | ValueError: if the origin's or visit's identifier does not exist. | ||||
""" | """ | ||||
snapshot_id = snapshot['id'] | snapshot_id = snapshot['id'] | ||||
if snapshot_id not in self._snapshots: | if snapshot_id not in self._snapshots: | ||||
self._snapshots[snapshot_id] = { | self._snapshots[snapshot_id] = { | ||||
'origin': origin, | 'origin': origin, | ||||
'visit': visit, | 'visit': visit, | ||||
'id': snapshot_id, | 'id': snapshot_id, | ||||
'branches': copy.deepcopy(snapshot['branches']), | 'branches': copy.deepcopy(snapshot['branches']), | ||||
'_sorted_branch_names': sorted(snapshot['branches']) | '_sorted_branch_names': sorted(snapshot['branches']) | ||||
} | } | ||||
self._objects[snapshot_id].append(('snapshot', snapshot_id)) | self._objects[snapshot_id].append(('snapshot', snapshot_id)) | ||||
if visit not in self._origin_visits: | if origin <= len(self._origin_visits) and \ | ||||
raise ValueError('Origin %s has no visit %s' % (origin, visit)) | visit <= len(self._origin_visits[origin-1]): | ||||
self._origin_visits[visit]['snapshot'] = snapshot_id | self._origin_visits[origin-1][visit-1]['snapshot'] = snapshot_id | ||||
else: | |||||
raise ValueError('Origin with id %s does not exist or has no visit' | |||||
' with id %s' % (origin, visit)) | |||||
vlorentz: D769 | |||||
def snapshot_get(self, snapshot_id): | def snapshot_get(self, snapshot_id): | ||||
"""Get the content, possibly partial, of a snapshot with the given id | """Get the content, possibly partial, of a snapshot with the given id | ||||
The branches of the snapshot are iterated in the lexicographical | The branches of the snapshot are iterated in the lexicographical | ||||
order of their names. | order of their names. | ||||
.. warning:: At most 1000 branches contained in the snapshot will be | .. warning:: At most 1000 branches contained in the snapshot will be | ||||
returned for performance reasons. In order to browse the whole | returned for performance reasons. In order to browse the whole | ||||
Show All 20 Lines | def snapshot_get_by_origin_visit(self, origin, visit): | ||||
order of their names. | order of their names. | ||||
.. warning:: At most 1000 branches contained in the snapshot will be | .. warning:: At most 1000 branches contained in the snapshot will be | ||||
returned for performance reasons. In order to browse the whole | returned for performance reasons. In order to browse the whole | ||||
set of branches, the method :meth:`snapshot_get_branches` | set of branches, the method :meth:`snapshot_get_branches` | ||||
should be used instead. | should be used instead. | ||||
Args: | Args: | ||||
origin (int): the origin identifier | origin (int): the origin's identifier | ||||
visit (int): the visit identifier | visit (int): the visit's identifier | ||||
Returns: | Returns: | ||||
dict: None if the snapshot does not exist; | dict: None if the snapshot does not exist; | ||||
a dict with three keys otherwise: | a dict with three keys otherwise: | ||||
* **id**: identifier of the snapshot | * **id**: identifier of the snapshot | ||||
* **branches**: a dict of branches contained in the snapshot | * **branches**: a dict of branches contained in the snapshot | ||||
whose keys are the branches' names. | whose keys are the branches' names. | ||||
* **next_branch**: the name of the first branch not returned | * **next_branch**: the name of the first branch not returned | ||||
or :const:`None` if the snapshot has less than 1000 | or :const:`None` if the snapshot has less than 1000 | ||||
branches. | branches. | ||||
""" | """ | ||||
if visit not in self._origin_visits: | if origin > len(self._origins) or \ | ||||
visit > len(self._origin_visits[origin-1]): | |||||
return None | return None | ||||
snapshot_id = self._origin_visits[visit]['snapshot'] | snapshot_id = self._origin_visits[origin-1][visit-1]['snapshot'] | ||||
if snapshot_id: | if snapshot_id: | ||||
return self.snapshot_get(snapshot_id) | return self.snapshot_get(snapshot_id) | ||||
else: | else: | ||||
return None | return None | ||||
def snapshot_get_latest(self, origin, allowed_statuses=None):
    """Get the content, possibly partial, of the latest snapshot for the
    given origin, optionally only from visits that have one of the given
    allowed_statuses

    The branches of the snapshot are iterated in the lexicographical
    order of their names.

    .. warning:: At most 1000 branches contained in the snapshot will be
        returned for performance reasons. In order to browse the whole
        set of branches, the method :meth:`snapshot_get_branches`
        should be used instead.

    Args:
        origin (int): the origin's identifier
        allowed_statuses (list of str): list of visit statuses considered
            to find the latest snapshot for the visit. For instance,
            ``allowed_statuses=['full']`` will only consider visits that
            have successfully run to completion.

    Returns:
        dict: None if the origin is unknown or if none of the considered
        visits has a snapshot; a dict with three keys otherwise:

        * **id**: identifier of the snapshot
        * **branches**: a dict of branches contained in the snapshot
          whose keys are the branches' names.
        * **next_branch**: the name of the first branch not returned
          or :const:`None` if the snapshot has less than 1000
          branches.

    """
    # origin ids are in the range [1, +inf[; anything else must not be
    # turned into a (possibly negative, hence wrapping) list index
    if not 1 <= origin <= len(self._origin_visits):
        return None

    visits = self._origin_visits[origin-1]
    if allowed_statuses is not None:
        visits = [visit for visit in visits
                  if visit['status'] in allowed_statuses]

    # most recent visit first; the visit id breaks ties between visits
    # sharing the same date
    for visit in sorted(visits, key=lambda v: (v['date'], v['visit']),
                        reverse=True):
        snapshot_id = visit['snapshot']
        if snapshot_id is None:
            # this visit has no snapshot attached (yet)
            continue
        snapshot = self.snapshot_get(snapshot_id)
        if snapshot:
            return snapshot

    return None
def snapshot_count_branches(self, snapshot_id, db=None, cur=None): | def snapshot_count_branches(self, snapshot_id, db=None, cur=None): | ||||
"""Count the number of branches in the snapshot with the given id | """Count the number of branches in the snapshot with the given id | ||||
Args: | Args: | ||||
snapshot_id (bytes): identifier of the snapshot | snapshot_id (bytes): identifier of the snapshot | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines | def origin_get(self, origin): | ||||
origin: dictionary representing the individual origin to find. | origin: dictionary representing the individual origin to find. | ||||
This dict has either the keys type and url: | This dict has either the keys type and url: | ||||
- type (FIXME: enum TBD): the origin type ('git', 'wget', ...) | - type (FIXME: enum TBD): the origin type ('git', 'wget', ...) | ||||
- url (bytes): the url the origin points to | - url (bytes): the url the origin points to | ||||
or the id: | or the id: | ||||
- id: the origin id | - id (int): the origin's identifier | ||||
Returns: | Returns: | ||||
dict: the origin dictionary with the keys: | dict: the origin dictionary with the keys: | ||||
- id: origin's id | - id: origin's id | ||||
- type: origin's type | - type: origin's type | ||||
- url: origin's url | - url: origin's url | ||||
Raises: | Raises: | ||||
ValueError: if the keys does not match (url and type) nor id. | ValueError: if the keys does not match (url and type) nor id. | ||||
""" | """ | ||||
if 'id' in origin: | if 'id' in origin: | ||||
key = origin['id'] | origin_id = origin['id'] | ||||
elif 'type' in origin and 'url' in origin: | elif 'type' in origin and 'url' in origin: | ||||
key = self._origin_key(origin) | origin_id = self._origin_id(origin) | ||||
else: | else: | ||||
raise ValueError('Origin must have either id or (type and url).') | raise ValueError('Origin must have either id or (type and url).') | ||||
if key not in self._origins: | origin = None | ||||
Inline comment (not done) — vlorentz: add comment | |||||
return None | # self._origin_id can return None | ||||
else: | if origin_id is not None: | ||||
origin = copy.deepcopy(self._origins[key]) | origin = copy.deepcopy(self._origins[origin_id-1]) | ||||
del origin['visits_dates'] | origin['id'] = origin_id | ||||
origin['id'] = self._origin_key(origin) | |||||
return origin | return origin | ||||
def origin_search(self, url_pattern, offset=0, limit=50,
                  regexp=False, with_visit=False, db=None, cur=None):
    """Search for origins whose urls contain a provided string pattern
    or match a provided regular expression.
    The search is performed in a case insensitive way.

    Args:
        url_pattern (str): the string pattern to search for in origin urls
        offset (int): number of found origins to skip before returning
            results
        limit (int): the maximum number of found origins to return
        regexp (bool): if True, consider the provided pattern as a regular
            expression and return origins whose urls match it (the
            expression is applied from the start of the url)
        with_visit (bool): if True, filter out origins with no visit
        db: unused by this implementation
        cur: unused by this implementation

    Returns:
        A list of dict containing origin information as returned
        by :meth:`swh.storage.storage.Storage.origin_get`. The dicts are
        deep copies, so modifying them does not alter the stored origins.

    """
    origins = self._origins
    if regexp:
        # IGNORECASE implements the documented case insensitive search
        pat = re.compile(url_pattern, re.IGNORECASE)
        origins = [orig for orig in origins if pat.match(orig['url'])]
    else:
        # compare lower-cased values: documented case insensitive search
        lowered_pattern = url_pattern.lower()
        origins = [orig for orig in origins
                   if lowered_pattern in orig['url'].lower()]

    if with_visit:
        # visits of the origin with id N are stored at index N-1
        origins = [orig for orig in origins
                   if self._origin_visits[orig['id']-1]]

    return copy.deepcopy(origins[offset:offset+limit])
def origin_add(self, origins): | def origin_add(self, origins): | ||||
"""Add origins to the storage | """Add origins to the storage | ||||
Args: | Args: | ||||
origins: list of dictionaries representing the individual origins, | origins: list of dictionaries representing the individual origins, | ||||
with the following keys: | with the following keys: | ||||
Show All 22 Lines | def origin_add_one(self, origin): | ||||
Returns: | Returns: | ||||
the id of the added origin, or of the identical one that already | the id of the added origin, or of the identical one that already | ||||
exists. | exists. | ||||
""" | """ | ||||
origin = copy.deepcopy(origin) | origin = copy.deepcopy(origin) | ||||
assert 'id' not in origin | assert 'id' not in origin | ||||
assert 'visits_dates' not in origin | origin_id = self._origin_id(origin) | ||||
key = self._origin_key(origin) | if origin_id is None: | ||||
origin['visits_dates'] = defaultdict(set) | # origin ids are in the range [1, +inf[ | ||||
if key not in self._origins: | origin_id = len(self._origins) + 1 | ||||
self._origins[key] = origin | origin['id'] = origin_id | ||||
self._objects[key].append(('origin', key)) | self._origins.append(origin) | ||||
return key | self._origin_visits.append([]) | ||||
key = (origin['type'], origin['url']) | |||||
self._objects[key].append(('origin', origin_id)) | |||||
return origin_id | |||||
def fetch_history_start(self, origin_id):
    """Add an entry for origin origin_id in fetch_history. Returns the id
    of the added fetch_history entry

    This implementation records nothing and returns None.

    """
    return None
def fetch_history_end(self, fetch_history_id, data):
    """Close the fetch_history entry with id `fetch_history_id`, replacing
    its data with `data`.

    This implementation records nothing and returns None.

    """
    return None
def fetch_history_get(self, fetch_history_id):
    """Get the fetch_history entry with id `fetch_history_id`.

    Raises:
        NotImplementedError: always; the message points callers to
            origin_visit_get.

    """
    message = ('fetch_history_get is deprecated, use '
               'origin_visit_get instead.')
    raise NotImplementedError(message)
def origin_visit_add(self, origin, date=None, *, ts=None):
    """Add an origin_visit for the origin at date with status 'ongoing'.

    Args:
        origin (int): visited origin's identifier
        date: timestamp of such visit; a string is parsed with
            ``dateutil.parser.parse``
        ts: deprecated keyword-only alias of ``date``

    Returns:
        dict: None if the origin does not exist; otherwise a dictionary
        with keys origin and visit where:

        - origin: origin's identifier
        - visit: the visit's identifier for the new visit occurrence

    Raises:
        TypeError: if neither ``date`` nor ``ts`` is provided.

    """
    if ts is None:
        if date is None:
            raise TypeError('origin_visit_add expected 2 arguments.')
    else:
        assert date is None
        warnings.warn("argument 'ts' of origin_visit_add was renamed "
                      "to 'date' in v0.0.109.",
                      DeprecationWarning)
        date = ts

    if isinstance(date, str):
        date = dateutil.parser.parse(date)

    # origin ids are in the range [1, +inf[; without the lower bound an id
    # of 0 (or a negative one) would index the list from its end and add
    # the visit to an unrelated origin
    if not 1 <= origin <= len(self._origin_visits):
        return None

    origin_visits = self._origin_visits[origin-1]
    # visit ids are in the range [1, +inf[
    visit_id = len(origin_visits) + 1
    visit = {
        'origin': origin,
        'date': date,
        'status': 'ongoing',
        'snapshot': None,
        'metadata': None,
        'visit': visit_id
    }
    # store a copy so the stored visit does not share the caller's objects
    origin_visits.append(copy.deepcopy(visit))

    return {
        'origin': origin,
        'visit': visit_id,
    }
def origin_visit_update(self, origin, visit_id, status, metadata=None):
    """Update an origin_visit's status.

    Nothing is changed when the origin or the visit does not exist.

    Args:
        origin (int): visited origin's identifier
        visit_id (int): visit's identifier
        status: visit's new status
        metadata: data associated to the visit

    Returns:
        None

    """
    # ids start at 1: reject 0 and negative values, which would otherwise
    # index the lists from their end and update an unrelated visit
    if not 1 <= origin <= len(self._origin_visits):
        return
    origin_visits = self._origin_visits[origin-1]
    if not 1 <= visit_id <= len(origin_visits):
        return

    origin_visits[visit_id-1].update({
        'status': status,
        'metadata': metadata})
def origin_visit_get(self, origin, last_visit=None, limit=None):
    """Retrieve all the origin's visit's information.

    Args:
        origin (int): the origin's identifier
        last_visit (int): visit's id from which listing the next ones,
            default to None
        limit (int): maximum number of results to return,
            default to None

    Yields:
        The visits of the origin, in the order they are stored; nothing
        is yielded when the origin does not exist.

    """
    # origin ids start at 1; 0 or a negative id would otherwise index the
    # list from its end and list the visits of an unrelated origin
    if not 1 <= origin <= len(self._origin_visits):
        return

    visits = self._origin_visits[origin-1]
    if last_visit is not None:
        # the visit with id N is stored at index N-1, so the visits that
        # come after it start at index N
        visits = visits[last_visit:]
    if limit is not None:
        visits = visits[:limit]
    yield from visits
def origin_visit_get_by(self, origin, visit):
    """Retrieve origin visit's information.

    Args:
        origin (int): the origin's identifier
        visit (int): the visit's identifier

    Returns:
        The information on that particular (origin, visit) or None if
        it does not exist

    """
    # ids start at 1: reject 0 and negative values, which would otherwise
    # index the lists from their end and return an unrelated visit
    if not 1 <= origin <= len(self._origin_visits):
        return None
    origin_visits = self._origin_visits[origin-1]
    if not 1 <= visit <= len(origin_visits):
        return None
    return origin_visits[visit-1]
def stat_counters(self): | def stat_counters(self): | ||||
"""compute statistics about the number of tuples in various tables | """compute statistics about the number of tuples in various tables | ||||
Returns: | Returns: | ||||
dict: a dictionary mapping textual labels (e.g., content) to | dict: a dictionary mapping textual labels (e.g., content) to | ||||
integer values (e.g., the number of tuples in table content) | integer values (e.g., the number of tuples in table content) | ||||
Show All 21 Lines | def refresh_stat_counters(self): | ||||
pass | pass | ||||
def origin_metadata_add(self, origin_id, ts, provider, tool, metadata,
                        db=None, cur=None):
    """Record a metadata entry, with its provenance, for an origin.

    Args:
        origin_id (int): the origin's id for which the metadata is added
        ts (datetime): timestamp of the found metadata; a string is
            parsed with ``dateutil.parser.parse``
        provider: id of the provider of metadata (ex:'hal')
        tool: id of the tool used to extract metadata
        metadata (jsonb): the metadata retrieved at the time and location
        db: unused by this implementation
        cur: unused by this implementation

    Returns:
        None

    """
    discovery_date = dateutil.parser.parse(ts) if isinstance(ts, str) else ts

    entry = {
        'origin_id': origin_id,
        'discovery_date': discovery_date,
        'tool_id': tool,
        'metadata': metadata,
        'provider_id': provider,
    }
    entries = self._origin_metadata[origin_id]
    entries.append(entry)
    return None
def origin_metadata_get_by(self, origin_id, provider_type=None, db=None, | def origin_metadata_get_by(self, origin_id, provider_type=None, db=None, | ||||
cur=None): | cur=None): | ||||
"""Retrieve list of all origin_metadata entries for the origin_id | """Retrieve list of all origin_metadata entries for the origin_id | ||||
Args: | Args: | ||||
origin_id (int): the unique origin identifier | origin_id (int): the unique origin's identifier | ||||
provider_type (str): (optional) type of provider | provider_type (str): (optional) type of provider | ||||
Returns: | Returns: | ||||
list of dicts: the origin_metadata dictionary with the keys: | list of dicts: the origin_metadata dictionary with the keys: | ||||
- origin_id (int): origin's id | - origin_id (int): origin's identifier | ||||
- discovery_date (datetime): timestamp of discovery | - discovery_date (datetime): timestamp of discovery | ||||
- tool_id (int): metadata's extracting tool | - tool_id (int): metadata's extracting tool | ||||
- metadata (jsonb) | - metadata (jsonb) | ||||
- provider_id (int): metadata's provider | - provider_id (int): metadata's provider | ||||
- provider_name (str) | - provider_name (str) | ||||
- provider_type (str) | - provider_type (str) | ||||
- provider_url (str) | - provider_url (str) | ||||
▲ Show 20 Lines • Show All 98 Lines • ▼ Show 20 Lines | def metadata_provider_get_by(self, provider, db=None, cur=None): | ||||
dict: same as `metadata_provider_add`; | dict: same as `metadata_provider_add`; | ||||
or None if it does not exist. | or None if it does not exist. | ||||
""" | """ | ||||
key = self._metadata_provider_key({ | key = self._metadata_provider_key({ | ||||
'name': provider['provider_name'], | 'name': provider['provider_name'], | ||||
'url': provider['provider_url']}) | 'url': provider['provider_url']}) | ||||
return self._metadata_providers.get(key) | return self._metadata_providers.get(key) | ||||
def _origin_id(self, origin): | |||||
origin_id = None | |||||
for stored_origin in self._origins: | |||||
if stored_origin['type'] == origin['type'] and \ | |||||
stored_origin['url'] == origin['url']: | |||||
origin_id = stored_origin['id'] | |||||
break | |||||
return origin_id | |||||
@staticmethod
def _content_key(content):
    """A stable key for a content: the tuple of the content's values for
    each name in DEFAULT_ALGORITHMS, taken in sorted name order (None for
    a name the content does not provide)."""
    algorithms = sorted(DEFAULT_ALGORITHMS)
    return tuple([content.get(algorithm) for algorithm in algorithms])
@staticmethod | @staticmethod | ||||
def _origin_key(origin): | |||||
return (origin['type'], origin['url']) | |||||
@staticmethod | |||||
def _tool_key(tool): | def _tool_key(tool): | ||||
return (tool['name'], tool['version'], | return (tool['name'], tool['version'], | ||||
tuple(sorted(tool['configuration'].items()))) | tuple(sorted(tool['configuration'].items()))) | ||||
@staticmethod | @staticmethod | ||||
def _metadata_provider_key(provider): | def _metadata_provider_key(provider): | ||||
return (provider['name'], provider['url']) | return (provider['name'], provider['url']) |
D769