Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 302 Lines • ▼ Show 20 Lines | def content_get(self, content): | ||||
try: | try: | ||||
data = self.objstorage.get(obj_id) | data = self.objstorage.get(obj_id) | ||||
except ObjNotFoundError: | except ObjNotFoundError: | ||||
yield None | yield None | ||||
continue | continue | ||||
yield {'sha1': obj_id, 'data': data} | yield {'sha1': obj_id, 'data': data} | ||||
def content_get_range(self, start, end, limit=1000, db=None, cur=None): | def content_get_range(self, start, end, limit=1000): | ||||
"""Retrieve contents within range [start, end] bound by limit. | """Retrieve contents within range [start, end] bound by limit. | ||||
Note that this function may return more than one blob per hash. The | Note that this function may return more than one blob per hash. The | ||||
limit is enforced with multiplicity (ie. two blobs with the same hash | limit is enforced with multiplicity (ie. two blobs with the same hash | ||||
will count twice toward the limit). | will count twice toward the limit). | ||||
Args: | Args: | ||||
**start** (bytes): Starting identifier range (expected smaller | **start** (bytes): Starting identifier range (expected smaller | ||||
▲ Show 20 Lines • Show All 721 Lines • ▼ Show 20 Lines | def snapshot_get_latest(self, origin, allowed_statuses=None): | ||||
require_snapshot=True) | require_snapshot=True) | ||||
if visit and visit['snapshot']: | if visit and visit['snapshot']: | ||||
snapshot = self.snapshot_get(visit['snapshot']) | snapshot = self.snapshot_get(visit['snapshot']) | ||||
if not snapshot: | if not snapshot: | ||||
raise ValueError( | raise ValueError( | ||||
'last origin visit references an unknown snapshot') | 'last origin visit references an unknown snapshot') | ||||
return snapshot | return snapshot | ||||
def snapshot_count_branches(self, snapshot_id, db=None, cur=None): | def snapshot_count_branches(self, snapshot_id): | ||||
"""Count the number of branches in the snapshot with the given id | """Count the number of branches in the snapshot with the given id | ||||
Args: | Args: | ||||
snapshot_id (bytes): identifier of the snapshot | snapshot_id (bytes): identifier of the snapshot | ||||
Returns: | Returns: | ||||
dict: A dict whose keys are the target types of branches and | dict: A dict whose keys are the target types of branches and | ||||
values their corresponding amount | values their corresponding amount | ||||
▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines | |||||
def snapshot_get_random(self): | def snapshot_get_random(self): | ||||
"""Finds a random snapshot id. | """Finds a random snapshot id. | ||||
Returns: | Returns: | ||||
a sha1_git | a sha1_git | ||||
""" | """ | ||||
return random.choice(list(self._snapshots)) | return random.choice(list(self._snapshots)) | ||||
def object_find_by_sha1_git(self, ids, db=None, cur=None): | def object_find_by_sha1_git(self, ids): | ||||
"""Return the objects found with the given ids. | """Return the objects found with the given ids. | ||||
Args: | Args: | ||||
ids: a generator of sha1_gits | ids: a generator of sha1_gits | ||||
Returns: | Returns: | ||||
dict: a mapping from id to the list of objects found. Each object | dict: a mapping from id to the list of objects found. Each object | ||||
found is itself a dict with keys: | found is itself a dict with keys: | ||||
▲ Show 20 Lines • Show All 146 Lines • ▼ Show 20 Lines | def origin_list(self, page_token: Optional[str] = None, limit: int = 100 | ||||
} | } | ||||
if from_+limit < len(origin_urls): | if from_+limit < len(origin_urls): | ||||
result['next_page_token'] = origin_urls[from_+limit] | result['next_page_token'] = origin_urls[from_+limit] | ||||
return result | return result | ||||
def origin_search(self, url_pattern, offset=0, limit=50, | def origin_search(self, url_pattern, offset=0, limit=50, | ||||
regexp=False, with_visit=False, db=None, cur=None): | regexp=False, with_visit=False): | ||||
"""Search for origins whose urls contain a provided string pattern | """Search for origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
The search is performed in a case insensitive way. | The search is performed in a case insensitive way. | ||||
Args: | Args: | ||||
url_pattern (str): the string pattern to search for in origin urls | url_pattern (str): the string pattern to search for in origin urls | ||||
offset (int): number of found origins to skip before returning | offset (int): number of found origins to skip before returning | ||||
results | results | ||||
Show All 18 Lines | def origin_search(self, url_pattern, offset=0, limit=50, | ||||
if len(self._origin_visits[orig['url']]) > 0 and | if len(self._origin_visits[orig['url']]) > 0 and | ||||
set(ov.snapshot | set(ov.snapshot | ||||
for ov in self._origin_visits[orig['url']] | for ov in self._origin_visits[orig['url']] | ||||
if ov.snapshot) & | if ov.snapshot) & | ||||
set(self._snapshots)] | set(self._snapshots)] | ||||
return origins[offset:offset+limit] | return origins[offset:offset+limit] | ||||
def origin_count(self, url_pattern, regexp=False, with_visit=False, | def origin_count(self, url_pattern, regexp=False, with_visit=False): | ||||
db=None, cur=None): | |||||
"""Count origins whose urls contain a provided string pattern | """Count origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
The pattern search in origin urls is performed in a case insensitive | The pattern search in origin urls is performed in a case insensitive | ||||
way. | way. | ||||
Args: | Args: | ||||
url_pattern (str): the string pattern to search for in origin urls | url_pattern (str): the string pattern to search for in origin urls | ||||
regexp (bool): if True, consider the provided pattern as a regular | regexp (bool): if True, consider the provided pattern as a regular | ||||
▲ Show 20 Lines • Show All 365 Lines • ▼ Show 20 Lines | def stat_counters(self): | ||||
for (obj_type, obj_id) | for (obj_type, obj_id) | ||||
in itertools.chain(*self._objects.values()))) | in itertools.chain(*self._objects.values()))) | ||||
return stats | return stats | ||||
def refresh_stat_counters(self): | def refresh_stat_counters(self): | ||||
"""Recomputes the statistics for `stat_counters`.""" | """Recomputes the statistics for `stat_counters`.""" | ||||
pass | pass | ||||
def origin_metadata_add(self, origin_url, ts, provider, tool, metadata, | def origin_metadata_add(self, origin_url, ts, provider, tool, metadata): | ||||
db=None, cur=None): | |||||
""" Add an origin_metadata for the origin at ts with provenance and | """ Add an origin_metadata for the origin at ts with provenance and | ||||
metadata. | metadata. | ||||
Args: | Args: | ||||
origin_url (str): the origin url for which the metadata is added | origin_url (str): the origin url for which the metadata is added | ||||
ts (datetime): timestamp of the found metadata | ts (datetime): timestamp of the found metadata | ||||
provider: id of the provider of metadata (ex:'hal') | provider: id of the provider of metadata (ex:'hal') | ||||
tool: id of the tool used to extract metadata | tool: id of the tool used to extract metadata | ||||
Show All 10 Lines | def origin_metadata_add(self, origin_url, ts, provider, tool, metadata): | ||||
'discovery_date': ts, | 'discovery_date': ts, | ||||
'tool_id': tool, | 'tool_id': tool, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'provider_id': provider, | 'provider_id': provider, | ||||
} | } | ||||
self._origin_metadata[origin_url].append(origin_metadata) | self._origin_metadata[origin_url].append(origin_metadata) | ||||
return None | return None | ||||
def origin_metadata_get_by(self, origin_url, provider_type=None, db=None, | def origin_metadata_get_by(self, origin_url, provider_type=None): | ||||
cur=None): | |||||
"""Retrieve list of all origin_metadata entries for the origin_url | """Retrieve list of all origin_metadata entries for the origin_url | ||||
Args: | Args: | ||||
origin_url (str): the origin's url | origin_url (str): the origin's url | ||||
provider_type (str): (optional) type of provider | provider_type (str): (optional) type of provider | ||||
Returns: | Returns: | ||||
list of dicts: the origin_metadata dictionary with the keys: | list of dicts: the origin_metadata dictionary with the keys: | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | def metadata_provider_add(self, provider_name, provider_type, provider_url, | ||||
'provider_url': provider_url, | 'provider_url': provider_url, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
} | } | ||||
key = self._metadata_provider_key(provider) | key = self._metadata_provider_key(provider) | ||||
provider['id'] = key | provider['id'] = key | ||||
self._metadata_providers[key] = provider | self._metadata_providers[key] = provider | ||||
return key | return key | ||||
def metadata_provider_get(self, provider_id, db=None, cur=None): | def metadata_provider_get(self, provider_id): | ||||
"""Get a metadata provider | """Get a metadata provider | ||||
Args: | Args: | ||||
provider_id: Its identifier, as given by `metadata_provider_add`. | provider_id: Its identifier, as given by `metadata_provider_add`. | ||||
Returns: | Returns: | ||||
dict: same as `metadata_provider_add`; | dict: same as `metadata_provider_add`; | ||||
or None if it does not exist. | or None if it does not exist. | ||||
""" | """ | ||||
return self._metadata_providers.get(provider_id) | return self._metadata_providers.get(provider_id) | ||||
def metadata_provider_get_by(self, provider, db=None, cur=None): | def metadata_provider_get_by(self, provider): | ||||
"""Get a metadata provider | """Get a metadata provider | ||||
Args: | Args: | ||||
provider_name: Its name | provider_name: Its name | ||||
provider_url: Its URL | provider_url: Its URL | ||||
Returns: | Returns: | ||||
dict: same as `metadata_provider_add`; | dict: same as `metadata_provider_add`; | ||||
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines |