Changeset View
Standalone View
swh/storage/in_memory.py
Show All 32 Lines | def __init__(self): | ||||
self._content_indexes = defaultdict(lambda: defaultdict(set)) | self._content_indexes = defaultdict(lambda: defaultdict(set)) | ||||
self._directories = {} | self._directories = {} | ||||
self._revisions = {} | self._revisions = {} | ||||
self._releases = {} | self._releases = {} | ||||
self._snapshots = {} | self._snapshots = {} | ||||
self._origins = [] | self._origins = [] | ||||
self._origin_visits = [] | self._origin_visits = [] | ||||
self._persons = [] | |||||
self._origin_metadata = defaultdict(list) | self._origin_metadata = defaultdict(list) | ||||
self._tools = {} | self._tools = {} | ||||
self._metadata_providers = {} | self._metadata_providers = {} | ||||
self._objects = defaultdict(list) | self._objects = defaultdict(list) | ||||
# ideally we would want a skip list for both fast inserts and searches | # ideally we would want a skip list for both fast inserts and searches | ||||
self._sorted_sha1s = [] | self._sorted_sha1s = [] | ||||
▲ Show 20 Lines • Show All 361 Lines • ▼ Show 20 Lines | def revision_add(self, revisions): | ||||
- **parents** (:class:`list[sha1_git]`): the parents of | - **parents** (:class:`list[sha1_git]`): the parents of | ||||
this revision | this revision | ||||
date dictionaries have the form defined in :mod:`swh.model`. | date dictionaries have the form defined in :mod:`swh.model`. | ||||
""" | """ | ||||
for revision in revisions: | for revision in revisions: | ||||
if revision['id'] not in self._revisions: | if revision['id'] not in self._revisions: | ||||
self._revisions[revision['id']] = rev = copy.deepcopy(revision) | self._revisions[revision['id']] = rev = copy.deepcopy(revision) | ||||
self._person_add(rev['committer']) | |||||
self._person_add(rev['author']) | |||||
rev['date'] = normalize_timestamp(rev.get('date')) | rev['date'] = normalize_timestamp(rev.get('date')) | ||||
rev['committer_date'] = normalize_timestamp( | rev['committer_date'] = normalize_timestamp( | ||||
rev.get('committer_date')) | rev.get('committer_date')) | ||||
self._objects[revision['id']].append( | self._objects[revision['id']].append( | ||||
('revision', revision['id'])) | ('revision', revision['id'])) | ||||
def revision_missing(self, revision_ids): | def revision_missing(self, revision_ids): | ||||
"""List revisions missing from storage | """List revisions missing from storage | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | def release_add(self, releases): | ||||
- **comment** (:class:`bytes`): the comment associated with | - **comment** (:class:`bytes`): the comment associated with | ||||
the release | the release | ||||
- **author** (:class:`Dict[str, bytes]`): dictionary with | - **author** (:class:`Dict[str, bytes]`): dictionary with | ||||
keys: name, fullname, email | keys: name, fullname, email | ||||
the date dictionary has the form defined in :mod:`swh.model`. | the date dictionary has the form defined in :mod:`swh.model`. | ||||
""" | """ | ||||
for rel in releases: | for rel in releases: | ||||
rel = copy.deepcopy(rel) | |||||
rel['date'] = normalize_timestamp(rel['date']) | rel['date'] = normalize_timestamp(rel['date']) | ||||
self._person_add(rel['author']) | |||||
self._objects[rel['id']].append( | self._objects[rel['id']].append( | ||||
('release', rel['id'])) | ('release', rel['id'])) | ||||
self._releases.update((rel['id'], rel) for rel in releases) | self._releases[rel['id']] = rel | ||||
def release_missing(self, releases): | def release_missing(self, releases): | ||||
"""List releases missing from storage | """List releases missing from storage | ||||
Args: | Args: | ||||
releases: an iterable of release ids | releases: an iterable of release ids | ||||
Returns: | Returns: | ||||
▲ Show 20 Lines • Show All 497 Lines • ▼ Show 20 Lines | def origin_visit_get_by(self, origin, visit): | ||||
""" | """ | ||||
origin_visit = None | origin_visit = None | ||||
if origin <= len(self._origin_visits) and \ | if origin <= len(self._origin_visits) and \ | ||||
visit <= len(self._origin_visits[origin-1]): | visit <= len(self._origin_visits[origin-1]): | ||||
origin_visit = self._origin_visits[origin-1][visit-1] | origin_visit = self._origin_visits[origin-1][visit-1] | ||||
return origin_visit | return origin_visit | ||||
def person_get(self, person):
    """Yield the stored persons matching the given ids.

    Args:
        person: iterable of person ids. Ids are 1-based positions in
            the internal list of persons.

    Yields:
        For each requested id, in input order: a copy of the stored
        person dictionary extended with an ``id`` key, or None when
        no person is stored under that id.

    """
    for person_id in person:
        # The length is re-read on every iteration on purpose: this is a
        # generator, so persons may be added between two yields.
        if 1 <= person_id <= len(self._persons):
            # dict(...) builds a fresh mapping, so callers never get a
            # reference to the stored dictionary itself.
            yield dict(self._persons[person_id - 1], id=person_id)
        else:
            yield None
def stat_counters(self): | def stat_counters(self): | ||||
"""compute statistics about the number of tuples in various tables | """compute statistics about the number of tuples in various tables | ||||
Returns: | Returns: | ||||
dict: a dictionary mapping textual labels (e.g., content) to | dict: a dictionary mapping textual labels (e.g., content) to | ||||
integer values (e.g., the number of tuples in table content) | integer values (e.g., the number of tuples in table content) | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 174 Lines • ▼ Show 20 Lines | def _origin_id(self, origin): | ||||
origin_id = None | origin_id = None | ||||
for stored_origin in self._origins: | for stored_origin in self._origins: | ||||
if stored_origin['type'] == origin['type'] and \ | if stored_origin['type'] == origin['type'] and \ | ||||
stored_origin['url'] == origin['url']: | stored_origin['url'] == origin['url']: | ||||
origin_id = stored_origin['id'] | origin_id = stored_origin['id'] | ||||
break | break | ||||
return origin_id | return origin_id | ||||
def _person_add(self, person): | |||||
"""Add a person in storage. | |||||
Note: Private method, do not use outside of this class. | |||||
Args: | |||||
person: dictionary with keys fullname, name and email. | |||||
""" | |||||
Not Done Inline ActionsI'm not sure what the semantics of self._objects is, but using it for this check doesn't feel right: what if someone's full name ends up being a valid object identifier for another type? olasd: I'm not sure what the semantics of `self._objects` is, but using it for this check doesn't feel… | |||||
Done Inline ActionsBased on my understanding of vlorentz implementation, the _objects dict enable to easily lookup objects by sha1s Other keys in that dict corresponds to sha1s or (origin_type, origin_url) tuples so it's safe to use fullname as key here. anlambert: Based on my understanding of vlorentz implementation, the `_objects` dict enable to easily… | |||||
Not Done Inline ActionsI mean, technically, a full name could match a sha1 (yeah, I know it's more than a bit far-fetched). I don't think it matters much, it's just a bit surprising. olasd: I mean, technically, a full name could match a sha1 (yeah, I know it's more than a bit far… | |||||
Done Inline ActionsOk, will use a tuple key instead and land that diff anlambert: Ok, will use a tuple key instead and land that diff | |||||
key = ('person', person['fullname']) | |||||
if key not in self._objects: | |||||
person_id = len(self._persons) + 1 | |||||
self._persons.append(dict(person)) | |||||
self._objects[key].append(('person', person_id)) | |||||
else: | |||||
person_id = self._objects[key][0][1] | |||||
p = next(self.person_get([person_id])) | |||||
person.update(p.items()) | |||||
person['id'] = person_id | |||||
@staticmethod
def _content_key(content):
    """Build a stable tuple key for a content.

    The tuple holds the value ``content`` has for each name in
    DEFAULT_ALGORITHMS, taken in sorted name order; a missing entry
    contributes None.
    """
    ordered_names = sorted(DEFAULT_ALGORITHMS)
    return tuple(content.get(name) for name in ordered_names)
@staticmethod | @staticmethod | ||||
def _tool_key(tool): | def _tool_key(tool): | ||||
return (tool['name'], tool['version'], | return (tool['name'], tool['version'], | ||||
tuple(sorted(tool['configuration'].items()))) | tuple(sorted(tool['configuration'].items()))) | ||||
@staticmethod | @staticmethod | ||||
def _metadata_provider_key(provider): | def _metadata_provider_key(provider): | ||||
return (provider['name'], provider['url']) | return (provider['name'], provider['url']) |
could you swap the if and yield? it'd avoid needing the backslash