Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7342975
D767.id2409.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D767.id2409.diff
View Options
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -25,9 +25,6 @@
return datetime.datetime.now(tz=datetime.timezone.utc)
-OriginVisitKey = collections.namedtuple('OriginVisitKey', 'origin date')
-
-
class Storage:
def __init__(self):
self._contents = {}
@@ -38,8 +35,8 @@
self._revisions = {}
self._releases = {}
self._snapshots = {}
- self._origins = {}
- self._origin_visits = {}
+ self._origins = []
+ self._origin_visits = []
self._origin_metadata = defaultdict(list)
self._tools = {}
self._metadata_providers = {}
@@ -561,7 +558,9 @@
'_sorted_branch_names': sorted(snapshot['branches'])
}
self._objects[snapshot_id].append(('snapshot', snapshot_id))
- self._origin_visits[visit]['snapshot'] = snapshot_id
+ if origin <= len(self._origin_visits) and \
+ visit <= len(self._origin_visits[origin-1]):
+ self._origin_visits[origin-1][visit-1]['snapshot'] = snapshot_id
def snapshot_get(self, snapshot_id):
"""Get the content, possibly partial, of a snapshot with the given id
@@ -612,9 +611,10 @@
branches.
"""
- if visit not in self._origin_visits:
+ if origin > len(self._origins) or \
+ visit > len(self._origin_visits[origin-1]):
return None
- snapshot_id = self._origin_visits[visit]['snapshot']
+ snapshot_id = self._origin_visits[origin-1][visit-1]['snapshot']
if snapshot_id:
return self.snapshot_get(snapshot_id)
else:
@@ -648,22 +648,18 @@
or :const:`None` if the snapshot has less than 1000
branches.
"""
- if allowed_statuses is None:
- visits_dates = list(itertools.chain(
- *self._origins[origin]['visits_dates'].values()))
- else:
- last_visits = self._origins[origin]['visits_dates']
- visits_dates = list(itertools.chain(
- *map(last_visits.__getitem__, allowed_statuses)))
-
- for visit_date in sorted(visits_dates, reverse=True):
- visit_id = OriginVisitKey(origin=origin, date=visit_date)
- snapshot_id = self._origin_visits[visit_id]['snapshot']
+ visits = self._origin_visits[origin-1]
+ if allowed_statuses is not None:
+ visits = [visit for visit in visits
+ if visit['status'] in allowed_statuses]
+ snapshot = None
+ for visit in reversed(visits):
+ snapshot_id = visit['snapshot']
snapshot = self.snapshot_get(snapshot_id)
if snapshot:
- return snapshot
+ break
- return None
+ return snapshot
def snapshot_count_branches(self, snapshot_id, db=None, cur=None):
"""Count the number of branches in the snapshot with the given id
@@ -793,17 +789,16 @@
"""
if 'id' in origin:
- key = origin['id']
+ origin_id = origin['id']
elif 'type' in origin and 'url' in origin:
- key = self._origin_key(origin)
+ origin_id = self._origin_id(origin)
else:
raise ValueError('Origin must have either id or (type and url).')
- if key not in self._origins:
+ if origin_id is None or origin_id > len(self._origins):
return None
else:
- origin = copy.deepcopy(self._origins[key])
- del origin['visits_dates']
- origin['id'] = self._origin_key(origin)
+ origin = copy.deepcopy(self._origins[origin_id-1])
+ origin['id'] = origin_id
return origin
def origin_search(self, url_pattern, offset=0, limit=50,
@@ -825,19 +820,17 @@
An iterable of dict containing origin information as returned
by :meth:`swh.storage.storage.Storage.origin_get`.
"""
- origins = iter(self._origins.values())
+ origins = self._origins
if regexp:
pat = re.compile(url_pattern)
origins = (orig for orig in origins if pat.match(orig['url']))
else:
origins = (orig for orig in origins if url_pattern in orig['url'])
if with_visit:
- origins = (orig for orig in origins if orig['visits_dates'])
- origins = sorted(origins, key=self._origin_key)
+ origins = (orig for orig in origins
+ if len(self._origin_visits[orig['id']-1]) > 0)
+ origins = sorted(origins, key=lambda origin: origin['id'])
origins = copy.deepcopy(origins[offset:offset+limit])
- for orig in origins:
- del orig['visits_dates']
- orig['id'] = self._origin_key(orig)
return origins
def origin_add(self, origins):
@@ -876,13 +869,16 @@
"""
origin = copy.deepcopy(origin)
assert 'id' not in origin
- assert 'visits_dates' not in origin
- key = self._origin_key(origin)
- origin['visits_dates'] = defaultdict(set)
- if key not in self._origins:
- self._origins[key] = origin
- self._objects[key].append(('origin', key))
- return key
+ origin_id = self._origin_id(origin)
+ if origin_id is None:
+ # origin ids are in the range [1, +inf[
+ origin_id = len(self._origins) + 1
+ origin['id'] = origin_id
+ self._origins.append(origin)
+ self._origin_visits.append([])
+ key = (origin['type'], origin['url'])
+ self._objects[key].append(('origin', origin_id))
+ return origin_id
def fetch_history_start(self, origin_id):
"""Add an entry for origin origin_id in fetch_history. Returns the id
@@ -929,25 +925,26 @@
if isinstance(date, str):
date = dateutil.parser.parse(date)
- status = 'ongoing'
-
- visit = {
+ visit_ret = None
+ if origin <= len(self._origin_visits):
+ # visit ids are in the range [1, +inf[
+ visit_id = len(self._origin_visits[origin-1]) + 1
+ status = 'ongoing'
+ visit = {
'origin': origin,
'date': date,
'status': status,
'snapshot': None,
'metadata': None,
- }
- key = OriginVisitKey(origin=origin, date=date)
- visit['visit'] = key
- if key not in self._origin_visits:
- self._origin_visits[key] = copy.deepcopy(visit)
- self._origins[origin]['visits_dates'][status].add(date)
+ 'visit': visit_id
+ }
+ self._origin_visits[origin-1].append(copy.deepcopy(visit))
+ visit_ret = {
+ 'origin': origin,
+ 'visit': visit_id,
+ }
- return {
- 'origin': origin,
- 'visit': key,
- }
+ return visit_ret
def origin_visit_update(self, origin, visit_id, status, metadata=None):
"""Update an origin_visit's status.
@@ -962,12 +959,10 @@
None
"""
- old_status = self._origin_visits[visit_id]['status']
- self._origins[origin]['visits_dates'][old_status] \
- .remove(visit_id.date)
- self._origins[origin]['visits_dates'][status] \
- .add(visit_id.date)
- self._origin_visits[visit_id].update({
+ if origin > len(self._origin_visits) or \
+ visit_id > len(self._origin_visits[origin-1]):
+ return
+ self._origin_visits[origin-1][visit_id-1].update({
'status': status,
'metadata': metadata})
@@ -985,16 +980,14 @@
List of visits.
"""
- visits_dates = sorted(itertools.chain.from_iterable(
- self._origins[origin]['visits_dates'].values()))
+ visits = self._origin_visits[origin-1]
if last_visit is not None:
- from_index = bisect.bisect_right(visits_dates, last_visit.date)
- visits_dates = visits_dates[from_index:]
+ visits = visits[last_visit:]
if limit is not None:
- visits_dates = visits_dates[:limit]
- keys = (OriginVisitKey(origin=origin, date=date)
- for date in visits_dates)
- yield from map(self._origin_visits.__getitem__, keys)
+ visits = visits[:limit]
+ for visit in visits:
+ visit_id = visit['visit']
+ yield self._origin_visits[origin-1][visit_id-1]
def origin_visit_get_by(self, origin, visit):
"""Retrieve origin visit's information.
@@ -1007,7 +1000,11 @@
it does not exist
"""
- return self._origin_visits.get(visit)
+ origin_visit = None
+ if origin <= len(self._origin_visits) and \
+ visit <= len(self._origin_visits[origin-1]):
+ origin_visit = self._origin_visits[origin-1][visit-1]
+ return origin_visit
def stat_counters(self):
"""compute statistics about the number of tuples in various tables
@@ -1190,16 +1187,21 @@
'url': provider['provider_url']})
return self._metadata_providers.get(key)
+ def _origin_id(self, origin):
+ origin_id = None
+ for stored_origin in self._origins:
+ if stored_origin['type'] == origin['type'] and \
+ stored_origin['url'] == origin['url']:
+ origin_id = stored_origin['id']
+ break
+ return origin_id
+
@staticmethod
def _content_key(content):
"""A stable key for a content"""
return tuple(content.get(key) for key in sorted(DEFAULT_ALGORITHMS))
@staticmethod
- def _origin_key(origin):
- return (origin['type'], origin['url'])
-
- @staticmethod
def _tool_key(tool):
return (tool['name'], tool['version'],
tuple(sorted(tool['configuration'].items())))
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 6:26 PM (7 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225929
Attached To
D767: storage.in_memory: Use integer ids for origin and visit instead of tuples
Event Timeline
Log In to Comment