Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/revision.py
from datetime import datetime | from datetime import datetime | ||||
from itertools import islice | |||||
import threading | import threading | ||||
from typing import Optional | from typing import Iterable, Iterator, Optional, Tuple | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from .archive import ArchiveInterface | from .archive import ArchiveInterface | ||||
class RevisionEntry: | class RevisionEntry: | ||||
def __init__( | def __init__( | ||||
Show All 27 Lines | def __iter__(self): | ||||
return iter(self.parents) | return iter(self.parents) | ||||
######################################################################################## | ######################################################################################## | ||||
######################################################################################## | ######################################################################################## | ||||
class RevisionIterator: | class CSVRevisionIterator: | ||||
"""Iterator interface.""" | """Iterator over revisions typically present in the given CSV file. | ||||
def __iter__(self): | The input is an iterator that produces 3 elements per row: | ||||
pass | |||||
def __next__(self): | |||||
pass | |||||
(id, date, root) | |||||
class FileRevisionIterator(RevisionIterator): | where: | ||||
"""Iterator over revisions present in the given CSV file.""" | - id: is the id (sha1_git) of the revision | ||||
- date: is the author date | |||||
- root: sha1 of the directory | |||||
""" | |||||
def __init__( | def __init__( | ||||
self, filename: str, archive: ArchiveInterface, limit: Optional[int] = None | self, | ||||
revisions: Iterable[Tuple[bytes, datetime, bytes]], | |||||
archive: ArchiveInterface, | |||||
limit: Optional[int] = None, | |||||
): | ): | ||||
self.file = open(filename) | self.revisions: Iterator[Tuple[bytes, datetime, bytes]] | ||||
self.idx = 0 | if limit is not None: | ||||
self.limit = limit | self.revisions = islice(revisions, limit) | ||||
else: | |||||
self.revisions = iter(revisions) | |||||
self.mutex = threading.Lock() | self.mutex = threading.Lock() | ||||
self.archive = archive | self.archive = archive | ||||
def next(self): | def __iter__(self): | ||||
self.mutex.acquire() | return self | ||||
line = self.file.readline().strip() | |||||
if line and (self.limit is None or self.idx < self.limit): | |||||
self.idx = self.idx + 1 | |||||
id, date, root = line.strip().split(",") | |||||
self.mutex.release() | |||||
def __next__(self): | |||||
with self.mutex: | |||||
id, date, root = next(self.revisions) | |||||
return RevisionEntry( | return RevisionEntry( | ||||
self.archive, | self.archive, | ||||
hash_to_bytes(id), | hash_to_bytes(id), | ||||
date=datetime.fromisoformat(date), | date=datetime.fromisoformat(date), | ||||
root=hash_to_bytes(root), | root=hash_to_bytes(root), | ||||
) | ) | ||||
else: | |||||
self.mutex.release() | |||||
return None | |||||
# class ArchiveRevisionIterator(RevisionIterator): | # class ArchiveRevisionIterator(RevisionIterator): | ||||
# """Iterator over revisions present in the given database.""" | # """Iterator over revisions present in the given database.""" | ||||
# | # | ||||
# def __init__(self, conn, limit=None, chunksize=100): | # def __init__(self, conn, limit=None, chunksize=100): | ||||
# self.cur = conn.cursor() | # self.cur = conn.cursor() | ||||
# self.chunksize = chunksize | # self.chunksize = chunksize | ||||
▲ Show 20 Lines • Show All 90 Lines • Show Last 20 Lines |