Page MenuHomeSoftware Heritage

revision.py
No OneTemporary

revision.py

from datetime import datetime, timezone
from itertools import islice
import threading
from typing import Iterable, Iterator, Optional, Tuple
import iso8601
from swh.model.hashutil import hash_to_bytes
from swh.provenance.archive import ArchiveInterface
from swh.provenance.model import RevisionEntry
########################################################################################
########################################################################################
class CSVRevisionIterator:
    """Iterate over revision rows, typically read from a CSV file.

    Every row produced by the input must unpack into three fields:

        (id, date, root)

    with:
    - id: the identifier (sha1_git) of the revision
    - date: the author date of the revision
    - root: the sha1 of the revision's root directory

    Each row is converted into a ``RevisionEntry``. Calls to ``__next__``
    are serialized through a lock held for the whole conversion.
    """

    def __init__(
        self,
        revisions: Iterable[Tuple[bytes, datetime, bytes]],
        archive: ArchiveInterface,
        limit: Optional[int] = None,
    ):
        """Wrap ``revisions`` in an iterator, optionally capped at ``limit`` rows.

        Args:
            revisions: source of ``(id, date, root)`` rows.
            archive: stored on the instance; not used by the methods of
                this class.
            limit: when not ``None``, at most this many rows are consumed
                from ``revisions``.
        """
        # NOTE(review): the annotations declare the date field as `datetime`,
        # yet `__next__` hands it to `iso8601.parse_date` -- confirm the
        # element types callers actually provide.
        self.revisions: Iterator[Tuple[bytes, datetime, bytes]] = (
            iter(revisions) if limit is None else islice(revisions, limit)
        )
        self.mutex = threading.Lock()
        self.archive = archive

    def __iter__(self):
        """Return the instance itself; it is its own iterator."""
        return self

    def __next__(self):
        """Build the ``RevisionEntry`` for the next input row.

        ``StopIteration`` raised by the underlying iterator propagates to
        the caller once the rows (or the ``limit``) are exhausted.
        """
        with self.mutex:
            rev_id, raw_date, root_dir = next(self.revisions)

            parsed = iso8601.parse_date(raw_date)
            # Fall back to UTC whenever the parsed date carries no timezone.
            author_date = (
                parsed
                if parsed.tzinfo is not None
                else parsed.replace(tzinfo=timezone.utc)
            )

            revision_sha1 = hash_to_bytes(rev_id)
            directory_sha1 = hash_to_bytes(root_dir)
            return RevisionEntry(
                revision_sha1, date=author_date, root=directory_sha1
            )

File Metadata

Mime Type
text/x-python
Expires
Jan 27 2026, 12:56 PM (5 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3256716

Event Timeline