Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/backfill.py
Show All 17 Lines | |||||
import logging | import logging | ||||
from typing import Any, Callable, Dict, Optional | from typing import Any, Callable, Dict, Optional | ||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
BaseModel, | BaseModel, | ||||
Directory, | Directory, | ||||
DirectoryEntry, | DirectoryEntry, | ||||
ExtID, | |||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.storage.postgresql.converters import ( | from swh.storage.postgresql.converters import ( | ||||
db_to_extid, | |||||
db_to_raw_extrinsic_metadata, | db_to_raw_extrinsic_metadata, | ||||
db_to_release, | db_to_release, | ||||
db_to_revision, | db_to_revision, | ||||
) | ) | ||||
from swh.storage.replay import object_converter_fn | from swh.storage.replay import object_converter_fn | ||||
from swh.storage.writer import JournalWriter | from swh.storage.writer import JournalWriter | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
PARTITION_KEY = { | PARTITION_KEY = { | ||||
"content": "sha1", | "content": "sha1", | ||||
"skipped_content": "sha1", | "skipped_content": "sha1", | ||||
"directory": "id", | "directory": "id", | ||||
"extid": "target", | |||||
"metadata_authority": "type, url", | "metadata_authority": "type, url", | ||||
"metadata_fetcher": "name, version", | "metadata_fetcher": "name, version", | ||||
"raw_extrinsic_metadata": "target", | "raw_extrinsic_metadata": "target", | ||||
"revision": "revision.id", | "revision": "revision.id", | ||||
"release": "release.id", | "release": "release.id", | ||||
"snapshot": "id", | "snapshot": "id", | ||||
"origin": "id", | "origin": "id", | ||||
"origin_visit": "origin_visit.origin", | "origin_visit": "origin_visit.origin", | ||||
Show All 16 Lines | "skipped_content": [ | ||||
"sha256", | "sha256", | ||||
"blake2s256", | "blake2s256", | ||||
"length", | "length", | ||||
"ctime", | "ctime", | ||||
"status", | "status", | ||||
"reason", | "reason", | ||||
], | ], | ||||
"directory": ["id", "dir_entries", "file_entries", "rev_entries"], | "directory": ["id", "dir_entries", "file_entries", "rev_entries"], | ||||
"extid": ["extid_type", "extid", "target_type", "target"], | |||||
"metadata_authority": ["type", "url", "metadata",], | "metadata_authority": ["type", "url", "metadata",], | ||||
"metadata_fetcher": ["name", "version", "metadata",], | "metadata_fetcher": ["name", "version", "metadata",], | ||||
"origin": ["url"], | |||||
"origin_visit": ["visit", "type", ("origin.url", "origin"), "date",], | |||||
"origin_visit_status": [ | |||||
("origin_visit_status.visit", "visit"), | |||||
("origin.url", "origin"), | |||||
("origin_visit_status.date", "date"), | |||||
"type", | |||||
"snapshot", | |||||
"status", | |||||
"metadata", | |||||
], | |||||
"raw_extrinsic_metadata": [ | "raw_extrinsic_metadata": [ | ||||
"raw_extrinsic_metadata.type", | "raw_extrinsic_metadata.type", | ||||
"raw_extrinsic_metadata.target", | "raw_extrinsic_metadata.target", | ||||
"metadata_authority.type", | "metadata_authority.type", | ||||
"metadata_authority.url", | "metadata_authority.url", | ||||
"metadata_fetcher.name", | "metadata_fetcher.name", | ||||
"metadata_fetcher.version", | "metadata_fetcher.version", | ||||
"discovery_date", | "discovery_date", | ||||
▲ Show 20 Lines • Show All 46 Lines • ▼ Show 20 Lines | "release": [ | ||||
"target", | "target", | ||||
"target_type", | "target_type", | ||||
("a.id", "author_id"), | ("a.id", "author_id"), | ||||
("a.name", "author_name"), | ("a.name", "author_name"), | ||||
("a.email", "author_email"), | ("a.email", "author_email"), | ||||
("a.fullname", "author_fullname"), | ("a.fullname", "author_fullname"), | ||||
], | ], | ||||
"snapshot": ["id", "object_id"], | "snapshot": ["id", "object_id"], | ||||
"origin": ["url"], | |||||
"origin_visit": ["visit", "type", ("origin.url", "origin"), "date",], | |||||
"origin_visit_status": [ | |||||
("origin_visit_status.visit", "visit"), | |||||
("origin.url", "origin"), | |||||
("origin_visit_status.date", "date"), | |||||
"type", | |||||
"snapshot", | |||||
"status", | |||||
"metadata", | |||||
], | |||||
} | } | ||||
JOINS = { | JOINS = { | ||||
"release": ["person a on release.author=a.id"], | "release": ["person a on release.author=a.id"], | ||||
"revision": [ | "revision": [ | ||||
"person a on revision.author=a.id", | "person a on revision.author=a.id", | ||||
"person c on revision.committer=c.id", | "person c on revision.committer=c.id", | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | with db.cursor() as cur: | ||||
entries.append(entry) | entries.append(entry) | ||||
return Directory(id=directory_d["id"], entries=tuple(entries),) | return Directory(id=directory_d["id"], entries=tuple(entries),) | ||||
def raw_extrinsic_metadata_converter( | def raw_extrinsic_metadata_converter( | ||||
db: BaseDb, metadata: Dict[str, Any] | db: BaseDb, metadata: Dict[str, Any] | ||||
) -> RawExtrinsicMetadata: | ) -> RawExtrinsicMetadata: | ||||
"""Convert revision from the flat representation to swh model | """Convert a raw extrinsic metadata from the flat representation to swh model | ||||
compatible objects. | compatible objects. | ||||
""" | """ | ||||
return db_to_raw_extrinsic_metadata(metadata) | return db_to_raw_extrinsic_metadata(metadata) | ||||
def extid_converter(db: BaseDb, extid: Dict[str, Any]) -> ExtID: | |||||
"""Convert an extid from the flat representation to swh model | |||||
compatible objects. | |||||
""" | |||||
return db_to_extid(extid) | |||||
def revision_converter(db: BaseDb, revision_d: Dict[str, Any]) -> Revision: | def revision_converter(db: BaseDb, revision_d: Dict[str, Any]) -> Revision: | ||||
"""Convert revision from the flat representation to swh model | """Convert revision from the flat representation to swh model | ||||
compatible objects. | compatible objects. | ||||
""" | """ | ||||
revision = db_to_revision(revision_d) | revision = db_to_revision(revision_d) | ||||
assert revision is not None, revision_d["id"] | assert revision is not None, revision_d["id"] | ||||
return revision | return revision | ||||
Show All 37 Lines | with db.cursor() as cur: | ||||
branch = None | branch = None | ||||
branches[name] = branch | branches[name] = branch | ||||
return Snapshot(id=snapshot_d["id"], branches=branches,) | return Snapshot(id=snapshot_d["id"], branches=branches,) | ||||
CONVERTERS: Dict[str, Callable[[BaseDb, Dict[str, Any]], BaseModel]] = { | CONVERTERS: Dict[str, Callable[[BaseDb, Dict[str, Any]], BaseModel]] = { | ||||
"directory": directory_converter, | "directory": directory_converter, | ||||
"extid": extid_converter, | |||||
"raw_extrinsic_metadata": raw_extrinsic_metadata_converter, | "raw_extrinsic_metadata": raw_extrinsic_metadata_converter, | ||||
"revision": revision_converter, | "revision": revision_converter, | ||||
"release": release_converter, | "release": release_converter, | ||||
"snapshot": snapshot_converter, | "snapshot": snapshot_converter, | ||||
} | } | ||||
def object_to_offset(object_id, numbits): | def object_to_offset(object_id, numbits): | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | for start in range(start, end, block_size): | ||||
else: | else: | ||||
yield start, start + block_size | yield start, start + block_size | ||||
RANGE_GENERATORS = { | RANGE_GENERATORS = { | ||||
"content": lambda start, end: byte_ranges(24, start, end), | "content": lambda start, end: byte_ranges(24, start, end), | ||||
"skipped_content": lambda start, end: [(None, None)], | "skipped_content": lambda start, end: [(None, None)], | ||||
"directory": lambda start, end: byte_ranges(24, start, end), | "directory": lambda start, end: byte_ranges(24, start, end), | ||||
"extid": lambda start, end: byte_ranges(24, start, end), | |||||
"revision": lambda start, end: byte_ranges(24, start, end), | "revision": lambda start, end: byte_ranges(24, start, end), | ||||
"release": lambda start, end: byte_ranges(16, start, end), | "release": lambda start, end: byte_ranges(16, start, end), | ||||
"snapshot": lambda start, end: byte_ranges(16, start, end), | "snapshot": lambda start, end: byte_ranges(16, start, end), | ||||
"origin": integer_ranges, | "origin": integer_ranges, | ||||
"origin_visit": integer_ranges, | "origin_visit": integer_ranges, | ||||
"origin_visit_status": integer_ranges, | "origin_visit_status": integer_ranges, | ||||
} | } | ||||
▲ Show 20 Lines • Show All 188 Lines • Show Last 20 Lines |