Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/mongo/backend.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from __future__ import annotations | |||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import os | import os | ||||
from typing import Any, Dict, Generator, Iterable, List, Optional, Set, Union | from types import TracebackType | ||||
from typing import Any, Dict, Generator, Iterable, List, Optional, Set, Type, Union | |||||
from bson import ObjectId | from bson import ObjectId | ||||
import pymongo.database | import mongomock | ||||
import pymongo | |||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from ..interface import ( | from ..interface import ( | ||||
EntityType, | EntityType, | ||||
ProvenanceResult, | ProvenanceResult, | ||||
ProvenanceStorageInterface, | |||||
RelationData, | RelationData, | ||||
RelationType, | RelationType, | ||||
RevisionData, | RevisionData, | ||||
) | ) | ||||
class ProvenanceStorageMongoDb: | class ProvenanceStorageMongoDb: | ||||
def __init__(self, db: pymongo.database.Database): | def __init__(self, engine: str, **kwargs): | ||||
self.db = db | self.engine = engine | ||||
self.dbname = kwargs.pop("dbname") | |||||
self.conn_args = kwargs | |||||
def __enter__(self) -> ProvenanceStorageInterface: | |||||
self.open() | |||||
return self | |||||
def __exit__( | |||||
self, | |||||
exc_type: Optional[Type[BaseException]], | |||||
exc_val: Optional[BaseException], | |||||
exc_tb: Optional[TracebackType], | |||||
) -> None: | |||||
self.close() | |||||
def close(self) -> None: | |||||
self.db.client.close() | |||||
def content_add( | def content_add( | ||||
self, cnts: Union[Iterable[Sha1Git], Dict[Sha1Git, Optional[datetime]]] | self, cnts: Union[Iterable[Sha1Git], Dict[Sha1Git, Optional[datetime]]] | ||||
) -> bool: | ) -> bool: | ||||
data = cnts if isinstance(cnts, dict) else dict.fromkeys(cnts) | data = cnts if isinstance(cnts, dict) else dict.fromkeys(cnts) | ||||
existing = { | existing = { | ||||
x["sha1"]: x | x["sha1"]: x | ||||
for x in self.db.content.find( | for x in self.db.content.find( | ||||
▲ Show 20 Lines • Show All 163 Lines • ▼ Show 20 Lines | def location_get_all(self) -> Set[bytes]: | ||||
paths.extend(value for _, value in content["revision"].items()) | paths.extend(value for _, value in content["revision"].items()) | ||||
paths.extend(value for _, value in content["directory"].items()) | paths.extend(value for _, value in content["directory"].items()) | ||||
dirs = self.db.directory.find({}, {"revision": 1, "_id": 0}) | dirs = self.db.directory.find({}, {"revision": 1, "_id": 0}) | ||||
for each_dir in dirs: | for each_dir in dirs: | ||||
paths.extend(value for _, value in each_dir["revision"].items()) | paths.extend(value for _, value in each_dir["revision"].items()) | ||||
return set(sum(paths, [])) | return set(sum(paths, [])) | ||||
def open(self) -> None:
    """Create a client from the stored arguments and bind ``self.db``.

    The ``"mongomock"`` engine selects ``mongomock.MongoClient``; any other
    engine value is assumed to mean a real MongoDB server and selects
    ``pymongo.MongoClient``. The client is built from ``self.conn_args`` and
    the database named ``self.dbname`` is stored on ``self.db``.
    """
    if self.engine == "mongomock":
        client_factory = mongomock.MongoClient
    else:
        # assume real MongoDB server by default
        client_factory = pymongo.MongoClient
    client = client_factory(**self.conn_args)
    self.db = client.get_database(self.dbname)
def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool: | def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool: | ||||
existing = { | existing = { | ||||
x["sha1"]: x | x["sha1"]: x | ||||
for x in self.db.origin.find( | for x in self.db.origin.find( | ||||
{"sha1": {"$in": list(orgs)}}, {"sha1": 1, "url": 1, "_id": 1} | {"sha1": {"$in": list(orgs)}}, {"sha1": 1, "url": 1, "_id": 1} | ||||
) | ) | ||||
} | } | ||||
for sha1, url in orgs.items(): | for sha1, url in orgs.items(): | ||||
▲ Show 20 Lines • Show All 247 Lines • Show Last 20 Lines |