Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9342015
D6251.id22676.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
D6251.id22676.diff
View Options
diff --git a/swh/provenance/mongo/backend.py b/swh/provenance/mongo/backend.py
--- a/swh/provenance/mongo/backend.py
+++ b/swh/provenance/mongo/backend.py
@@ -140,11 +140,14 @@
yield from sorted(occurs, key=lambda x: (x.date, x.revision, x.origin, x.path))
def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]:
+ # FIXME, add index on content sha1 and ts
+ # FIXME, do the timezone operation in mongo
return {
x["sha1"]: datetime.fromtimestamp(x["ts"], timezone.utc)
+ # FIXME try to avoid this loop and return directly in the needed format from mongo
for x in self.db.content.find(
- {"sha1": {"$in": list(ids)}, "ts": {"$ne": None}},
- {"sha1": 1, "ts": 1, "_id": 0},
+ {"sha1": {"$in": list(ids)}, "ts": {"$ne": None}},
+ {"sha1": 1, "ts": 1, "_id": 0},
)
}
@@ -173,6 +176,8 @@
def directory_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]:
return {
x["sha1"]: datetime.fromtimestamp(x["ts"], timezone.utc)
+ # FIXME try to avoid this loop and return directly in the needed format from mongo
+ # FIXME add ts to index in directory
for x in self.db.directory.find(
{"sha1": {"$in": list(ids)}, "ts": {"$ne": None}},
{"sha1": 1, "ts": 1, "_id": 0},
@@ -180,12 +185,17 @@
}
def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]:
- return {
- x["sha1"]
- for x in self.db.get_collection(entity.value).find(
- {}, {"sha1": 1, "_id": 0}
- )
- }
+ # only for tests
+
+ return set(self.db.get_collection(entity.value).distinct('sha1'))
+
+ # return {
+ # x["sha1"]
+ # # FIXME try to avoid this loop and return directly in the needed format from mongo
+ # for x in self.db.get_collection(entity.value).find(
+ # {}, {"sha1": 1, "_id": 0}
+ # )
+ # }
def location_add(self, paths: Iterable[bytes]) -> bool:
# TODO: implement this methods if path are to be stored in a separate collection
@@ -194,6 +204,7 @@
def location_get_all(self) -> Set[bytes]:
contents = self.db.content.find({}, {"revision": 1, "_id": 0, "directory": 1})
paths: List[Iterable[bytes]] = []
+
for content in contents:
paths.extend(value for _, value in content["revision"].items())
paths.extend(value for _, value in content["directory"].items())
@@ -216,9 +227,22 @@
self.db.origin.insert_one({"sha1": sha1, "url": url})
return True
+ # origins = {
+ # x["sha1"]: x
+ # for x in self.db.origin.find(
+ # {"sha1": {"$in": list(urls)}}, {"sha1": 1, "url": 1, "_id": 1}
+ # )
+ # }
+ # for sha1, url in urls.items():
+ # if sha1 not in origins:
+ # # add new origin
+ # self.db.origin.insert_one({"sha1": sha1, "url": url})
+ # return True
+
def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]:
return {
x["sha1"]: x["url"]
+ # FIXME try to avoid this loop and return directly in the needed format from mongo
for x in self.db.origin.find(
{"sha1": {"$in": list(ids)}}, {"sha1": 1, "url": 1, "_id": 0}
)
@@ -283,6 +307,7 @@
def relation_add(
self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]]
) -> bool:
+
src_relation, *_, dst_relation = relation.value.split("_")
dst_objs = {
diff --git a/swh/provenance/mongo/bootstrap.py b/swh/provenance/mongo/bootstrap.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/bootstrap.py
@@ -0,0 +1,8 @@
+# FIXME, maybe this should not be part of the code
+
+collections = []
+
+indexes = []
+
+def bootstrap_db():
+ pass
diff --git a/swh/provenance/mongo/errors.py b/swh/provenance/mongo/errors.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/errors.py
@@ -0,0 +1,6 @@
+class DBError(Exception):
+ # FIXME, add mongo specific logging
+ pass
+
+class EntiryError(Exception):
+ pass
diff --git a/swh/provenance/mongo/models/cache.py b/swh/provenance/mongo/models/cache.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/models/cache.py
@@ -0,0 +1,20 @@
+class Cache:
+ """
+ An object cache layer; for now only sha1 is used as the key.
+ An in-memory implementation.
+ """
+
+ def __init__(self, data):
+ self.data = self.data
+
+ def _clear(self):
+ self.data = dict()
+
+ def set_obj(self, obj):
+ self.data[obj.sha1] = obj
+
+ def add_data(self, data):
+ pass
+
+ def get(self):
+ self.data.get(key, None)
diff --git a/swh/provenance/mongo/models/content.py b/swh/provenance/mongo/models/content.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/models/content.py
@@ -0,0 +1,12 @@
+class Content(Entity):
+ collection = 'content'
+ model = {}
+ validate_model = False
+
+
+ def find_first(sef):
+ pass
+
+
+ def find_all(self):
+ pass
diff --git a/swh/provenance/mongo/models/directory.py b/swh/provenance/mongo/models/directory.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/models/directory.py
@@ -0,0 +1,4 @@
+class Directory(Entity):
+ collection = 'directory'
+ model = {}
+ validate_model = False
diff --git a/swh/provenance/mongo/models/entity.py b/swh/provenance/mongo/models/entity.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/models/entity.py
@@ -0,0 +1,60 @@
+from abc import ABCMeta
+
+
+class Entity(ABCMeta):
+ """
+ An object saved in the db
+ """
+
+ collection: str
+ model: dict
+ validate_model: bool
+
+ @staticmethod
+ def factory(entity: str):
+ mapping = {
+ 'content': Content
+ 'directory': Directory
+ }
+ if entity in mapping:
+ return entity[mapping]
+ raise EntiryError(f"invalid entity type {entity}")
+
+ def __int__(self, data):
+ self.model = self._load_model()
+ self.data = self._set_data(data)
+
+ def _load_model(self):
+ return {}
+
+ def _set_data(self, data):
+ pass
+
+ def _validate(self):
+ self.db.command() # use json in schema.json
+
+ def save(self):
+ if self.validate_model and self._validate():
+ raise DataError()
+ save()
+
+ def get(self, qry):
+ pass
+
+ def _is_older_in_time(self):
+ return self.data.ts < ts
+
+ def add_if_older(self):
+ if _is_older():
+ self.save()
+
+ def with_excetion_handle(self):
+ pass
+
+
+class EntityList:
+ """
+ List or array of entities and their operations
+ Operate mostly on object cache
+ """
+ pass
diff --git a/swh/provenance/mongo/models/origin.py b/swh/provenance/mongo/models/origin.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/models/origin.py
@@ -0,0 +1,4 @@
+class Origin(Entity):
+ collection = 'origin'
+ model = {}
+ validate_model = False
diff --git a/swh/provenance/mongo/models/revision.py b/swh/provenance/mongo/models/revision.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/models/revision.py
@@ -0,0 +1,4 @@
+class Revision(Entity):
+ collection = 'revision'
+ model = {}
+ validate_model = False
diff --git a/swh/provenance/mongo/schema.json b/swh/provenance/mongo/schema.json
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/schema.json
@@ -0,0 +1,34 @@
+// The json schema for the first version
+
+{
+ "content": {
+ "bsonType": "object",
+ "required": [ "sha1" ],
+ "properties": {
+ "sha1": {
+ "bsonType": "Binary",
+ "description": ""
+ },
+ }
+ },
+ 'directory': {
+ "bsonType": "object",
+ "required": [ "sha1" ],
+ "properties": {
+ "sha1": {
+ "bsonType": "Binary",
+ "description": ""
+ },
+ }
+ },
+ 'revision': {
+ "bsonType": "object",
+ "required": [ "sha1" ],
+ "properties": {
+ "sha1": {
+ "bsonType": "Binary",
+ "description": ""
+ },
+ }
+ }
+}
diff --git a/swh/provenance/mongo/storage.py b/swh/provenance/mongo/storage.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/storage.py
@@ -0,0 +1,104 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from datetime import datetime, timezone
+import os
+from typing import Any, Dict, Generator, Iterable, List, Optional, Set, Union
+
+from bson import ObjectId
+import pymongo.database
+
+from swh.model.model import Sha1Git
+
+from ..interface import (
+ EntityType,
+ ProvenanceResult,
+ RelationData,
+ RelationType,
+ RevisionData,
+)
+
+
+class ProvenanceStorageMongoDb:
+ def __init__(self, db: pymongo.database.Database):
+ self.db = db
+
+ def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]:
+ pass
+
+ def content_find_all(self, id: Sha1Git, limit: Optional[int] = None) -> Generator[ProvenanceResult, None, None]:
+ pass
+
+ def content_add(self, cnts: Union[Iterable[Sha1Git], Dict[Sha1Git, datetime]]) -> bool:
+ for each_cnt in cnts:
+ try:
+ Entity.factory('content')(each_cnt).add_if_older()
+ except DBError as e:
+ # logging and skipping this item
+ # FIXME, add logging, raise if needed
+ pass
+ return True
+
+ def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]:
+ try:
+ contents = Entity.factory('content').get_all_in_list({'sha1': ids})
+ except DBError as e:
+ # logging and returning None
+ # FIXME, add logging, raise if needed
+ return None
+ return Entity.factory('content').dump_list_as_date_dict(contents)
+
+ def directory_add(self, dirs: Union[Iterable[Sha1Git], Dict[Sha1Git, datetime]]) -> bool:
+ for each_cnt in cnts:
+ try:
+ Entity.factory('directory')(each_cnt).add_if_older()
+ except DBError as e:
+ # logging and skipping this item
+ # FIXME, add logging, raise if needed
+ pass
+ return True
+
+ def directory_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]:
+ try:
+ directories = Entity.factory('directory').get_all_in_list({'sha1': ids})
+ except DBError as e:
+ # logging and returning None
+ # FIXME, add logging, raise if needed
+ return None
+ return Entity.factory('directory').dump_list_as_date_dict(contents)
+
+ def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]:
+ return Entity.factory(entity).get_all_in_list({'sha1': ids})
+
+ def location_add(self, paths: Iterable[bytes]) -> bool:
+ # TODO: implement this method if paths are to be stored in a separate collection
+ return True
+
+ def location_get_all(self) -> Set[bytes]:
+ pass
+
+ def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool:
+ return True
+
+ def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]:
+ pass
+
+ def revision_add(self, revs: Union[Iterable[Sha1Git], Dict[Sha1Git, RevisionData]]) -> bool:
+ return True
+
+ def revision_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, RevisionData]:
+ pass
+
+ def relation_add(self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]]) -> bool:
+ return True
+
+ def relation_get(self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False) -> Dict[Sha1Git, Set[RelationData]]:
+ return pass
+
+ def relation_get_all(self, relation: RelationType) -> Dict[Sha1Git, Set[RelationData]]:
+ pass
+
+ def with_path(self) -> bool:
+ return True
diff --git a/swh/provenance/mongo/tests/__init__.py b/swh/provenance/mongo/tests/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/mongo/tests/__init__.py
@@ -0,0 +1 @@
+pass
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 12:26 PM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226758
Attached To
D6251: Work in progress. Not for review.
Event Timeline
Log In to Comment