diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -15,7 +15,7 @@ python3-swh.model (>= 0.0.15~), python3-swh.objstorage (>= 0.0.13~), python3-swh.scheduler (>= 0.0.14~), - python3-swh.storage (>= 0.0.93~), + python3-swh.storage (>= 0.0.100~), python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/78/ @@ -26,7 +26,7 @@ python3-swh.model (>= 0.0.15~), python3-swh.objstorage (>= 0.0.13~), python3-swh.scheduler (>= 0.0.14~), - python3-swh.storage (>= 0.0.93~), + python3-swh.storage (>= 0.0.100~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage Content Indexer Storage @@ -38,7 +38,7 @@ python3-swh.model (>= 0.0.15~), python3-swh.objstorage (>= 0.0.13~), python3-swh.scheduler (>= 0.0.14~), - python3-swh.storage (>= 0.0.93~), + python3-swh.storage (>= 0.0.100~), python3-swh.indexer.storage (= ${binary:Version}), universal-ctags (>= 0.8~), fossology-nomossa (>= 3.1~), diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -2,4 +2,4 @@ swh.model >= 0.0.15 swh.objstorage >= 0.0.13 swh.scheduler >= 0.0.14 -swh.storage >= 0.0.93 +swh.storage >= 0.0.100 diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py --- a/swh/indexer/storage/__init__.py +++ b/swh/indexer/storage/__init__.py @@ -57,16 +57,19 @@ """ try: if isinstance(db, psycopg2.extensions.connection): - self.db = Db(db) + self._db = Db(db) else: - self.db = Db.connect(db) + self._db = Db.connect(db) except psycopg2.OperationalError as e: raise StorageDBError(e) + def get_db(self): + return self._db + def check_config(self, *, check_write): """Check that the storage is configured and ready to go.""" # Check permissions on one of the tables - with self.db.transaction() as cur: + with self.get_db().transaction() as cur: if check_write: check = 'INSERT' else: @@ -81,7 +84,7 @@ return True @db_transaction_generator - def content_mimetype_missing(self, mimetypes, cur=None): + def content_mimetype_missing(self, mimetypes, db=None, cur=None): """List mimetypes missing from storage. Args: @@ -96,7 +99,6 @@ tool_version """ - db = self.db db.mktemp_content_mimetype_missing(cur) db.copy_to(mimetypes, 'tmp_content_mimetype_missing', ['id', 'indexer_configuration_id'], @@ -105,7 +107,8 @@ yield obj[0] @db_transaction - def content_mimetype_add(self, mimetypes, conflict_update=False, cur=None): + def content_mimetype_add(self, mimetypes, conflict_update=False, db=None, + cur=None): """Add mimetypes not present in storage. Args: @@ -120,7 +123,6 @@ overwrite (true) or skip duplicates (false, the default) """ - db = self.db db.mktemp_content_mimetype(cur) db.copy_to(mimetypes, 'tmp_content_mimetype', ['id', 'mimetype', 'encoding', 'indexer_configuration_id'], @@ -128,15 +130,14 @@ db.content_mimetype_add_from_temp(conflict_update, cur) @db_transaction_generator - def content_mimetype_get(self, ids, cur=None): - db = self.db + def content_mimetype_get(self, ids, db=None, cur=None): db.store_tmp_bytea(ids, cur) for c in db.content_mimetype_get_from_temp(): yield converters.db_to_mimetype( dict(zip(db.content_mimetype_cols, c))) @db_transaction_generator - def content_language_missing(self, languages, cur=None): + def content_language_missing(self, languages, db=None, cur=None): """List languages missing from storage. Args: @@ -150,7 +151,6 @@ iterable: identifiers of missing languages """ - db = self.db db.mktemp_content_language_missing(cur) db.copy_to(languages, 'tmp_content_language_missing', ['id', 'indexer_configuration_id'], cur) @@ -158,15 +158,15 @@ yield obj[0] @db_transaction_generator - def content_language_get(self, ids, cur=None): - db = self.db + def content_language_get(self, ids, db=None, cur=None): db.store_tmp_bytea(ids, cur) for c in db.content_language_get_from_temp(): yield converters.db_to_language( dict(zip(db.content_language_cols, c))) @db_transaction - def content_language_add(self, languages, conflict_update=False, cur=None): + def content_language_add(self, languages, conflict_update=False, db=None, + cur=None): """Add languages not present in storage. Args: @@ -179,7 +179,6 @@ or skip duplicates (false, the default) """ - db = self.db db.mktemp_content_language(cur) # empty language is mapped to 'unknown' db.copy_to( @@ -194,7 +193,7 @@ db.content_language_add_from_temp(conflict_update, cur) @db_transaction_generator - def content_ctags_missing(self, ctags, cur=None): + def content_ctags_missing(self, ctags, db=None, cur=None): """List ctags missing from storage. Args: @@ -208,8 +207,6 @@ an iterable of missing id """ - db = self.db - db.mktemp_content_ctags_missing(cur) db.copy_to(ctags, tblname='tmp_content_ctags_missing', @@ -219,20 +216,20 @@ yield obj[0] @db_transaction_generator - def content_ctags_get(self, ids, cur=None): + def content_ctags_get(self, ids, db=None, cur=None): """Retrieve ctags per id. Args: ids (iterable): sha1 checksums """ - db = self.db db.store_tmp_bytea(ids, cur) for c in db.content_ctags_get_from_temp(): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c))) @db_transaction - def content_ctags_add(self, ctags, conflict_update=False, cur=None): + def content_ctags_add(self, ctags, conflict_update=False, db=None, + cur=None): """Add ctags not present in storage Args: @@ -243,8 +240,6 @@ line, language """ - db = self.db - def _convert_ctags(__ctags): """Convert ctags dict to list of ctags. @@ -263,7 +258,7 @@ @db_transaction_generator def content_ctags_search(self, expression, - limit=10, last_sha1=None, cur=None): + limit=10, last_sha1=None, db=None, cur=None): """Search through content's raw ctags symbols. Args: @@ -275,14 +270,12 @@ rows of ctags including id, name, lang, kind, line, etc... """ - db = self.db - for obj in db.content_ctags_search(expression, last_sha1, limit, cur=cur): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj))) @db_transaction_generator - def content_fossology_license_get(self, ids, cur=None): + def content_fossology_license_get(self, ids, db=None, cur=None): """Retrieve licenses per id. Args: @@ -295,7 +288,6 @@ - licenses ([str]): associated licenses for that content """ - db = self.db db.store_tmp_bytea(ids, cur) for c in db.content_fossology_license_get_from_temp(): @@ -303,8 +295,8 @@ yield converters.db_to_fossology_license(license) @db_transaction - def content_fossology_license_add(self, licenses, - conflict_update=False, cur=None): + def content_fossology_license_add(self, licenses, conflict_update=False, + db=None, cur=None): """Add licenses not present in storage. Args: @@ -321,8 +313,6 @@ list: content_license entries which failed due to unknown licenses """ - db = self.db - # Then, we add the correct ones db.mktemp_content_fossology_license(cur) db.copy_to( @@ -338,7 +328,7 @@ db.content_fossology_license_add_from_temp(conflict_update, cur) @db_transaction_generator - def content_metadata_missing(self, metadatas, cur=None): + def content_metadata_missing(self, metadatas, db=None, cur=None): """List metadatas missing from storage. Args: @@ -352,7 +342,6 @@ iterable: missing ids """ - db = self.db db.mktemp_content_metadata_missing(cur) db.copy_to(metadatas, 'tmp_content_metadata_missing', ['id', 'indexer_configuration_id'], cur) @@ -360,15 +349,15 @@ yield obj[0] @db_transaction_generator - def content_metadata_get(self, ids, cur=None): - db = self.db + def content_metadata_get(self, ids, db=None, cur=None): db.store_tmp_bytea(ids, cur) for c in db.content_metadata_get_from_temp(): yield converters.db_to_metadata( dict(zip(db.content_metadata_cols, c))) @db_transaction - def content_metadata_add(self, metadatas, conflict_update=False, cur=None): + def content_metadata_add(self, metadatas, conflict_update=False, db=None, + cur=None): """Add metadatas not present in storage. Args: @@ -381,7 +370,6 @@ or skip duplicates (false, the default) """ - db = self.db db.mktemp_content_metadata(cur) # empty metadata is mapped to 'unknown' @@ -391,7 +379,7 @@ db.content_metadata_add_from_temp(conflict_update, cur) @db_transaction_generator - def revision_metadata_missing(self, metadatas, cur=None): + def revision_metadata_missing(self, metadatas, db=None, cur=None): """List metadatas missing from storage. Args: @@ -405,7 +393,6 @@ iterable: missing ids """ - db = self.db db.mktemp_revision_metadata_missing(cur) db.copy_to(metadatas, 'tmp_revision_metadata_missing', ['id', 'indexer_configuration_id'], cur) @@ -413,16 +400,15 @@ yield obj[0] @db_transaction_generator - def revision_metadata_get(self, ids, cur=None): - db = self.db + def revision_metadata_get(self, ids, db=None, cur=None): db.store_tmp_bytea(ids, cur) for c in db.revision_metadata_get_from_temp(): yield converters.db_to_metadata( dict(zip(db.revision_metadata_cols, c))) @db_transaction - def revision_metadata_add(self, metadatas, - conflict_update=False, cur=None): + def revision_metadata_add(self, metadatas, conflict_update=False, db=None, + cur=None): """Add metadatas not present in storage. Args: @@ -435,7 +421,6 @@ or skip duplicates (false, the default) """ - db = self.db db.mktemp_revision_metadata(cur) # empty metadata is mapped to 'unknown' @@ -446,7 +431,7 @@ @db_transaction def origin_metadata_add(self, origin_id, ts, provider, tool, metadata, - cur=None): + db=None, cur=None): """ Add an origin_metadata for the origin at ts with provenance and metadata. @@ -463,11 +448,12 @@ if isinstance(ts, str): ts = dateutil.parser.parse(ts) - return self.db.origin_metadata_add(origin_id, ts, provider, tool, - metadata, cur) + return db.origin_metadata_add(origin_id, ts, provider, tool, + metadata, cur) @db_transaction_generator - def origin_metadata_get_by(self, origin_id, provider_type=None, cur=None): + def origin_metadata_get_by(self, origin_id, provider_type=None, db=None, + cur=None): """Retrieve list of all origin_metadata entries for the origin_id Args: @@ -488,12 +474,11 @@ - provider_url (str) """ - db = self.db for line in db.origin_metadata_get_by(origin_id, provider_type, cur): yield dict(zip(db.origin_metadata_get_cols, line)) @db_transaction_generator - def indexer_configuration_add(self, tools, cur=None): + def indexer_configuration_add(self, tools, db=None, cur=None): """Add new tools to the storage. Args: @@ -511,7 +496,6 @@ the order of the initial list. """ - db = self.db db.mktemp_indexer_configuration(cur) db.copy_to(tools, 'tmp_indexer_configuration', ['tool_name', 'tool_version', 'tool_configuration'], @@ -522,7 +506,7 @@ yield dict(zip(db.indexer_configuration_cols, line)) @db_transaction - def indexer_configuration_get(self, tool, cur=None): + def indexer_configuration_get(self, tool, db=None, cur=None): """Retrieve tool information. Args: @@ -538,7 +522,6 @@ The identifier of the tool if it exists, None otherwise. """ - db = self.db tool_conf = tool['tool_configuration'] if isinstance(tool_conf, dict): tool_conf = json.dumps(tool_conf) @@ -547,4 +530,4 @@ tool_conf) if not idx: return None - return dict(zip(self.db.indexer_configuration_cols, idx)) + return dict(zip(db.indexer_configuration_cols, idx))