diff --git a/CONTRIBUTORS b/CONTRIBUTORS --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -1,3 +1,4 @@ Daniele Serafini Ishan Bhanuka +Kumar Shivendu Quentin Campos diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -192,6 +192,7 @@ return summary def content_add(self, content: List[Content]) -> Dict: + content = list(dict.fromkeys(content)) contents = [attr.evolve(c, ctime=now()) for c in content] return self._content_add(list(contents), with_data=True) @@ -354,6 +355,7 @@ yield {algo: content[algo] for algo in DEFAULT_ALGORITHMS} def directory_add(self, directories: List[Directory]) -> Dict: + directories = list(dict.fromkeys(directories)) # Filter out directories that are already inserted. missing = self.directory_missing([dir_.id for dir_ in directories]) directories = [dir_ for dir_ in directories if dir_.id in missing] @@ -479,6 +481,7 @@ def revision_add(self, revisions: List[Revision]) -> Dict: # Filter-out revisions already in the database + revisions = list(dict.fromkeys(revisions)) missing = self.revision_missing([rev.id for rev in revisions]) revisions = [rev for rev in revisions if rev.id in missing] self.journal_writer.revision_add(revisions) @@ -589,10 +592,7 @@ return revision.id def release_add(self, releases: List[Release]) -> Dict: - to_add = [] - for rel in releases: - if rel not in to_add: - to_add.append(rel) + to_add = list(dict.fromkeys(releases)) missing = set(self.release_missing([rel.id for rel in to_add])) to_add = [rel for rel in to_add if rel.id in missing] @@ -622,6 +622,7 @@ return release.id def snapshot_add(self, snapshots: List[Snapshot]) -> Dict: + snapshots = list(dict.fromkeys(snapshots)) missing = self._cql_runner.snapshot_missing([snp.id for snp in snapshots]) snapshots = [snp for snp in snapshots if snp.id in missing] @@ -892,6 +893,7 @@ ) def origin_add(self, origins: List[Origin]) -> Dict[str, int]: + origins = list(dict.fromkeys(origins)) to_add = [ori for ori in origins if 
self.origin_get_one(ori.url) is None] # keep only one occurrence of each given origin while keeping the list # sorted as originally given