Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/postgresql/storage.py
Show First 20 Lines • Show All 1,202 Lines • ▼ Show 20 Lines | ) -> int: | ||||
return db.origin_count(url_pattern, regexp, with_visit, cur) | return db.origin_count(url_pattern, regexp, with_visit, cur) | ||||
@timed | @timed | ||||
@process_metrics | @process_metrics | ||||
@db_transaction() | @db_transaction() | ||||
def origin_add(self, origins: List[Origin], db=None, cur=None) -> Dict[str, int]: | def origin_add(self, origins: List[Origin], db=None, cur=None) -> Dict[str, int]: | ||||
urls = [o.url for o in origins] | urls = [o.url for o in origins] | ||||
known_origins = set(url for (url,) in db.origin_get_by_url(urls, cur)) | known_origins = set(url for (url,) in db.origin_get_by_url(urls, cur)) | ||||
# use lists here to keep origins sorted; some tests depend on this | # keep only one occurrence of each given origin while keeping the list | ||||
to_add = [url for url in urls if url not in known_origins] | # sorted as originally given | ||||
to_add = sorted(set(urls) - known_origins, key=urls.index) | |||||
ardumont: keeping | |||||
self.journal_writer.origin_add([Origin(url=url) for url in to_add]) | self.journal_writer.origin_add([Origin(url=url) for url in to_add]) | ||||
added = 0 | added = 0 | ||||
for url in to_add: | for url in to_add: | ||||
if db.origin_add(url, cur): | if db.origin_add(url, cur): | ||||
added += 1 | added += 1 | ||||
return {"origin:add": added} | return {"origin:add": added} | ||||
▲ Show 20 Lines • Show All 210 Lines • Show Last 20 Lines |
keeping