diff --git a/debian/control b/debian/control --- a/debian/control +++ b/debian/control @@ -13,7 +13,7 @@ python3-requests, python3-setuptools, python3-swh.core (>= 0.0.28~), - python3-swh.model (>= 0.0.14~), + python3-swh.model (>= 0.0.15~), python3-swh.objstorage (>= 0.0.17~), python3-swh.scheduler (>= 0.0.11~), python3-vcversioner @@ -23,7 +23,7 @@ Package: python3-swh.storage Architecture: all Depends: python3-swh.core (>= 0.0.28~), - python3-swh.model (>= 0.0.14~), + python3-swh.model (>= 0.0.15~), python3-swh.objstorage (>= 0.0.17~), ${misc:Depends}, ${python3:Depends} @@ -31,7 +31,7 @@ Package: python3-swh.storage.listener Architecture: all -Depends: python3-swh.journal, +Depends: python3-swh.journal (>= 0.0.2~), python3-kafka (>= 1.3.1~), python3-swh.storage (= ${binary:Version}), ${misc:Depends}, diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,5 @@ swh.core >= 0.0.28 -swh.model >= 0.0.14 +swh.model >= 0.0.15 swh.objstorage >= 0.0.17 swh.scheduler >= 0.0.11 +swh.journal >= 0.0.2 diff --git a/sql/swh-func.sql b/sql/swh-func.sql --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -135,9 +135,10 @@ -- uniquely identify content, for the purpose of verifying if we already have -- some content or not during content injection create type content_signature as ( - sha1 sha1, - sha1_git sha1_git, - sha256 sha256 + sha1 sha1, + sha1_git sha1_git, + sha256 sha256, + blake2s256 blake2s256 ); @@ -151,10 +152,12 @@ as $$ begin return query ( - select sha1, sha1_git, sha256 from tmp_content as tmp + select sha1, sha1_git, sha256, blake2s256 from tmp_content as tmp where not exists ( select 1 from content as c - where c.sha1 = tmp.sha1 and c.sha1_git = tmp.sha1_git and c.sha256 = tmp.sha256 + where c.sha1 = tmp.sha1 and + c.sha1_git = tmp.sha1_git and + c.sha256 = tmp.sha256 ) ); return; @@ -189,7 +192,7 @@ as $$ begin return query - select sha1, sha1_git, sha256 from tmp_skipped_content t + select sha1, 
sha1_git, sha256, blake2s256 from tmp_skipped_content t where not exists (select 1 from skipped_content s where s.sha1 is not distinct from t.sha1 and @@ -210,9 +213,10 @@ -- (e.g., for the web app), for batch lookup of missing content (e.g., to be -- added) see swh_content_missing create or replace function swh_content_find( - sha1 sha1 default NULL, - sha1_git sha1_git default NULL, - sha256 sha256 default NULL + sha1 sha1 default NULL, + sha1_git sha1_git default NULL, + sha256 sha256 default NULL, + blake2s256 blake2s256 default NULL ) returns content language plpgsql @@ -231,12 +235,15 @@ if sha256 is not null then filters := filters || format('sha256 = %L', sha256); end if; + if blake2s256 is not null then + filters := filters || format('blake2s256 = %L', blake2s256); + end if; if cardinality(filters) = 0 then return null; else q = format('select * from content where %s', - array_to_string(filters, ' and ')); + array_to_string(filters, ' and ')); execute q into con; return con; end if; @@ -253,10 +260,10 @@ language plpgsql as $$ begin - insert into content (sha1, sha1_git, sha256, length, status) - select distinct sha1, sha1_git, sha256, length, status + insert into content (sha1, sha1_git, sha256, blake2s256, length, status) + select distinct sha1, sha1_git, sha256, blake2s256, length, status from tmp_content - where (sha1, sha1_git, sha256) in + where (sha1, sha1_git, sha256, blake2s256) in (select * from swh_content_missing()); -- TODO XXX use postgres 9.5 "UPSERT" support here, when available. -- Specifically, using "INSERT .. 
ON CONFLICT IGNORE" we can avoid @@ -275,11 +282,11 @@ language plpgsql as $$ begin - insert into skipped_content (sha1, sha1_git, sha256, length, status, reason, origin) - select distinct sha1, sha1_git, sha256, length, status, reason, origin + insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, status, reason, origin) + select distinct sha1, sha1_git, sha256, blake2s256, length, status, reason, origin from tmp_skipped_content - where (coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, '')) in - (select coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, '') from swh_skipped_content_missing()); + where (coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '')) in + (select coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '') from swh_skipped_content_missing()); -- TODO XXX use postgres 9.5 "UPSERT" support here, when available. -- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid -- the extra swh_content_missing() query here. @@ -287,7 +294,6 @@ end $$; - -- Update content entries from temporary table. 
-- (columns are potential new columns added to the schema, this cannot be empty) -- diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql --- a/sql/swh-indexes.sql +++ b/sql/swh-indexes.sql @@ -2,7 +2,8 @@ create unique index concurrently content_pkey on content(sha1); create unique index concurrently on content(sha1_git); -create unique index concurrently on content(sha256); +create index concurrently on content(sha256); +create index concurrently on content(blake2s256); create index concurrently on content(ctime); -- TODO use a BRIN index here (postgres >= 9.5) create index concurrently on content(object_id); @@ -77,7 +78,8 @@ create unique index concurrently on skipped_content(sha1); create unique index concurrently on skipped_content(sha1_git); -create unique index concurrently on skipped_content(sha256); +create index concurrently on skipped_content(sha256); +create index concurrently on skipped_content(blake2s256); create index concurrently on skipped_content(object_id); alter table skipped_content add constraint skipped_content_origin_fkey foreign key (origin) references origin(id) not valid; diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql --- a/sql/swh-schema.sql +++ b/sql/swh-schema.sql @@ -14,7 +14,7 @@ ); insert into dbversion(version, release, description) - values(103, now(), 'Work In Progress'); + values(104, now(), 'Work In Progress'); -- a SHA1 checksum (not necessarily originating from Git) create domain sha1 as bytea check (length(value) = 20); @@ -25,6 +25,9 @@ -- a SHA256 checksum create domain sha256 as bytea check (length(value) = 32); +-- a blake2 checksum +create domain blake2s256 as bytea check (length(value) = 32); + -- UNIX path (absolute, relative, individual path component, etc.) create domain unix_path as bytea; @@ -37,14 +40,15 @@ -- content collisions not knowingly. 
create table content ( - sha1 sha1 not null, - sha1_git sha1_git not null, - sha256 sha256 not null, - length bigint not null, - ctime timestamptz not null default now(), - -- creation time, i.e. time of (first) injection into the storage - status content_status not null default 'visible', - object_id bigserial + sha1 sha1 not null, + sha1_git sha1_git not null, + sha256 sha256 not null, + blake2s256 blake2s256, + length bigint not null, + ctime timestamptz not null default now(), + -- creation time, i.e. time of (first) injection into the storage + status content_status not null default 'visible', + object_id bigserial ); @@ -165,15 +169,16 @@ -- out which origin contains that skipped content. create table skipped_content ( - sha1 sha1, - sha1_git sha1_git, - sha256 sha256, - length bigint not null, - ctime timestamptz not null default now(), - status content_status not null default 'absent', - reason text not null, - origin bigint, - object_id bigserial + sha1 sha1, + sha1_git sha1_git, + sha256 sha256, + blake2s256 blake2s256, + length bigint not null, + ctime timestamptz not null default now(), + status content_status not null default 'absent', + reason text not null, + origin bigint, + object_id bigserial ); -- Log of all origin fetches (i.e., origin crawling) that have been done in the diff --git a/sql/swh-triggers.sql b/sql/swh-triggers.sql --- a/sql/swh-triggers.sql +++ b/sql/swh-triggers.sql @@ -7,7 +7,8 @@ perform pg_notify('new_content', json_build_object( 'sha1', encode(new.sha1, 'hex'), 'sha1_git', encode(new.sha1_git, 'hex'), - 'sha256', encode(new.sha256, 'hex') + 'sha256', encode(new.sha256, 'hex'), + 'blake2s256', encode(new.blake2s256, 'hex') )::text); return null; end; @@ -45,7 +46,8 @@ perform pg_notify('new_skipped_content', json_build_object( 'sha1', encode(new.sha1, 'hex'), 'sha1_git', encode(new.sha1_git, 'hex'), - 'sha256', encode(new.sha256, 'hex') + 'sha256', encode(new.sha256, 'hex'), + 'blake2s256', encode(new.blake2s256, 'hex') 
)::text); return null; end; diff --git a/sql/upgrades/104.sql b/sql/upgrades/104.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/104.sql @@ -0,0 +1,163 @@ +-- SWH DB schema upgrade +-- from_version: 103 +-- to_version: 104 +-- description: Compute new hash blake2s256 + +insert into dbversion(version, release, description) + values(104, now(), 'Work In Progress'); + +DROP FUNCTION swh_content_find(sha1 sha1, sha1_git sha1_git, sha256 sha256); + +DROP INDEX content_sha256_idx; + +DROP INDEX skipped_content_sha256_idx; + +create domain blake2s256 as bytea check (length(value) = 32); + +ALTER TABLE content + ADD COLUMN blake2s256 blake2s256; + +ALTER TABLE skipped_content + ADD COLUMN blake2s256 blake2s256; + +CREATE OR REPLACE FUNCTION notify_new_content() RETURNS trigger + LANGUAGE plpgsql + AS $$ + begin + perform pg_notify('new_content', json_build_object( + 'sha1', encode(new.sha1, 'hex'), + 'sha1_git', encode(new.sha1_git, 'hex'), + 'sha256', encode(new.sha256, 'hex'), + 'blake2s256', encode(new.blake2s256, 'hex') + )::text); + return null; + end; +$$; + +CREATE OR REPLACE FUNCTION notify_new_skipped_content() RETURNS trigger + LANGUAGE plpgsql + AS $$ + begin + perform pg_notify('new_skipped_content', json_build_object( + 'sha1', encode(new.sha1, 'hex'), + 'sha1_git', encode(new.sha1_git, 'hex'), + 'sha256', encode(new.sha256, 'hex'), + 'blake2s256', encode(new.blake2s256, 'hex') + )::text); + return null; + end; +$$; + +CREATE OR REPLACE FUNCTION swh_content_add() RETURNS void + LANGUAGE plpgsql + AS $$ +begin + insert into content (sha1, sha1_git, sha256, blake2s256, length, status) + select distinct sha1, sha1_git, sha256, blake2s256, length, status + from tmp_content + where (sha1, sha1_git, sha256, blake2s256) in + (select * from swh_content_missing()); + -- TODO XXX use postgres 9.5 "UPSERT" support here, when available. + -- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid + -- the extra swh_content_missing() query here. 
+ return; +end +$$; + +CREATE OR REPLACE FUNCTION swh_content_find(sha1 sha1 = NULL::bytea, sha1_git sha1_git = NULL::bytea, sha256 sha256 = NULL::bytea, blake2s256 blake2s256 = NULL::bytea) RETURNS content + LANGUAGE plpgsql + AS $$ +declare + con content; + filters text[] := array[] :: text[]; -- AND-clauses used to filter content + q text; +begin + if sha1 is not null then + filters := filters || format('sha1 = %L', sha1); + end if; + if sha1_git is not null then + filters := filters || format('sha1_git = %L', sha1_git); + end if; + if sha256 is not null then + filters := filters || format('sha256 = %L', sha256); + end if; + if blake2s256 is not null then + filters := filters || format('blake2s256 = %L', blake2s256); + end if; + + if cardinality(filters) = 0 then + return null; + else + q = format('select * from content where %s', + array_to_string(filters, ' and ')); + execute q into con; + return con; + end if; +end +$$; + +drop type content_signature cascade; + +create type content_signature as ( + sha1 sha1, + sha1_git sha1_git, + sha256 sha256, + blake2s256 blake2s256 +); + + +CREATE OR REPLACE FUNCTION swh_content_missing() RETURNS SETOF content_signature + LANGUAGE plpgsql + AS $$ +begin + return query ( + select sha1, sha1_git, sha256, blake2s256 from tmp_content as tmp + where not exists ( + select 1 from content as c + where c.sha1 = tmp.sha1 and + c.sha1_git = tmp.sha1_git and + c.sha256 = tmp.sha256 + ) + ); + return; +end +$$; + +CREATE OR REPLACE FUNCTION swh_skipped_content_add() RETURNS void + LANGUAGE plpgsql + AS $$ +begin + insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, status, reason, origin) + select distinct sha1, sha1_git, sha256, blake2s256, length, status, reason, origin + from tmp_skipped_content + where (coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '')) in + (select coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '') from
swh_skipped_content_missing()); + -- TODO XXX use postgres 9.5 "UPSERT" support here, when available. + -- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid + -- the extra swh_content_missing() query here. + return; +end +$$; + +CREATE OR REPLACE FUNCTION swh_skipped_content_missing() RETURNS SETOF content_signature + LANGUAGE plpgsql + AS $$ +begin + return query + select sha1, sha1_git, sha256, blake2s256 from tmp_skipped_content t + where not exists + (select 1 from skipped_content s where + s.sha1 is not distinct from t.sha1 and + s.sha1_git is not distinct from t.sha1_git and + s.sha256 is not distinct from t.sha256); + return; +end +$$; + +CREATE INDEX content_blake2s256_idx ON content USING btree (blake2s256); + +CREATE INDEX content_sha256_idx ON content USING btree (sha256); + +CREATE INDEX skipped_content_blake2s256_idx ON skipped_content USING btree (blake2s256); + +CREATE INDEX skipped_content_sha256_idx ON skipped_content USING btree (sha256); diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -268,8 +268,12 @@ cur.execute("""select swh_content_update(ARRAY[%s] :: text[])""" % keys_to_update) - content_get_metadata_keys = ['sha1', 'sha1_git', 'sha256', 'length', - 'status'] + content_get_metadata_keys = [ + 'sha1', 'sha1_git', 'sha256', 'blake2s256', 'length', 'status'] + + skipped_content_keys = [ + 'sha1', 'sha1_git', 'sha256', 'blake2s256', + 'length', 'reason', 'status', 'origin'] def content_get_metadata_from_temp(self, cur=None): cur = self._cursor(cur) @@ -282,7 +286,7 @@ def content_missing_from_temp(self, cur=None): cur = self._cursor(cur) - cur.execute("""SELECT sha1, sha1_git, sha256 + cur.execute("""SELECT sha1, sha1_git, sha256, blake2s256 FROM swh_content_missing()""") yield from cursor_to_bytes(cur) @@ -298,7 +302,7 @@ def skipped_content_missing_from_temp(self, cur=None): cur = self._cursor(cur) - cur.execute("""SELECT sha1, sha1_git, sha256 + cur.execute("""SELECT sha1, 
sha1_git, sha256, blake2s256 FROM swh_skipped_content_missing()""") yield from cursor_to_bytes(cur) @@ -319,24 +323,30 @@ yield from cursor_to_bytes(cur) - def content_find(self, sha1=None, sha1_git=None, sha256=None, cur=None): + content_find_cols = ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length', + 'ctime', 'status'] + + def content_find(self, sha1=None, sha1_git=None, sha256=None, + blake2s256=None, cur=None): """Find the content optionally on a combination of the following - checksums sha1, sha1_git or sha256. + checksums sha1, sha1_git, sha256 or blake2s256. Args: sha1: sha1 content git_sha1: the sha1 computed `a la git` sha1 of the content sha256: sha256 content + blake2s256: blake2s256 content Returns: - The triplet (sha1, sha1_git, sha256) if found or None. + The tuple (sha1, sha1_git, sha256, blake2s256) if found or None. """ cur = self._cursor(cur) - cur.execute("""SELECT sha1, sha1_git, sha256, length, ctime, status - FROM swh_content_find(%s, %s, %s) - LIMIT 1""", (sha1, sha1_git, sha256)) + cur.execute("""SELECT %s + FROM swh_content_find(%%s, %%s, %%s, %%s) + LIMIT 1""" % ','.join(self.content_find_cols), + (sha1, sha1_git, sha256, blake2s256)) content = line_to_bytes(cur.fetchone()) if set(content) == {None}: diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -23,6 +23,9 @@ BULK_BLOCK_CONTENT_LEN_MAX = 10000 +CONTENT_HASH_KEYS = ['sha1', 'sha1_git', 'sha256', 'blake2s256'] + + class Storage(): """SWH storage proxy, encompassing DB and object storage @@ -89,6 +92,15 @@ """ db = self.db + def _unique_key(hash, keys=CONTENT_HASH_KEYS): + """Given a hash (tuple or dict), return a unique key from the + aggregation of keys. 
+ + """ + if isinstance(hash, tuple): + return hash + return tuple([hash[k] for k in keys]) + content_by_status = defaultdict(list) for d in content: if 'status' not in d: @@ -101,9 +113,9 @@ content_without_data = content_by_status['absent'] missing_content = set(self.content_missing(content_with_data)) - missing_skipped = set( - sha1_git for sha1, sha1_git, sha256 - in self.skipped_content_missing(content_without_data)) + missing_skipped = set(_unique_key(hashes) for hashes + in self.skipped_content_missing( + content_without_data)) with db.transaction() as cur: if missing_content: @@ -118,19 +130,19 @@ if cont['sha1'] in missing_content) db.copy_to(content_filtered, 'tmp_content', - ['sha1', 'sha1_git', 'sha256', 'length', 'status'], + db.content_get_metadata_keys, cur, item_cb=add_to_objstorage) # move metadata in place db.content_add_from_temp(cur) if missing_skipped: - missing_filtered = (cont for cont in content_without_data - if cont['sha1_git'] in missing_skipped) + missing_filtered = [cont for cont in content_without_data + if _unique_key(cont) in missing_skipped] + db.mktemp('skipped_content', cur) db.copy_to(missing_filtered, 'tmp_skipped_content', - ['sha1', 'sha1_git', 'sha256', 'length', - 'reason', 'status', 'origin'], cur) + db.skipped_content_keys, cur) # move metadata in place db.skipped_content_add_from_temp(cur) @@ -232,9 +244,9 @@ """ db = self.db - keys = ['sha1', 'sha1_git', 'sha256'] + keys = CONTENT_HASH_KEYS - if key_hash not in keys: + if key_hash not in CONTENT_HASH_KEYS: raise ValueError("key_hash should be one of %s" % keys) key_hash_idx = keys.index(key_hash) @@ -278,7 +290,7 @@ Returns: an iterable of signatures missing from the storage """ - keys = ['sha1', 'sha1_git', 'sha256'] + keys = CONTENT_HASH_KEYS db = self.db @@ -310,15 +322,15 @@ if not set(content).intersection(ALGORITHMS): raise ValueError('content keys must contain at least one of: ' - 'sha1, sha1_git, sha256') + 'sha1, sha1_git, sha256, blake2s256') c = 
db.content_find(sha1=content.get('sha1'), sha1_git=content.get('sha1_git'), sha256=content.get('sha256'), + blake2s256=content.get('blake2s256'), cur=cur) if c: - keys = ['sha1', 'sha1_git', 'sha256', 'length', 'ctime', 'status'] - return dict(zip(keys, c)) + return dict(zip(db.content_find_cols, c)) return None @db_transaction_generator diff --git a/swh/storage/tests/test_db.py b/swh/storage/tests/test_db.py --- a/swh/storage/tests/test_db.py +++ b/swh/storage/tests/test_db.py @@ -43,9 +43,11 @@ 'sha256': hash_to_bytes( '673650f936cb3b0a2f93ce09d81be107' '48b1b203c19e8176b4eefc1964a0cf3a'), + 'blake2s256': hash_to_bytes('69217a3079908094e11121d042354a7c' + '1f55b6482ca1a51e1b250dfd1ed0eef9'), 'length': 3}], 'tmp_content', - ['sha1', 'sha1_git', 'sha256', 'length'], + ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length'], cur) self.db.content_add_from_temp(cur) self.cursor.execute('SELECT sha1 FROM content WHERE sha1 = %s', diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -65,6 +65,8 @@ 'sha256': hash_to_bytes( '673650f936cb3b0a2f93ce09d81be107' '48b1b203c19e8176b4eefc1964a0cf3a'), + 'blake2s256': hash_to_bytes('d5fe1939576527e42cfd76a9455a2' + '432fe7f56669564577dd93c4280e76d661d'), 'status': 'visible', } @@ -78,6 +80,8 @@ 'sha256': hash_to_bytes( '859f0b154fdb2d630f45e1ecae4a8629' '15435e663248bb8461d914696fc047cd'), + 'blake2s256': hash_to_bytes('849c20fad132b7c2d62c15de310adfe87be' + '94a379941bed295e8141c6219810d'), 'status': 'visible', } @@ -91,6 +95,8 @@ 'sha256': hash_to_bytes( '92fb72daf8c6818288a35137b72155f5' '07e5de8d892712ab96277aaed8cf8a36'), + 'blake2s256': hash_to_bytes('76d0346f44e5a27f6bafdd9c2befd304af' + 'f83780f93121d801ab6a1d4769db11'), 'status': 'visible', } @@ -104,6 +110,8 @@ 'sha256': hash_to_bytes( '6bbd052ab054ef222c1c87be60cd191a' 'ddedd24cc882d1f5f7f7be61dc61bb3a'), + 'blake2s256': 
hash_to_bytes('306856b8fd879edb7b6f1aeaaf8db9bbecc9' + '93cd7f776c333ac3a782fa5c6eba'), 'status': 'absent', } @@ -111,6 +119,14 @@ 'length': 1024 * 1024 * 200, 'sha1_git': hash_to_bytes( '33e45d56f88993aae6a0198013efa80716fd8920'), + 'sha1': hash_to_bytes( + '43e45d56f88993aae6a0198013efa80716fd8920'), + 'sha256': hash_to_bytes( + '7bbd052ab054ef222c1c87be60cd191a' + 'ddedd24cc882d1f5f7f7be61dc61bb3a'), + 'blake2s256': hash_to_bytes( + 'ade18b1adecb33f891ca36664da676e1' + '2c772cc193778aac9a137b8dc5834b9b'), 'reason': 'Content too long', 'status': 'absent', } @@ -118,7 +134,15 @@ self.skipped_cont2 = { 'length': 1024 * 1024 * 300, 'sha1_git': hash_to_bytes( - '33e45d56f88993aae6a0198013efa80716fd8921'), + '44e45d56f88993aae6a0198013efa80716fd8921'), + 'sha1': hash_to_bytes( + '54e45d56f88993aae6a0198013efa80716fd8920'), + 'sha256': hash_to_bytes( + '8cbd052ab054ef222c1c87be60cd191a' + 'ddedd24cc882d1f5f7f7be61dc61bb3a'), + 'blake2s256': hash_to_bytes( + '9ce18b1adecb33f891ca36664da676e1' + '2c772cc193778aac9a137b8dc5834b9b'), 'reason': 'Content too long', 'status': 'absent', } @@ -621,28 +645,36 @@ @istest def skipped_content_add(self): - cont = self.skipped_cont - cont2 = self.skipped_cont2 + cont = self.skipped_cont.copy() + cont2 = self.skipped_cont2.copy() + cont2['blake2s256'] = None - self.storage.content_add([cont]) - self.storage.content_add([cont2]) + self.storage.content_add([cont, cont, cont2]) - self.cursor.execute('SELECT sha1, sha1_git, sha256, length, status,' - 'reason FROM skipped_content ORDER BY sha1_git') + self.cursor.execute('SELECT sha1, sha1_git, sha256, blake2s256, ' + 'length, status, reason ' + 'FROM skipped_content ORDER BY sha1_git') - datum = self.cursor.fetchone() + datums = self.cursor.fetchall() + + self.assertEqual(2, len(datums)) + datum = datums[0] self.assertEqual( - (datum[0], datum[1].tobytes(), datum[2], - datum[3], datum[4], datum[5]), - (None, cont['sha1_git'], None, - cont['length'], 'absent', 'Content too long')) +
(datum[0].tobytes(), datum[1].tobytes(), datum[2].tobytes(), + datum[3].tobytes(), datum[4], datum[5], datum[6]), + (cont['sha1'], cont['sha1_git'], cont['sha256'], + cont['blake2s256'], cont['length'], 'absent', + 'Content too long') + ) - datum2 = self.cursor.fetchone() + datum2 = datums[1] self.assertEqual( - (datum2[0], datum2[1].tobytes(), datum2[2], - datum2[3], datum2[4], datum2[5]), - (None, cont2['sha1_git'], None, - cont2['length'], 'absent', 'Content too long')) + (datum2[0].tobytes(), datum2[1].tobytes(), datum2[2].tobytes(), + datum2[3], datum2[4], datum2[5], datum2[6]), + (cont2['sha1'], cont2['sha1_git'], cont2['sha256'], + cont2['blake2s256'], cont2['length'], 'absent', + 'Content too long') + ) @istest def content_missing(self): @@ -1898,6 +1930,7 @@ 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' }) @@ -1911,6 +1944,7 @@ 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' }) @@ -1924,21 +1958,25 @@ 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' }) # 4. with something to find - actually_present = self.storage.content_find( - {'sha1': cont['sha1'], - 'sha1_git': cont['sha1_git'], - 'sha256': cont['sha256']}) + actually_present = self.storage.content_find({ + 'sha1': cont['sha1'], + 'sha1_git': cont['sha1_git'], + 'sha256': cont['sha256'], + 'blake2s256': cont['blake2s256'], + }) actually_present.pop('ctime') self.assertEqual(actually_present, { 'sha1': cont['sha1'], 'sha256': cont['sha256'], 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], 'length': cont['length'], 'status': 'visible' })