Page MenuHomeSoftware Heritage

D200.id667.diff
No OneTemporary

D200.id667.diff

diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -13,7 +13,7 @@
python3-requests,
python3-setuptools,
python3-swh.core (>= 0.0.28~),
- python3-swh.model (>= 0.0.14~),
+ python3-swh.model (>= 0.0.15~),
python3-swh.objstorage (>= 0.0.17~),
python3-swh.scheduler (>= 0.0.11~),
python3-vcversioner
@@ -23,7 +23,7 @@
Package: python3-swh.storage
Architecture: all
Depends: python3-swh.core (>= 0.0.28~),
- python3-swh.model (>= 0.0.14~),
+ python3-swh.model (>= 0.0.15~),
python3-swh.objstorage (>= 0.0.17~),
${misc:Depends},
${python3:Depends}
@@ -31,7 +31,7 @@
Package: python3-swh.storage.listener
Architecture: all
-Depends: python3-swh.journal,
+Depends: python3-swh.journal (>= 0.0.2~),
python3-kafka (>= 1.3.1~),
python3-swh.storage (= ${binary:Version}),
${misc:Depends},
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,5 @@
swh.core >= 0.0.28
-swh.model >= 0.0.14
+swh.model >= 0.0.15
swh.objstorage >= 0.0.17
swh.scheduler >= 0.0.11
+swh.journal >= 0.0.2
diff --git a/sql/swh-func.sql b/sql/swh-func.sql
--- a/sql/swh-func.sql
+++ b/sql/swh-func.sql
@@ -135,9 +135,10 @@
-- uniquely identify content, for the purpose of verifying if we already have
-- some content or not during content injection
create type content_signature as (
- sha1 sha1,
- sha1_git sha1_git,
- sha256 sha256
+ sha1 sha1,
+ sha1_git sha1_git,
+ sha256 sha256,
+ blake2s256 blake2s256
);
@@ -151,10 +152,12 @@
as $$
begin
return query (
- select sha1, sha1_git, sha256 from tmp_content as tmp
+ select sha1, sha1_git, sha256, blake2s256 from tmp_content as tmp
where not exists (
select 1 from content as c
- where c.sha1 = tmp.sha1 and c.sha1_git = tmp.sha1_git and c.sha256 = tmp.sha256
+ where c.sha1 = tmp.sha1 and
+ c.sha1_git = tmp.sha1_git and
+ c.sha256 = tmp.sha256
)
);
return;
@@ -189,7 +192,7 @@
as $$
begin
return query
- select sha1, sha1_git, sha256 from tmp_skipped_content t
+ select sha1, sha1_git, sha256, blake2s256 from tmp_skipped_content t
where not exists
(select 1 from skipped_content s where
s.sha1 is not distinct from t.sha1 and
@@ -210,9 +213,10 @@
-- (e.g., for the web app), for batch lookup of missing content (e.g., to be
-- added) see swh_content_missing
create or replace function swh_content_find(
- sha1 sha1 default NULL,
- sha1_git sha1_git default NULL,
- sha256 sha256 default NULL
+ sha1 sha1 default NULL,
+ sha1_git sha1_git default NULL,
+ sha256 sha256 default NULL,
+ blake2s256 blake2s256 default NULL
)
returns content
language plpgsql
@@ -231,12 +235,15 @@
if sha256 is not null then
filters := filters || format('sha256 = %L', sha256);
end if;
+ if blake2s256 is not null then
+ filters := filters || format('blake2s256 = %L', blake2s256);
+ end if;
if cardinality(filters) = 0 then
return null;
else
q = format('select * from content where %s',
- array_to_string(filters, ' and '));
+ array_to_string(filters, ' and '));
execute q into con;
return con;
end if;
@@ -253,10 +260,10 @@
language plpgsql
as $$
begin
- insert into content (sha1, sha1_git, sha256, length, status)
- select distinct sha1, sha1_git, sha256, length, status
+ insert into content (sha1, sha1_git, sha256, blake2s256, length, status)
+ select distinct sha1, sha1_git, sha256, blake2s256, length, status
from tmp_content
- where (sha1, sha1_git, sha256) in
+ where (sha1, sha1_git, sha256, blake2s256) in
(select * from swh_content_missing());
-- TODO XXX use postgres 9.5 "UPSERT" support here, when available.
-- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid
@@ -275,11 +282,11 @@
language plpgsql
as $$
begin
- insert into skipped_content (sha1, sha1_git, sha256, length, status, reason, origin)
- select distinct sha1, sha1_git, sha256, length, status, reason, origin
+ insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, status, reason, origin)
+ select distinct sha1, sha1_git, sha256, blake2s256, length, status, reason, origin
from tmp_skipped_content
- where (coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, '')) in
- (select coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, '') from swh_skipped_content_missing());
+ where (coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '')) in
+ (select coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '') from swh_skipped_content_missing());
-- TODO XXX use postgres 9.5 "UPSERT" support here, when available.
-- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid
-- the extra swh_content_missing() query here.
@@ -287,7 +294,6 @@
end
$$;
-
-- Update content entries from temporary table.
-- (columns are potential new columns added to the schema, this cannot be empty)
--
diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -2,7 +2,8 @@
create unique index concurrently content_pkey on content(sha1);
create unique index concurrently on content(sha1_git);
-create unique index concurrently on content(sha256);
+create index concurrently on content(sha256);
+create index concurrently on content(blake2s256);
create index concurrently on content(ctime); -- TODO use a BRIN index here (postgres >= 9.5)
create index concurrently on content(object_id);
@@ -77,7 +78,8 @@
create unique index concurrently on skipped_content(sha1);
create unique index concurrently on skipped_content(sha1_git);
-create unique index concurrently on skipped_content(sha256);
+create index concurrently on skipped_content(sha256);
+create index concurrently on skipped_content(blake2s256);
create index concurrently on skipped_content(object_id);
alter table skipped_content add constraint skipped_content_origin_fkey foreign key (origin) references origin(id) not valid;
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -14,7 +14,7 @@
);
insert into dbversion(version, release, description)
- values(103, now(), 'Work In Progress');
+ values(104, now(), 'Work In Progress');
-- a SHA1 checksum (not necessarily originating from Git)
create domain sha1 as bytea check (length(value) = 20);
@@ -25,6 +25,9 @@
-- a SHA256 checksum
create domain sha256 as bytea check (length(value) = 32);
+-- a BLAKE2s-256 checksum (32 bytes)
+create domain blake2s256 as bytea check (length(value) = 32);
+
-- UNIX path (absolute, relative, individual path component, etc.)
create domain unix_path as bytea;
@@ -37,14 +40,15 @@
-- content collisions not knowingly.
create table content
(
- sha1 sha1 not null,
- sha1_git sha1_git not null,
- sha256 sha256 not null,
- length bigint not null,
- ctime timestamptz not null default now(),
- -- creation time, i.e. time of (first) injection into the storage
- status content_status not null default 'visible',
- object_id bigserial
+ sha1 sha1 not null,
+ sha1_git sha1_git not null,
+ sha256 sha256 not null,
+ blake2s256 blake2s256,
+ length bigint not null,
+ ctime timestamptz not null default now(),
+ -- creation time, i.e. time of (first) injection into the storage
+ status content_status not null default 'visible',
+ object_id bigserial
);
@@ -165,15 +169,16 @@
-- out which origin contains that skipped content.
create table skipped_content
(
- sha1 sha1,
- sha1_git sha1_git,
- sha256 sha256,
- length bigint not null,
- ctime timestamptz not null default now(),
- status content_status not null default 'absent',
- reason text not null,
- origin bigint,
- object_id bigserial
+ sha1 sha1,
+ sha1_git sha1_git,
+ sha256 sha256,
+ blake2s256 blake2s256,
+ length bigint not null,
+ ctime timestamptz not null default now(),
+ status content_status not null default 'absent',
+ reason text not null,
+ origin bigint,
+ object_id bigserial
);
-- Log of all origin fetches (i.e., origin crawling) that have been done in the
diff --git a/sql/swh-triggers.sql b/sql/swh-triggers.sql
--- a/sql/swh-triggers.sql
+++ b/sql/swh-triggers.sql
@@ -7,7 +7,8 @@
perform pg_notify('new_content', json_build_object(
'sha1', encode(new.sha1, 'hex'),
'sha1_git', encode(new.sha1_git, 'hex'),
- 'sha256', encode(new.sha256, 'hex')
+ 'sha256', encode(new.sha256, 'hex'),
+ 'blake2s256', encode(new.blake2s256, 'hex')
)::text);
return null;
end;
@@ -45,7 +46,8 @@
perform pg_notify('new_skipped_content', json_build_object(
'sha1', encode(new.sha1, 'hex'),
'sha1_git', encode(new.sha1_git, 'hex'),
- 'sha256', encode(new.sha256, 'hex')
+ 'sha256', encode(new.sha256, 'hex'),
+ 'blake2s256', encode(new.blake2s256, 'hex')
)::text);
return null;
end;
diff --git a/sql/upgrades/104.sql b/sql/upgrades/104.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/104.sql
@@ -0,0 +1,163 @@
+-- SWH DB schema upgrade
+-- from_version: 103
+-- to_version: 104
+-- description: Compute new hash blake2s256
+-- Record the new schema version.
+insert into dbversion(version, release, description)
+ values(104, now(), 'Work In Progress');
+-- Drop the 3-argument swh_content_find; a 4-argument version is created further down.
+DROP FUNCTION swh_content_find(sha1 sha1, sha1_git sha1_git, sha256 sha256);
+-- Both sha256 indexes are dropped here and recreated (without UNIQUE) at the end of this script.
+DROP INDEX content_sha256_idx;
+
+DROP INDEX skipped_content_sha256_idx;
+-- A blake2s256 value is a bytea of exactly 32 bytes.
+create domain blake2s256 as bytea check (length(value) = 32);
+-- The column is added without NOT NULL or a default, so existing rows get a NULL blake2s256.
+ALTER TABLE content
+ ADD COLUMN blake2s256 blake2s256;
+-- Same nullable column on skipped_content.
+ALTER TABLE skipped_content
+ ADD COLUMN blake2s256 blake2s256;
+
+-- Trigger function: send the hex-encoded hashes of the NEW row as JSON text on channel 'new_content'.
+create or replace function notify_new_content() returns trigger language plpgsql as $$
+declare payload text;
+begin
+    payload := json_build_object(
+        'sha1', encode(new.sha1, 'hex'),
+        'sha1_git', encode(new.sha1_git, 'hex'),
+        'sha256', encode(new.sha256, 'hex'),
+        'blake2s256', encode(new.blake2s256, 'hex'))::text;
+    perform pg_notify('new_content', payload);
+    return null;
+end;
+$$;
+
+-- Trigger function: send the hex-encoded hashes of the NEW row as JSON text on channel 'new_skipped_content'.
+create or replace function notify_new_skipped_content() returns trigger language plpgsql as $$
+declare payload text;
+begin
+    payload := json_build_object(
+        'sha1', encode(new.sha1, 'hex'),
+        'sha1_git', encode(new.sha1_git, 'hex'),
+        'sha256', encode(new.sha256, 'hex'),
+        'blake2s256', encode(new.blake2s256, 'hex'))::text;
+    perform pg_notify('new_skipped_content', payload);
+    return null;
+end;
+$$;
+
+-- Insert into content the tmp_content rows that swh_content_missing() reports as missing.
+CREATE OR REPLACE FUNCTION swh_content_add() RETURNS void LANGUAGE plpgsql AS $$
+begin
+    insert into content (sha1, sha1_git, sha256, blake2s256, length, status)
+        select distinct sha1, sha1_git, sha256, blake2s256, length, status from tmp_content
+        -- blake2s256 is nullable and a row comparison with a NULL field is never true, so
+        -- match on the three hashes swh_content_missing() compares (mirror in swh-func.sql).
+        where (sha1, sha1_git, sha256) in
+            (select m.sha1, m.sha1_git, m.sha256 from swh_content_missing() as m);
+    -- TODO XXX use postgres 9.5 "UPSERT" support here, when available.
+    -- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid
+    -- the extra swh_content_missing() query here.
+    return;
+end
+$$;
+
+-- Return one content row matching all of the hashes passed as non-NULL, or NULL when none is passed.
+CREATE OR REPLACE FUNCTION swh_content_find(sha1 sha1 = NULL::bytea, sha1_git sha1_git = NULL::bytea, sha256 sha256 = NULL::bytea, blake2s256 blake2s256 = NULL::bytea) RETURNS content
+    LANGUAGE plpgsql
+    AS $$
+declare
+    clauses text[] := '{}'::text[];  -- one "column = literal" predicate per hash given
+    lookup_sql text;
+    result_row content;
+begin
+    if sha1 is not null then
+        clauses := array_append(clauses, format('sha1 = %L', sha1));
+    end if;
+    if sha1_git is not null then
+        clauses := array_append(clauses, format('sha1_git = %L', sha1_git));
+    end if;
+    if sha256 is not null then
+        clauses := array_append(clauses, format('sha256 = %L', sha256));
+    end if;
+    if blake2s256 is not null then
+        clauses := array_append(clauses, format('blake2s256 = %L', blake2s256));
+    end if;
+    -- no hash given, so there is nothing to look up
+    if cardinality(clauses) = 0 then
+        return null;
+    end if;
+    lookup_sql := format('select * from content where %s',
+                         array_to_string(clauses, ' and '));
+    execute lookup_sql into result_row;
+    return result_row;
+end
+$$;
+
+drop type content_signature cascade;  -- cascade also drops the functions returning this type; they are recreated below
+-- Recreate the composite type with the additional blake2s256 field.
+create type content_signature as (
+ sha1 sha1,
+ sha1_git sha1_git,
+ sha256 sha256,
+ blake2s256 blake2s256
+);
+
+
+-- Return the signature of every tmp_content row with no content row sharing its sha1, sha1_git and sha256.
+CREATE OR REPLACE FUNCTION swh_content_missing() RETURNS SETOF content_signature
+    LANGUAGE plpgsql AS $$
+begin
+    return query
+        select t.sha1, t.sha1_git, t.sha256, t.blake2s256
+        from tmp_content as t
+        where not exists (
+            select 1 from content as known
+            where known.sha1 = t.sha1
+              and known.sha1_git = t.sha1_git
+              and known.sha256 = t.sha256
+        );
+    return;
+end
+$$;
+
+-- Insert into skipped_content the tmp_skipped_content rows reported by swh_skipped_content_missing().
+CREATE OR REPLACE FUNCTION swh_skipped_content_add() RETURNS void LANGUAGE plpgsql AS $$
+begin
+    insert into skipped_content (sha1, sha1_git, sha256, blake2s256, length, status, reason, origin)
+        select distinct sha1, sha1_git, sha256, blake2s256, length, status, reason, origin
+        from tmp_skipped_content
+        -- hashes may be NULL in this table; coalesce them so that NULLs compare equal in the IN test
+        where (coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '')) in
+            (select coalesce(sha1, ''), coalesce(sha1_git, ''), coalesce(sha256, ''), coalesce(blake2s256, '') from swh_skipped_content_missing());
+    -- TODO XXX use postgres 9.5 "UPSERT" support here, when available.
+    -- Specifically, using "INSERT .. ON CONFLICT IGNORE" we can avoid
+    -- the extra swh_skipped_content_missing() query here.
+    return;
+end
+$$;
+
+-- Return the signature of every tmp_skipped_content row absent from skipped_content (a NULL hash matches NULL).
+CREATE OR REPLACE FUNCTION swh_skipped_content_missing() RETURNS SETOF content_signature
+    LANGUAGE plpgsql AS $$
+begin
+    return query
+        select p.sha1, p.sha1_git, p.sha256, p.blake2s256 from tmp_skipped_content as p
+        where not exists (
+            select 1 from skipped_content as stored
+            where stored.sha1 is not distinct from p.sha1
+              and stored.sha1_git is not distinct from p.sha1_git
+              and stored.sha256 is not distinct from p.sha256);
+    return;
+end
+$$;
+
+CREATE INDEX content_blake2s256_idx ON content USING btree (blake2s256);  -- plain (non-unique) lookup index on the new column
+-- content_sha256_idx was dropped at the top of this script; it is recreated here without UNIQUE.
+CREATE INDEX content_sha256_idx ON content USING btree (sha256);
+-- Same pair of non-unique indexes for skipped_content.
+CREATE INDEX skipped_content_blake2s256_idx ON skipped_content USING btree (blake2s256);
+
+CREATE INDEX skipped_content_sha256_idx ON skipped_content USING btree (sha256);
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -268,8 +268,12 @@
cur.execute("""select swh_content_update(ARRAY[%s] :: text[])""" %
keys_to_update)
- content_get_metadata_keys = ['sha1', 'sha1_git', 'sha256', 'length',
- 'status']
+ content_get_metadata_keys = [
+ 'sha1', 'sha1_git', 'sha256', 'blake2s256', 'length', 'status']
+
+ skipped_content_keys = [
+ 'sha1', 'sha1_git', 'sha256', 'blake2s256',
+ 'length', 'reason', 'status', 'origin']
def content_get_metadata_from_temp(self, cur=None):
cur = self._cursor(cur)
@@ -282,7 +286,7 @@
def content_missing_from_temp(self, cur=None):
cur = self._cursor(cur)
- cur.execute("""SELECT sha1, sha1_git, sha256
+ cur.execute("""SELECT sha1, sha1_git, sha256, blake2s256
FROM swh_content_missing()""")
yield from cursor_to_bytes(cur)
@@ -298,7 +302,7 @@
def skipped_content_missing_from_temp(self, cur=None):
cur = self._cursor(cur)
- cur.execute("""SELECT sha1, sha1_git, sha256
+ cur.execute("""SELECT sha1, sha1_git, sha256, blake2s256
FROM swh_skipped_content_missing()""")
yield from cursor_to_bytes(cur)
@@ -319,24 +323,30 @@
yield from cursor_to_bytes(cur)
- def content_find(self, sha1=None, sha1_git=None, sha256=None, cur=None):
+ content_find_cols = ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length',
+ 'ctime', 'status']
+
+ def content_find(self, sha1=None, sha1_git=None, sha256=None,
+ blake2s256=None, cur=None):
"""Find the content optionally on a combination of the following
- checksums sha1, sha1_git or sha256.
+ checksums sha1, sha1_git, sha256 or blake2s256.
Args:
sha1: sha1 content
git_sha1: the sha1 computed `a la git` sha1 of the content
sha256: sha256 content
+ blake2s256: blake2s256 content
Returns:
- The triplet (sha1, sha1_git, sha256) if found or None.
+ The tuple of `content_find_cols` values if found or None.
"""
cur = self._cursor(cur)
- cur.execute("""SELECT sha1, sha1_git, sha256, length, ctime, status
- FROM swh_content_find(%s, %s, %s)
- LIMIT 1""", (sha1, sha1_git, sha256))
+ cur.execute("""SELECT %s
+ FROM swh_content_find(%%s, %%s, %%s, %%s)
+ LIMIT 1""" % ','.join(self.content_find_cols),
+ (sha1, sha1_git, sha256, blake2s256))
content = line_to_bytes(cur.fetchone())
if set(content) == {None}:
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -23,6 +23,9 @@
BULK_BLOCK_CONTENT_LEN_MAX = 10000
+CONTENT_HASH_KEYS = ['sha1', 'sha1_git', 'sha256', 'blake2s256']
+
+
class Storage():
"""SWH storage proxy, encompassing DB and object storage
@@ -89,6 +92,15 @@
"""
db = self.db
+        def _unique_key(hash, keys=CONTENT_HASH_KEYS):
+            """Given a hash (tuple or dict), return a unique key from the
+               aggregation of keys. A hash absent from a dict counts as None.
+
+            """
+            if isinstance(hash, tuple):  # already a key, returned unchanged
+                return hash
+            return tuple(hash.get(k) for k in keys)  # .get: no KeyError on partial hashes
+
content_by_status = defaultdict(list)
for d in content:
if 'status' not in d:
@@ -101,9 +113,9 @@
content_without_data = content_by_status['absent']
missing_content = set(self.content_missing(content_with_data))
- missing_skipped = set(
- sha1_git for sha1, sha1_git, sha256
- in self.skipped_content_missing(content_without_data))
+ missing_skipped = set(_unique_key(hashes) for hashes
+ in self.skipped_content_missing(
+ content_without_data))
with db.transaction() as cur:
if missing_content:
@@ -118,19 +130,19 @@
if cont['sha1'] in missing_content)
db.copy_to(content_filtered, 'tmp_content',
- ['sha1', 'sha1_git', 'sha256', 'length', 'status'],
+ db.content_get_metadata_keys,
cur, item_cb=add_to_objstorage)
# move metadata in place
db.content_add_from_temp(cur)
if missing_skipped:
- missing_filtered = (cont for cont in content_without_data
- if cont['sha1_git'] in missing_skipped)
+ missing_filtered = [cont for cont in content_without_data
+ if _unique_key(cont) in missing_skipped]
+
db.mktemp('skipped_content', cur)
db.copy_to(missing_filtered, 'tmp_skipped_content',
- ['sha1', 'sha1_git', 'sha256', 'length',
- 'reason', 'status', 'origin'], cur)
+ db.skipped_content_keys, cur)
# move metadata in place
db.skipped_content_add_from_temp(cur)
@@ -232,9 +244,9 @@
"""
db = self.db
- keys = ['sha1', 'sha1_git', 'sha256']
+ keys = CONTENT_HASH_KEYS
- if key_hash not in keys:
+ if key_hash not in CONTENT_HASH_KEYS:
raise ValueError("key_hash should be one of %s" % keys)
key_hash_idx = keys.index(key_hash)
@@ -278,7 +290,7 @@
Returns:
an iterable of signatures missing from the storage
"""
- keys = ['sha1', 'sha1_git', 'sha256']
+ keys = CONTENT_HASH_KEYS
db = self.db
@@ -310,15 +322,15 @@
if not set(content).intersection(ALGORITHMS):
raise ValueError('content keys must contain at least one of: '
- 'sha1, sha1_git, sha256')
+ 'sha1, sha1_git, sha256, blake2s256')
c = db.content_find(sha1=content.get('sha1'),
sha1_git=content.get('sha1_git'),
sha256=content.get('sha256'),
+ blake2s256=content.get('blake2s256'),
cur=cur)
if c:
- keys = ['sha1', 'sha1_git', 'sha256', 'length', 'ctime', 'status']
- return dict(zip(keys, c))
+ return dict(zip(db.content_find_cols, c))
return None
@db_transaction_generator
diff --git a/swh/storage/tests/test_db.py b/swh/storage/tests/test_db.py
--- a/swh/storage/tests/test_db.py
+++ b/swh/storage/tests/test_db.py
@@ -43,9 +43,11 @@
'sha256': hash_to_bytes(
'673650f936cb3b0a2f93ce09d81be107'
'48b1b203c19e8176b4eefc1964a0cf3a'),
+ 'blake2s256': hash_to_bytes('69217a3079908094e11121d042354a7c'
+ '1f55b6482ca1a51e1b250dfd1ed0eef9'),
'length': 3}],
'tmp_content',
- ['sha1', 'sha1_git', 'sha256', 'length'],
+ ['sha1', 'sha1_git', 'sha256', 'blake2s256', 'length'],
cur)
self.db.content_add_from_temp(cur)
self.cursor.execute('SELECT sha1 FROM content WHERE sha1 = %s',
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -65,6 +65,8 @@
'sha256': hash_to_bytes(
'673650f936cb3b0a2f93ce09d81be107'
'48b1b203c19e8176b4eefc1964a0cf3a'),
+ 'blake2s256': hash_to_bytes('d5fe1939576527e42cfd76a9455a2'
+ '432fe7f56669564577dd93c4280e76d661d'),
'status': 'visible',
}
@@ -78,6 +80,8 @@
'sha256': hash_to_bytes(
'859f0b154fdb2d630f45e1ecae4a8629'
'15435e663248bb8461d914696fc047cd'),
+ 'blake2s256': hash_to_bytes('849c20fad132b7c2d62c15de310adfe87be'
+ '94a379941bed295e8141c6219810d'),
'status': 'visible',
}
@@ -91,6 +95,8 @@
'sha256': hash_to_bytes(
'92fb72daf8c6818288a35137b72155f5'
'07e5de8d892712ab96277aaed8cf8a36'),
+ 'blake2s256': hash_to_bytes('76d0346f44e5a27f6bafdd9c2befd304af'
+ 'f83780f93121d801ab6a1d4769db11'),
'status': 'visible',
}
@@ -104,6 +110,8 @@
'sha256': hash_to_bytes(
'6bbd052ab054ef222c1c87be60cd191a'
'ddedd24cc882d1f5f7f7be61dc61bb3a'),
+ 'blake2s256': hash_to_bytes('306856b8fd879edb7b6f1aeaaf8db9bbecc9'
+ '93cd7f776c333ac3a782fa5c6eba'),
'status': 'absent',
}
@@ -111,6 +119,14 @@
'length': 1024 * 1024 * 200,
'sha1_git': hash_to_bytes(
'33e45d56f88993aae6a0198013efa80716fd8920'),
+ 'sha1': hash_to_bytes(
+ '43e45d56f88993aae6a0198013efa80716fd8920'),
+ 'sha256': hash_to_bytes(
+ '7bbd052ab054ef222c1c87be60cd191a'
+ 'ddedd24cc882d1f5f7f7be61dc61bb3a'),
+ 'blake2s256': hash_to_bytes(
+ 'ade18b1adecb33f891ca36664da676e1'
+ '2c772cc193778aac9a137b8dc5834b9b'),
'reason': 'Content too long',
'status': 'absent',
}
@@ -118,7 +134,15 @@
self.skipped_cont2 = {
'length': 1024 * 1024 * 300,
'sha1_git': hash_to_bytes(
- '33e45d56f88993aae6a0198013efa80716fd8921'),
+ '44e45d56f88993aae6a0198013efa80716fd8921'),
+ 'sha1': hash_to_bytes(
+ '54e45d56f88993aae6a0198013efa80716fd8920'),
+ 'sha256': hash_to_bytes(
+ '8cbd052ab054ef222c1c87be60cd191a'
+ 'ddedd24cc882d1f5f7f7be61dc61bb3a'),
+ 'blake2s256': hash_to_bytes(
+ '9ce18b1adecb33f891ca36664da676e1'
+ '2c772cc193778aac9a137b8dc5834b9b'),
'reason': 'Content too long',
'status': 'absent',
}
@@ -621,28 +645,36 @@
@istest
def skipped_content_add(self):
- cont = self.skipped_cont
- cont2 = self.skipped_cont2
+ cont = self.skipped_cont.copy()
+ cont2 = self.skipped_cont2.copy()
+ cont2['blake2s256'] = None
- self.storage.content_add([cont])
- self.storage.content_add([cont2])
+ self.storage.content_add([cont, cont, cont2])
- self.cursor.execute('SELECT sha1, sha1_git, sha256, length, status,'
- 'reason FROM skipped_content ORDER BY sha1_git')
+ self.cursor.execute('SELECT sha1, sha1_git, sha256, blake2s256, '
+ 'length, status, reason '
+ 'FROM skipped_content ORDER BY sha1_git')
- datum = self.cursor.fetchone()
+ datums = self.cursor.fetchall()
+
+ self.assertEquals(2, len(datums))
+ datum = datums[0]
self.assertEqual(
- (datum[0], datum[1].tobytes(), datum[2],
- datum[3], datum[4], datum[5]),
- (None, cont['sha1_git'], None,
- cont['length'], 'absent', 'Content too long'))
+ (datum[0].tobytes(), datum[1].tobytes(), datum[2].tobytes(),
+ datum[3].tobytes(), datum[4], datum[5], datum[6]),
+ (cont['sha1'], cont['sha1_git'], cont['sha256'],
+ cont['blake2s256'], cont['length'], 'absent',
+ 'Content too long')
+ )
- datum2 = self.cursor.fetchone()
+ datum2 = datums[1]
self.assertEqual(
- (datum2[0], datum2[1].tobytes(), datum2[2],
- datum2[3], datum2[4], datum2[5]),
- (None, cont2['sha1_git'], None,
- cont2['length'], 'absent', 'Content too long'))
+ (datum2[0].tobytes(), datum2[1].tobytes(), datum2[2].tobytes(),
+ datum2[3], datum2[4], datum2[5], datum2[6]),
+ (cont2['sha1'], cont2['sha1_git'], cont2['sha256'],
+ cont2['blake2s256'], cont2['length'], 'absent',
+ 'Content too long')
+ )
@istest
def content_missing(self):
@@ -1898,6 +1930,7 @@
'sha1': cont['sha1'],
'sha256': cont['sha256'],
'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
'length': cont['length'],
'status': 'visible'
})
@@ -1911,6 +1944,7 @@
'sha1': cont['sha1'],
'sha256': cont['sha256'],
'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
'length': cont['length'],
'status': 'visible'
})
@@ -1924,21 +1958,25 @@
'sha1': cont['sha1'],
'sha256': cont['sha256'],
'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
'length': cont['length'],
'status': 'visible'
})
# 4. with something to find
- actually_present = self.storage.content_find(
- {'sha1': cont['sha1'],
- 'sha1_git': cont['sha1_git'],
- 'sha256': cont['sha256']})
+ actually_present = self.storage.content_find({
+ 'sha1': cont['sha1'],
+ 'sha1_git': cont['sha1_git'],
+ 'sha256': cont['sha256'],
+ 'blake2s256': cont['blake2s256'],
+ })
actually_present.pop('ctime')
self.assertEqual(actually_present, {
'sha1': cont['sha1'],
'sha256': cont['sha256'],
'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
'length': cont['length'],
'status': 'visible'
})

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 12:16 PM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223515

Event Timeline