Page MenuHomeSoftware Heritage
Paste P153

hglib vs dulwich finding blobs
ActivePublic

Authored by fiendish on Apr 4 2017, 4:18 PM.
# hgrepo and gitrepo are the same repository, differing only in the VCS (hg vs. git).
# all times are on a fast SSD
# Approach 1 -- takes about 20 seconds.
# Walk every entry returned by hgrepo.log(), and for each file the changeset
# reports as added or modified, fetch its content at that revision.
# The dict is keyed by file content, so identical contents collapse into a
# single entry (the last (file, rev) pair seen wins).
hgblobs = {}
for entry in hgrepo.log():
    ctx = hgrepo[entry]
    for name in ctx.added() + ctx.modified():
        # cat() is given the path joined onto the repository root.
        full_path = os.path.join(hgrepo.root(), name)
        content = hgrepo.cat([full_path], ctx.rev())
        hgblobs[content] = (name, ctx.rev())
# Approach 2 (better!) -- takes about 14 seconds.
# Start from the file names in hgrepo.manifest(all=True) and, for each file,
# walk only the log entries that touch it, fetching the content at each one.
# As above, the dict is keyed by file content.
hgblobs = {}
for name in hgrepo.manifest(all=True):
    full_path = os.path.join(hgrepo.root(), name)
    for entry in hgrepo.log(files=[full_path]):
        # The first field of a log entry is used as the revision; it is
        # converted with int() for cat() but stored unconverted in the value.
        rev_field = entry[0]
        content = hgrepo.cat([full_path], int(rev_field))
        hgblobs[content] = (name, rev_field)
# Git side -- takes about 3 seconds.
# Collect every object of type b'blob' from each pack in the object store.
# Only gitrepo.object_store.packs is iterated here.
gitblobs = [
    obj
    for pack in gitrepo.object_store.packs
    for obj in pack.iterobjects()
    if obj.type_name == b'blob'
]
# There are 1686 revs in the repos. 2374 blobs.