diff --git a/swh/loader/mercurial/hgutil.py b/swh/loader/mercurial/hgutil.py index 028e04c..c948623 100644 --- a/swh/loader/mercurial/hgutil.py +++ b/swh/loader/mercurial/hgutil.py @@ -1,104 +1,102 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import io import os import signal import time import traceback from typing import Dict, NewType from billiard import Process, Queue # The internal Mercurial API is not guaranteed to be stable. from mercurial import context, error, hg, smartset, util # type: ignore import mercurial.ui # type: ignore NULLID = mercurial.node.nullid HgNodeId = NewType("HgNodeId", bytes) Repository = hg.localrepo BaseContext = context.basectx LRUCacheDict = util.lrucachedict HgSpanSet = smartset._spanset HgFilteredSet = smartset.filteredset LookupError = error.LookupError def repository(path: str) -> hg.localrepo: ui = mercurial.ui.ui.load() return hg.repository(ui, path.encode()) def branches(repo: hg.localrepo) -> Dict[bytes, HgNodeId]: """List repository named branches and their tip node.""" result = {} for tag, heads, tip, isclosed in repo.branchmap().iterbranches(): - if isclosed: - continue result[tag] = tip return result class CloneTimeout(Exception): pass class CloneFailure(Exception): pass def _clone_task(src: str, dest: str, errors: Queue) -> None: """Clone task to run in a subprocess. Args: src: clone source dest: clone destination errors: message queue to communicate errors """ try: hg.clone( ui=mercurial.ui.ui.load(), peeropts={}, source=src.encode(), dest=dest.encode(), update=False, ) except Exception as e: exc_buffer = io.StringIO() traceback.print_exc(file=exc_buffer) errors.put_nowait(exc_buffer.getvalue()) raise e def clone(src: str, dest: str, timeout: float) -> None: """Clone a repository with timeout. Args: src: clone source dest: clone destination timeout: timeout in seconds """ errors: Queue = Queue() process = Process(target=_clone_task, args=(src, dest, errors)) process.start() process.join(timeout) if process.is_alive(): process.terminate() # Give it a second (literally), then kill it # Can't use `process.join(1)` here, billiard appears to be bugged # https://github.com/celery/billiard/issues/270 killed = False for _ in range(10): time.sleep(0.1) if not process.is_alive(): break else: killed = True os.kill(process.pid, signal.SIGKILL) raise CloneTimeout(src, timeout, killed) if not errors.empty(): raise CloneFailure(src, dest, errors.get()) diff --git a/swh/loader/mercurial/tests/data/example.json b/swh/loader/mercurial/tests/data/example.json index 67ba3fa..9b0baca 100644 --- a/swh/loader/mercurial/tests/data/example.json +++ b/swh/loader/mercurial/tests/data/example.json @@ -1 +1 @@ -{"directories": ["048960a9eff9a9f22ce2fc2e2bc9b5f73cdfc26a", "09a1bb68db049b4e37540e52ebde76f59126b3a8", "0dad640e1eb9f31cb9d874158318f1f180be9b3a", "181a22e7ad8bbad9bb5846f51c377a7597a0c914", "218ccb1594f7026492c72309974b44aba353d7dc", "93e88b135dc8c3420cd4984e21d8d1eb2781ddce", "d476a11ddfcfce07236a0a03f78e3c1a73bc20ae", "ecf37a29314efe473b399b700c7e5eacc063ba6e", "fa5e6af79e30fc26ab4acbd96388fde22b4c2f36"], "revisions": ["1171aa960a675f8b8327199ff084b6e7c879361d", "23459c9c498542cde67d8d130bc4c0b3084edf5f", "486d227d252ee8a3a01ef40348964f68e21018a5", "65de9d553502aa1f1cb20df179a6ea04e6d2039e", "8f392d7f64419bf4672a75a07f61ce243a7f2c67", "acd77cd84bfca51b3d4f928109a9de52a45618f2", "bb1befca13ceb1a8ebde25cec05966be3eed9bca", "bf91ae31bdb938c2927e741b53af815380340ea7", "c88ea3f5892a5e726739a10eca3afe5d7fa648ce"], "releases": [], "snapshot": "831e126d30afd81b62030547778225577fe5d0d6"} \ No newline at end of file +{"directories": ["048960a9eff9a9f22ce2fc2e2bc9b5f73cdfc26a", "09a1bb68db049b4e37540e52ebde76f59126b3a8", "0dad640e1eb9f31cb9d874158318f1f180be9b3a", "181a22e7ad8bbad9bb5846f51c377a7597a0c914", "218ccb1594f7026492c72309974b44aba353d7dc", "93e88b135dc8c3420cd4984e21d8d1eb2781ddce", "d476a11ddfcfce07236a0a03f78e3c1a73bc20ae", "ecf37a29314efe473b399b700c7e5eacc063ba6e", "fa5e6af79e30fc26ab4acbd96388fde22b4c2f36"], "revisions": ["1171aa960a675f8b8327199ff084b6e7c879361d", "23459c9c498542cde67d8d130bc4c0b3084edf5f", "486d227d252ee8a3a01ef40348964f68e21018a5", "65de9d553502aa1f1cb20df179a6ea04e6d2039e", "8f392d7f64419bf4672a75a07f61ce243a7f2c67", "acd77cd84bfca51b3d4f928109a9de52a45618f2", "bb1befca13ceb1a8ebde25cec05966be3eed9bca", "bf91ae31bdb938c2927e741b53af815380340ea7", "c88ea3f5892a5e726739a10eca3afe5d7fa648ce"], "releases": [], "snapshot": "ac42dfd0f2308197c5b6e5653ad13c8da23d5040"} diff --git a/swh/loader/mercurial/tests/data/the-sandbox.json b/swh/loader/mercurial/tests/data/the-sandbox.json index 0e9dbf6..936dc52 100644 --- a/swh/loader/mercurial/tests/data/the-sandbox.json +++ b/swh/loader/mercurial/tests/data/the-sandbox.json @@ -1 +1 @@ -{"directories": ["180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "9cd8160c67ac4b0bc97e2e2cd918a580425167d3", "e2e117569b086ceabeeedee4acd95f35298d4553"], "revisions": ["17a62618eb6e91a1d5d8e1246ccedae020d3b222", "18012a93d5aadc331c468dac84b524430f4abc19", "1ee770fd10ea2d8c4f6e68a1dbe79378a86611e0", "24f45e41637240b7f9e16d2791b5eacb4a406d0f", "25f5b27dfa5ed15d336188ef46bef743d88327d4", "2652147529269778757d96e09aaf081695548218", "2973e5dc9568ac491b198f6b7f10c44ddc04e0a3", "2d4a801c9a9645fcd3a9f4c06418d8393206b1f3", "31cd7c5f669868651c57e3a2ba25ac45f76fa5cf", "32eb0354a660128e205bf7c3a84b46040ef70d92", "34192ceef239b8b72141efcc58b1d7f1676a18c9", "3565e7d385af0745ec208d719e469c2f58be8e94", "3ed4b85d30401fe32ae3b1d650f215a588293a9e", "40def747398c76ceec1bd248e3a6cb2a52e22dc5", "4d640e8064fe69b4c851dfd43915c431e80c7497", "4e2dc6d6073f0b6d348f84ded52f9143b10344b9", "4ef794980f820d44be94b2f0d53eb34d4241638c", "5017ce0b285351da09a2029ea2cf544f79b593c7", "553b09724bd30d9691b290e157b27a73e2d3e537", "5ee9ea92ed8cc1737b7670e39dab6081c64f2598", "5f4eba626c3f826820c4475d2d81410759ec911b", "61d762d65afb3150e2653d6735068241779c1fcf", "62ff4741eac1821190f6c2cdab7c8a9d7db64ad0", "6910964416438ca8d1698f6295871d727c4d4851", "70e750bb046101fdced06f428e73fee471509c56", "74335db9f45a5d1c8133ff7a7db5ed7a8d4a197b", "769db00b34b9e085dc699c8f1550c95793d0e904", "88b80615ed8561be74a700b92883ec0374ddacb0", "94be9abcf9558213ff301af0ecd8223451ce991d", "9c9e0ff08f215a5a5845ce3dbfc5b48c8050bdaf", "9e912851eb64e3a1e08fbb587de7a4c897ce5a0a", "9f82d95bd3edfb7f18b1a21d6171170395ea44ce", "a1f000fb8216838aa2a120738cc6c7fef2d1b4d8", "a41e2a548ba51ee47f22baad8e88994853d3e2f5", "a701d39a17a9f48c61a06eee08bd9ac0b8e3838b", "a9c4534552df370f43f0ef97146f393ef2f2a08c", "aafb69fd7496ca617f741d38c40808ff2382aabe", "b6932cb7f59e746899e4804f3d496126d1343615", "be34b8c7857a6c04e41cc06b26338d8e59cb2601", "be44d5e6cc66580f59c108f8bff5911ee91a22e4", "bec4c0a31b0b2502f44f34aeb9827cd090cca621", "c313df50bfcaa773dcbe038d00f8bd770ba997f8", "c346f6ff7f42f2a8ff867f92ab83a6721057d86c", "c4a95d5097519dedac437fddf0ef775136081241", "c77e776d22548d47a8d96463a3556172776cd59b", "c875bad563a73a25c5f3379828b161b1441a7c5d", "caef0cb155eb6c55215aa59aabe04a9c702bbe6a", "cb36b894129ca7910bb81c457c72d69d5ff111bc", "d2164061453ecb03d4347a05a77db83f706b8e15", "dafa445964230e808148db043c126063ea1dc9b6", "db9e625ba90056304897a94c92e5d27bc60f112d", "dc3e3ab7fe257d04769528e5e17ad9f1acb44659", "dcba06661c607fe55ec67b1712d153b69f65e38c", "dcddcc32740d2de0e1403e21a5c4ed837b352992", "ddecbc16f4c916c39eacfcb2302e15a9e70a231e", "e326a7bbb5bc00f1d8cacd6108869dedef15569c", "e874cd5967efb1f45282e9f5ce87cc68a898a6d0", "f2afbb94b319ef5d60823859875284afb95dcc18"], "releases": [], "snapshot": "3b8fe58e467deb7597b12a5fd3b2c096b8c02028"} \ No newline at end of file +{"directories": ["180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "180bd57623a7c2c47a8c43514a5f4d903503d0aa", "9cd8160c67ac4b0bc97e2e2cd918a580425167d3", "e2e117569b086ceabeeedee4acd95f35298d4553"], "revisions": ["17a62618eb6e91a1d5d8e1246ccedae020d3b222", "18012a93d5aadc331c468dac84b524430f4abc19", "1ee770fd10ea2d8c4f6e68a1dbe79378a86611e0", "24f45e41637240b7f9e16d2791b5eacb4a406d0f", "25f5b27dfa5ed15d336188ef46bef743d88327d4", "2652147529269778757d96e09aaf081695548218", "2973e5dc9568ac491b198f6b7f10c44ddc04e0a3", "2d4a801c9a9645fcd3a9f4c06418d8393206b1f3", "31cd7c5f669868651c57e3a2ba25ac45f76fa5cf", "32eb0354a660128e205bf7c3a84b46040ef70d92", "34192ceef239b8b72141efcc58b1d7f1676a18c9", "3565e7d385af0745ec208d719e469c2f58be8e94", "3ed4b85d30401fe32ae3b1d650f215a588293a9e", "40def747398c76ceec1bd248e3a6cb2a52e22dc5", "4d640e8064fe69b4c851dfd43915c431e80c7497", "4e2dc6d6073f0b6d348f84ded52f9143b10344b9", "4ef794980f820d44be94b2f0d53eb34d4241638c", "5017ce0b285351da09a2029ea2cf544f79b593c7", "553b09724bd30d9691b290e157b27a73e2d3e537", "5ee9ea92ed8cc1737b7670e39dab6081c64f2598", "5f4eba626c3f826820c4475d2d81410759ec911b", "61d762d65afb3150e2653d6735068241779c1fcf", "62ff4741eac1821190f6c2cdab7c8a9d7db64ad0", "6910964416438ca8d1698f6295871d727c4d4851", "70e750bb046101fdced06f428e73fee471509c56", "74335db9f45a5d1c8133ff7a7db5ed7a8d4a197b", "769db00b34b9e085dc699c8f1550c95793d0e904", "88b80615ed8561be74a700b92883ec0374ddacb0", "94be9abcf9558213ff301af0ecd8223451ce991d", "9c9e0ff08f215a5a5845ce3dbfc5b48c8050bdaf", "9e912851eb64e3a1e08fbb587de7a4c897ce5a0a", "9f82d95bd3edfb7f18b1a21d6171170395ea44ce", "a1f000fb8216838aa2a120738cc6c7fef2d1b4d8", "a41e2a548ba51ee47f22baad8e88994853d3e2f5", "a701d39a17a9f48c61a06eee08bd9ac0b8e3838b", "a9c4534552df370f43f0ef97146f393ef2f2a08c", "aafb69fd7496ca617f741d38c40808ff2382aabe", "b6932cb7f59e746899e4804f3d496126d1343615", "be34b8c7857a6c04e41cc06b26338d8e59cb2601", "be44d5e6cc66580f59c108f8bff5911ee91a22e4", "bec4c0a31b0b2502f44f34aeb9827cd090cca621", "c313df50bfcaa773dcbe038d00f8bd770ba997f8", "c346f6ff7f42f2a8ff867f92ab83a6721057d86c", "c4a95d5097519dedac437fddf0ef775136081241", "c77e776d22548d47a8d96463a3556172776cd59b", "c875bad563a73a25c5f3379828b161b1441a7c5d", "caef0cb155eb6c55215aa59aabe04a9c702bbe6a", "cb36b894129ca7910bb81c457c72d69d5ff111bc", "d2164061453ecb03d4347a05a77db83f706b8e15", "dafa445964230e808148db043c126063ea1dc9b6", "db9e625ba90056304897a94c92e5d27bc60f112d", "dc3e3ab7fe257d04769528e5e17ad9f1acb44659", "dcba06661c607fe55ec67b1712d153b69f65e38c", "dcddcc32740d2de0e1403e21a5c4ed837b352992", "ddecbc16f4c916c39eacfcb2302e15a9e70a231e", "e326a7bbb5bc00f1d8cacd6108869dedef15569c", "e874cd5967efb1f45282e9f5ce87cc68a898a6d0", "f2afbb94b319ef5d60823859875284afb95dcc18"], "releases": [], "snapshot": "f5347d142821cc00f18fb4e2d3253cdefe6ad645"} diff --git a/swh/loader/mercurial/tests/test_from_disk.py b/swh/loader/mercurial/tests/test_from_disk.py index 991b374..bf7305a 100644 --- a/swh/loader/mercurial/tests/test_from_disk.py +++ b/swh/loader/mercurial/tests/test_from_disk.py @@ -1,486 +1,535 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from hashlib import sha1 import os from pathlib import Path import subprocess import attr import pytest from swh.loader.mercurial.utils import parse_visit_date from swh.loader.tests import ( assert_last_visit_matches, check_snapshot, get_stats, prepare_repository_from_archive, ) from swh.model.from_disk import Content, DentryPerms from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model.identifiers import ObjectType from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType from swh.storage import get_storage from swh.storage.algos.snapshot import snapshot_get_latest from ..from_disk import HgDirectory, HgLoaderFromDisk from .loader_checker import ExpectedSwhids, LoaderChecker VISIT_DATE = parse_visit_date("2016-05-03 15:16:32+00") assert VISIT_DATE is not None def random_content() -> Content: """Create minimal content object.""" data = str(datetime.now()).encode() return Content({"sha1_git": sha1(data).digest(), "perms": DentryPerms.content}) def test_hg_directory_creates_missing_directories(): directory = HgDirectory() directory[b"path/to/some/content"] = random_content() def test_hg_directory_get(): content = random_content() directory = HgDirectory() assert directory.get(b"path/to/content") is None assert directory.get(b"path/to/content", content) == content directory[b"path/to/content"] = content assert directory.get(b"path/to/content") == content def test_hg_directory_deletes_empty_directories(): directory = HgDirectory() content = random_content() directory[b"path/to/content"] = content directory[b"path/to/some/deep/content"] = random_content() del directory[b"path/to/some/deep/content"] assert directory.get(b"path/to/some/deep") is None assert directory.get(b"path/to/some") is None assert directory.get(b"path/to/content") == content def test_hg_directory_when_directory_replaces_file(): directory = HgDirectory() directory[b"path/to/some"] = random_content() directory[b"path/to/some/content"] = random_content() # Those tests assert expectations on repository loading # by reading expected values from associated json files # produced by the `swh-hg-identify` command line utility. # # It has more granularity than historical tests. # Assertions will tell if the error comes from the directories # revisions or release rather than only checking the snapshot. # # With more work it should event be possible to know which part # of an object is faulty. -def test_examples(swh_storage, datadir, tmp_path): - for archive_name in ("hello", "transplant", "the-sandbox", "example"): - archive_path = os.path.join(datadir, f"{archive_name}.tgz") - json_path = os.path.join(datadir, f"{archive_name}.json") - repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) +@pytest.mark.parametrize( + "archive_name", ("hello", "transplant", "the-sandbox", "example") +) +def test_examples(swh_storage, datadir, tmp_path, archive_name): + archive_path = Path(datadir, f"{archive_name}.tgz") + json_path = Path(datadir, f"{archive_name}.json") + repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) - LoaderChecker( - loader=HgLoaderFromDisk(swh_storage, repo_url), - expected=ExpectedSwhids.load(json_path), - ).check() + LoaderChecker( + loader=HgLoaderFromDisk(swh_storage, repo_url), + expected=ExpectedSwhids.load(json_path), + ).check() # This test has as been adapted from the historical `HgBundle20Loader` tests # to ensure compatibility of `HgLoaderFromDisk`. # Hashes as been produced by copy pasting the result of the implementation # to prevent regressions. def test_loader_hg_new_visit_no_release(swh_storage, datadir, tmp_path): """Eventful visit should yield 1 snapshot""" archive_name = "the-sandbox" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) loader = HgLoaderFromDisk(swh_storage, url=repo_url) assert loader.load() == {"status": "eventful"} - tip_revision_develop = "a9c4534552df370f43f0ef97146f393ef2f2a08c" - tip_revision_default = "70e750bb046101fdced06f428e73fee471509c56" + mapping = { + b"default": "70e750bb046101fdced06f428e73fee471509c56", + b"develop": "a9c4534552df370f43f0ef97146f393ef2f2a08c", + b"feature/fun_time": "4d640e8064fe69b4c851dfd43915c431e80c7497", + b"feature/green2_loader": "94be9abcf9558213ff301af0ecd8223451ce991d", + b"feature/greenloader": "9f82d95bd3edfb7f18b1a21d6171170395ea44ce", + b"feature/my_test": "dafa445964230e808148db043c126063ea1dc9b6", + b"feature/read2_loader": "9e912851eb64e3a1e08fbb587de7a4c897ce5a0a", + b"feature/readloader": "ddecbc16f4c916c39eacfcb2302e15a9e70a231e", + b"feature/red": "cb36b894129ca7910bb81c457c72d69d5ff111bc", + b"feature/split5_loader": "3ed4b85d30401fe32ae3b1d650f215a588293a9e", + b"feature/split_causing": "c346f6ff7f42f2a8ff867f92ab83a6721057d86c", + b"feature/split_loader": "5f4eba626c3f826820c4475d2d81410759ec911b", + b"feature/split_loader5": "5017ce0b285351da09a2029ea2cf544f79b593c7", + b"feature/split_loading": "4e2dc6d6073f0b6d348f84ded52f9143b10344b9", + b"feature/split_redload": "2d4a801c9a9645fcd3a9f4c06418d8393206b1f3", + b"feature/splitloading": "88b80615ed8561be74a700b92883ec0374ddacb0", + b"feature/test": "61d762d65afb3150e2653d6735068241779c1fcf", + b"feature/test_branch": "be44d5e6cc66580f59c108f8bff5911ee91a22e4", + b"feature/test_branching": "d2164061453ecb03d4347a05a77db83f706b8e15", + b"feature/test_dog": "2973e5dc9568ac491b198f6b7f10c44ddc04e0a3", + } + + expected_branches = { + k: SnapshotBranch(target=hash_to_bytes(v), target_type=TargetType.REVISION) + for k, v in mapping.items() + } + expected_branches[b"HEAD"] = SnapshotBranch( + target=b"develop", target_type=TargetType.ALIAS + ) + expected_snapshot = Snapshot( - id=hash_to_bytes("3b8fe58e467deb7597b12a5fd3b2c096b8c02028"), - branches={ - b"develop": SnapshotBranch( - target=hash_to_bytes(tip_revision_develop), - target_type=TargetType.REVISION, - ), - b"default": SnapshotBranch( - target=hash_to_bytes(tip_revision_default), - target_type=TargetType.REVISION, - ), - b"HEAD": SnapshotBranch(target=b"develop", target_type=TargetType.ALIAS,), - }, + id=hash_to_bytes("f5347d142821cc00f18fb4e2d3253cdefe6ad645"), + branches=expected_branches, ) assert_last_visit_matches( loader.storage, repo_url, status="full", type="hg", snapshot=expected_snapshot.id, ) check_snapshot(expected_snapshot, loader.storage) stats = get_stats(loader.storage) assert stats == { "content": 2, "directory": 3, "origin": 1, "origin_visit": 1, "release": 0, "revision": 58, "skipped_content": 0, "snapshot": 1, } # This test has as been adapted from the historical `HgBundle20Loader` tests # to ensure compatibility of `HgLoaderFromDisk`. # Hashes as been produced by copy pasting the result of the implementation # to prevent regressions. def test_loader_hg_new_visit_with_release(swh_storage, datadir, tmp_path): """Eventful visit with release should yield 1 snapshot""" archive_name = "hello" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) loader = HgLoaderFromDisk(swh_storage, url=repo_url, visit_date=VISIT_DATE,) actual_load_status = loader.load() assert actual_load_status == {"status": "eventful"} # then stats = get_stats(loader.storage) assert stats == { "content": 3, "directory": 3, "origin": 1, "origin_visit": 1, "release": 1, "revision": 3, "skipped_content": 0, "snapshot": 1, } # cf. test_loader.org for explaining from where those hashes tip_release = hash_to_bytes("515c4d72e089404356d0f4b39d60f948b8999140") release = loader.storage.release_get([tip_release])[0] assert release is not None tip_revision_default = hash_to_bytes("c3dbe4fbeaaa98dd961834e4007edb3efb0e2a27") revision = loader.storage.revision_get([tip_revision_default])[0] assert revision is not None expected_snapshot = Snapshot( id=hash_to_bytes("d35668e02e2ba4321dc951cd308cf883786f918a"), branches={ b"default": SnapshotBranch( target=tip_revision_default, target_type=TargetType.REVISION, ), b"0.1": SnapshotBranch(target=tip_release, target_type=TargetType.RELEASE,), b"HEAD": SnapshotBranch(target=b"default", target_type=TargetType.ALIAS,), }, ) check_snapshot(expected_snapshot, loader.storage) assert_last_visit_matches( loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full", snapshot=expected_snapshot.id, ) # This test has as been adapted from the historical `HgBundle20Loader` tests # to ensure compatibility of `HgLoaderFromDisk`. # Hashes as been produced by copy pasting the result of the implementation # to prevent regressions. def test_visit_repository_with_transplant_operations(swh_storage, datadir, tmp_path): """Visit a mercurial repository visit transplant operations within should yield a snapshot as well. """ archive_name = "transplant" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) loader = HgLoaderFromDisk(swh_storage, url=repo_url, visit_date=VISIT_DATE,) # load hg repository actual_load_status = loader.load() assert actual_load_status == {"status": "eventful"} # collect swh revisions assert_last_visit_matches( loader.storage, repo_url, type=RevisionType.MERCURIAL.value, status="full" ) revisions = [] snapshot = snapshot_get_latest(loader.storage, repo_url) for branch in snapshot.branches.values(): if branch.target_type.value != "revision": continue revisions.append(branch.target) # extract original changesets info and the transplant sources hg_changesets = set() transplant_sources = set() for rev in loader.storage.revision_log(revisions): extids = list( loader.storage.extid_get_from_target(ObjectType.REVISION, [rev["id"]]) ) assert len(extids) == 1 hg_changesets.add(hash_to_hex(extids[0].extid)) for k, v in rev["extra_headers"]: if k == b"transplant_source": transplant_sources.add(v.decode("ascii")) # check extracted data are valid assert len(hg_changesets) > 0 assert len(transplant_sources) > 0 assert transplant_sources <= hg_changesets def _partial_copy_storage( old_storage, origin_url: str, mechanism: str, copy_revisions: bool ): """Create a new storage, and only copy ExtIDs or head revisions to it.""" new_storage = get_storage(cls="memory") snapshot = snapshot_get_latest(old_storage, origin_url) assert snapshot heads = [branch.target for branch in snapshot.branches.values()] if mechanism == "extid": extids = old_storage.extid_get_from_target(ObjectType.REVISION, heads) new_storage.extid_add(extids) if copy_revisions: # copy revisions, but erase their metadata to make sure the loader doesn't # fallback to revision.metadata["nodeid"] revisions = [ attr.evolve(rev, metadata={}) for rev in old_storage.revision_get(heads) if rev ] new_storage.revision_add(revisions) else: assert mechanism == "same storage" return old_storage # copy origin, visit, status new_storage.origin_add(old_storage.origin_get([origin_url])) visit = old_storage.origin_visit_get_latest(origin_url) new_storage.origin_visit_add([visit]) statuses = old_storage.origin_visit_status_get(origin_url, visit.visit).results new_storage.origin_visit_status_add(statuses) new_storage.snapshot_add([snapshot]) return new_storage @pytest.mark.parametrize("mechanism", ("extid", "same storage")) def test_load_unchanged_repo_should_be_uneventful( swh_storage, datadir, tmp_path, mechanism ): """Checks the loader can find which revisions it already loaded, using ExtIDs.""" archive_name = "hello" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) repo_path = repo_url.replace("file://", "") loader = HgLoaderFromDisk(swh_storage, repo_path) assert loader.load() == {"status": "eventful"} assert get_stats(loader.storage) == { "content": 3, "directory": 3, "origin": 1, "origin_visit": 1, "release": 1, "revision": 3, "skipped_content": 0, "snapshot": 1, } old_storage = swh_storage # Create a new storage, and only copy ExtIDs or head revisions to it. # This should be enough for the loader to know revisions were already loaded new_storage = _partial_copy_storage( old_storage, repo_path, mechanism=mechanism, copy_revisions=True ) # Create a new loader (to start with a clean slate, eg. remove the caches), # with the new, partial, storage loader = HgLoaderFromDisk(new_storage, repo_path) assert loader.load() == {"status": "uneventful"} if mechanism == "same storage": # Should have all the objects assert get_stats(loader.storage) == { "content": 3, "directory": 3, "origin": 1, "origin_visit": 2, "release": 1, "revision": 3, "skipped_content": 0, "snapshot": 1, } else: # Should have only the objects we directly inserted from the test, plus # a new visit assert get_stats(loader.storage) == { "content": 0, "directory": 0, "origin": 1, "origin_visit": 2, "release": 0, "revision": 1, "skipped_content": 0, "snapshot": 1, } +def test_closed_branch_incremental(swh_storage, datadir, tmp_path): + """Test that a repository with a closed branch does not trip an incremental load""" + archive_name = "example" + archive_path = os.path.join(datadir, f"{archive_name}.tgz") + repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) + repo_path = repo_url.replace("file://", "") + + loader = HgLoaderFromDisk(swh_storage, repo_path) + + # Test 3 loads: full, and two incremental. + assert loader.load() == {"status": "eventful"} + expected_stats = { + "content": 7, + "directory": 16, + "origin": 1, + "origin_visit": 1, + "release": 0, + "revision": 9, + "skipped_content": 0, + "snapshot": 1, + } + assert get_stats(loader.storage) == expected_stats + assert loader.load() == {"status": "uneventful"} + assert get_stats(loader.storage) == {**expected_stats, "origin_visit": 1 + 1} + assert loader.load() == {"status": "uneventful"} + assert get_stats(loader.storage) == {**expected_stats, "origin_visit": 2 + 1} + + def test_load_unchanged_repo__dangling_extid(swh_storage, datadir, tmp_path): """Checks the loader will load revisions targeted by an ExtID if the revisions are missing from the storage""" archive_name = "hello" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) repo_path = repo_url.replace("file://", "") loader = HgLoaderFromDisk(swh_storage, repo_path) assert loader.load() == {"status": "eventful"} assert get_stats(loader.storage) == { "content": 3, "directory": 3, "origin": 1, "origin_visit": 1, "release": 1, "revision": 3, "skipped_content": 0, "snapshot": 1, } old_storage = swh_storage # Create a new storage, and only copy ExtIDs or head revisions to it. # This should be enough for the loader to know revisions were already loaded new_storage = _partial_copy_storage( old_storage, repo_path, mechanism="extid", copy_revisions=False ) # Create a new loader (to start with a clean slate, eg. remove the caches), # with the new, partial, storage loader = HgLoaderFromDisk(new_storage, repo_path) assert get_stats(loader.storage) == { "content": 0, "directory": 0, "origin": 1, "origin_visit": 1, "release": 0, "revision": 0, "skipped_content": 0, "snapshot": 1, } assert loader.load() == {"status": "eventful"} assert get_stats(loader.storage) == { "content": 3, "directory": 3, "origin": 1, "origin_visit": 2, "release": 1, "revision": 3, "skipped_content": 0, "snapshot": 1, } def test_missing_filelog_should_not_crash(swh_storage, datadir, tmp_path): archive_name = "missing-filelog" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) directory = repo_url.replace("file://", "") loader = HgLoaderFromDisk( storage=swh_storage, url=repo_url, directory=directory, # specify directory to avoid clone visit_date=VISIT_DATE, ) actual_load_status = loader.load() assert actual_load_status == {"status": "eventful"} assert_last_visit_matches(swh_storage, repo_url, status="partial", type="hg") def hg_strip(repo: str, revset: str) -> None: """Removes `revset` and all of their descendants from the local repository.""" # Previously called `hg strip`, it was renamed to `hg debugstrip` in Mercurial 5.7 # because it's most likely not what most users want to do (they should use some kind # of history-rewriting tool like `histedit` or `prune`). # But here, it's exactly what we want to do. subprocess.check_call(["hg", "debugstrip", revset], cwd=repo) def test_load_repo_with_new_commits(swh_storage, datadir, tmp_path): archive_name = "hello" archive_path = Path(datadir, f"{archive_name}.tgz") json_path = Path(datadir, f"{archive_name}.json") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) # first load with missing commits hg_strip(repo_url.replace("file://", ""), "tip") loader = HgLoaderFromDisk(swh_storage, repo_url) assert loader.load() == {"status": "eventful"} assert get_stats(loader.storage) == { "content": 2, "directory": 2, "origin": 1, "origin_visit": 1, "release": 0, "revision": 2, "skipped_content": 0, "snapshot": 1, } # second load with all commits repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) loader = HgLoaderFromDisk(swh_storage, repo_url) checker = LoaderChecker(loader=loader, expected=ExpectedSwhids.load(json_path),) checker.check() assert get_stats(loader.storage) == { "content": 3, "directory": 3, "origin": 1, "origin_visit": 2, "release": 1, "revision": 3, "skipped_content": 0, "snapshot": 2, }