diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -147,7 +147,13 @@
 
         try:
             root_dir = rev['directory']
-            dir_ls = self.storage.directory_ls(root_dir, recursive=False)
+            # Kept as a list: entries are inspected here and filtered below
+            dir_ls = list(self.storage.directory_ls(root_dir, recursive=False))
+            if [entry['type'] for entry in dir_ls] == ['dir']:
+                # Root is a single directory: use that directory's listing
+                # instead (e.g. PyPI packages, GNU tarballs)
+                subdir = dir_ls[0]['target']
+                dir_ls = self.storage.directory_ls(subdir, recursive=False)
             files = [entry for entry in dir_ls if entry['type'] == 'file']
             detected_files = detect_metadata(files)
             (mappings, metadata) = self.translate_revision_intrinsic_metadata(
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1135,3 +1135,62 @@
 
         # then
         self.assertEqual(expected_results, results)
+
+    def test_revision_metadata_indexer_single_root_dir(self):
+        """Index a revision whose root holds only one directory."""
+        metadata_indexer = RevisionMetadataIndexer(
+            config=REVISION_METADATA_CONFIG)
+        fill_obj_storage(metadata_indexer.objstorage)
+        fill_storage(metadata_indexer.storage)
+
+        # Re-point the revision at a new root directory (b'123456') whose
+        # only entry is the revision's original root directory
+        rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
+        subdir_id = metadata_indexer.storage._revisions[rev_id]['directory']
+        metadata_indexer.storage._revisions[rev_id]['directory'] = b'123456'
+        metadata_indexer.storage.directory_add([{
+            'id': b'123456',
+            'entries': [{
+                'target': subdir_id,
+                'type': 'dir',
+                'length': None,
+                'name': b'foobar-1.0.0',
+                'sha1': None,
+                'perms': 16384,
+                'sha1_git': None,
+                'status': None,
+                'sha256': None
+            }],
+        }])
+
+        tool = metadata_indexer.idx_storage.indexer_configuration_get(
+            {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()})
+        assert tool is not None
+
+        metadata_indexer.idx_storage.content_metadata_add([{
+            'indexer_configuration_id': tool['id'],
+            'id': b'cde',
+            'metadata': YARN_PARSER_METADATA,
+        }])
+
+        sha1_gits = [
+            hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
+        ]
+        metadata_indexer.run(sha1_gits, 'update-dups')
+
+        results = list(
+            metadata_indexer.idx_storage.
+            revision_intrinsic_metadata_get(sha1_gits))
+
+        expected_results = [{
+            'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
+            'tool': TRANSLATOR_TOOL,
+            'metadata': YARN_PARSER_METADATA,
+            'mappings': ['npm'],
+        }]
+
+        for result in results:
+            del result['tool']['id']
+
+        # then
+        self.assertEqual(expected_results, results)