"""Run metadata mappings over local sample files and print what they produce.

For every ``(mapping name, category)`` pair in ``CATEGORIES``, each regular
file found in ``BASE_DIR/<category>`` is read as bytes, handed to the
``translate`` method of the matching entry of ``MAPPINGS``, and the result
is pretty-printed to standard output.

This is a manual inspection script: nothing is asserted about the output.
"""
import os
import os.path
import pprint

from swh.indexer.metadata_dictionary import MAPPINGS

# Root directory that holds one sub-directory of sample files per category.
BASE_DIR = os.path.expanduser('~/datasets/')

# Pairs of (key looked up in MAPPINGS, sub-directory name under BASE_DIR).
CATEGORIES = {
    ('GemspecMapping', 'gemspec'),
    ('PythonPkginfoMapping', 'pkginfo'),
}


# NOTE(review): because of its ``test_`` prefix, pytest's default collection
# rules would pick this function up and treat its parameters as fixtures.
# The name is kept so existing callers keep working.
def test_category(mapping_name, category):
    """Translate every sample file of one category and print the results.

    Args:
        mapping_name: key used to look the mapping up in ``MAPPINGS``.
        category: name of the sub-directory of ``BASE_DIR`` to scan.

    Raises:
        OSError: if the category directory cannot be listed or a file
            cannot be read.

    A failed ``MAPPINGS[mapping_name]`` lookup and any exception raised by
    ``mapping.translate`` propagate to the caller unchanged.
    """
    mapping = MAPPINGS[mapping_name]
    dataset_dir = os.path.join(BASE_DIR, category)

    # os.listdir() yields entries in arbitrary order; sorting keeps the
    # printed output comparable from one run to the next.
    for filename in sorted(os.listdir(dataset_dir)):
        path = os.path.join(dataset_dir, filename)
        if not os.path.isfile(path):
            # open() cannot read a directory; skip anything that is not a
            # regular file instead of aborting the whole run.
            continue
        with open(path, 'rb') as fd:
            file_content = fd.read()
        print('Parsing {}:'.format(path))
        pprint.pprint(mapping.translate(file_content))


def main():
    """Run ``test_category`` for every pair listed in ``CATEGORIES``."""
    # CATEGORIES is a set of string tuples, whose iteration order changes
    # between interpreter runs; sort it so categories are always processed
    # in the same order.
    for (mapping_name, category) in sorted(CATEGORIES):
        test_category(mapping_name, category)


if __name__ == '__main__':
    main()