diff --git a/vlorentz/get_gemspec_examples.py b/vlorentz/get_gemspec_examples.py
deleted file mode 100644
index 699931f..0000000
--- a/vlorentz/get_gemspec_examples.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import csv
-import os
-import os.path
-import subprocess
-
-from swh.model.hashutil import hash_to_bytes
-from swh.objstorage.api.client import RemoteObjStorage
-from swh.objstorage.exc import ObjNotFoundError
-
-BASE_DIR = os.path.expanduser('~/datasets/')
-
-GEMSPEC_LIST_QUERY = """
-select encode(name, 'escape'), encode(sha1, 'hex')
-from directory_entry_file
-inner join content on (sha1_git = target)
-where encode(name, 'escape') like '%.gemspec'
-order by id desc
-limit 100;"""
-GEMSPECS_DIR = os.path.join(BASE_DIR, 'gemspecs')
-GEMSPEC_LIST_PATH = os.path.join(BASE_DIR, 'gemspec_list.csv')
-
-objstorage_client = RemoteObjStorage(
-    url='http://uffizi.internal.softwareheritage.org:5003/')
-
-def get_gemspec_list():
-    if os.path.isfile(GEMSPEC_LIST_PATH):
-        print('Using cached gemspec list')
-        return
-    else:
-        print('Getting gemspec list')
-        csv = subprocess.check_output([
-            'psql', 'service=swh-replica', '-c', GEMSPEC_LIST_QUERY,
-            '-t', '-A', '-F,'])
-        print('Done')
-        with open(GEMSPEC_LIST_PATH, 'wb') as fd:
-            fd.write(csv)
-
-def download_gemspecs():
-    with open(GEMSPEC_LIST_PATH) as fd:
-        for (name, sha1) in csv.reader(fd):
-            download_gemspec(name, sha1)
-
-def download_gemspec(name, sha1):
-    path = os.path.join(GEMSPECS_DIR, '{}_{}'.format(name, sha1))
-    if os.path.isfile(path):
-        print('{} {}:\tskipped (already have it)'.format(name, sha1))
-        return
-    try:
-        obj = objstorage_client.get(sha1)
-    except ObjNotFoundError:
-        print('{} {}:\tnot in objstorage'.format(sha1, name))
-        return
-    else:
-        print('{} {}:\tdownloaded'.format(sha1, name))
-        with open(path, 'wb') as fd:
-            fd.write(obj)
-
-os.makedirs(GEMSPECS_DIR, exist_ok=True)
-get_gemspec_list()
-download_gemspecs()
diff --git a/vlorentz/run_afl.txt b/vlorentz/run_afl.txt
new file mode 100644
index 0000000..4c98166
--- /dev/null
+++ b/vlorentz/run_afl.txt
@@ -0,0 +1,3 @@
+py-afl-fuzz -t 2000 -m 200 -i ~/datasets/gemspec -o fuzzing-results -- python3 ~/swh-environment/snippets/vlorentz/run_mapping.py GemspecMapper @@
+
+py-afl-fuzz -t 2000 -m 200 -i ~/datasets/pkginfo -o fuzzing-results -- python3 ~/swh-environment/snippets/vlorentz/run_mapping.py GemspecMapper @@
diff --git a/vlorentz/run_mapping.py b/vlorentz/run_mapping.py
new file mode 100644
--- /dev/null
+++ b/vlorentz/run_mapping.py
@@ -0,0 +1,54 @@
+"""Run one swh-indexer metadata mapping on a file, under python-afl.
+
+Usage: run_mapping.py MAPPING_NAME FILE
+
+Meant to be started by py-afl-fuzz (see run_afl.txt), which replaces the
+``@@`` placeholder of its command line with the path of the test case.
+"""
+
+import os
+import os.path
+import pprint
+import sys
+import traceback
+
+import afl
+
+# Imported before the first call to afl.loop().
+from swh.indexer.metadata_dictionary import MAPPINGS
+
+try:
+    mapping_name = sys.argv[1]
+    file_name = sys.argv[2]
+except IndexError:
+    # A missing command-line argument raises IndexError, not KeyError.
+    print('Syntax: ./run_mapping.py {GemspecMapping,NpmMapping,...} FILE',
+          file=sys.stderr)
+    sys.exit(1)
+
+if mapping_name not in MAPPINGS:
+    # Fail once, up front, instead of with a KeyError on every test case.
+    print('Unknown mapping {!r}; available: {!r}'.format(
+        mapping_name, list(MAPPINGS)), file=sys.stderr)
+    sys.exit(1)
+
+while afl.loop(1000):
+    with open(file_name, 'rb') as fd:
+        file_content = fd.read()
+    # NOTE(review): looks like a canary to check that the fuzzer reports
+    # crashes; any input containing b'abcd' fails here.  Confirm whether
+    # it should stay.
+    assert b'abcd' not in file_content
+    try:
+        MAPPINGS[mapping_name].translate(file_content)
+    except Exception:
+        # Keep a copy of the traceback on disk, then let the error
+        # propagate.
+        with open('/tmp/tb.txt', 'a') as fd:
+            fd.write('tb:\n')
+            fd.write(repr(list(MAPPINGS)) + '\n')
+            fd.write(traceback.format_exc())
+        raise
+
+# Exit right away, without the normal interpreter shutdown.
+os._exit(0)