#!/usr/bin/env python3

# Copyright (C) 2021  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Merge multiple ORC files into a single one."""

import argparse

import pyorc


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-o", "--output", type=argparse.FileType(mode="wb"), required=True
    )
    parser.add_argument("files", type=argparse.FileType(mode="rb"), nargs="+")
    args = parser.parse_args()

    schema = str(pyorc.Reader(args.files[0]).schema)

    with pyorc.Writer(args.output, schema) as writer:
        for i, f in enumerate(args.files):
            reader = pyorc.Reader(f)
            if str(reader.schema) != schema:
                raise RuntimeError(
                    "Inconsistent ORC schemas.\n"
                    "\tFirst file schema: {}\n"
                    "\tFile #{} schema: {}".format(schema, i, str(reader.schema))
                )
            for line in reader:
                writer.write(line)


if __name__ == "__main__":
    main()
