Page MenuHomeSoftware Heritage

orc-merge
No OneTemporary

orc-merge

#!/usr/bin/env python3
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Merge multiple ORC files into a single one."""
import argparse
import pyorc
def main():
    """Merge the ORC files given on the command line into one output file.

    Command-line arguments:
        -o / --output: path of the ORC file to write (required).
        files: one or more ORC files to read.

    The schema of the first input file is used for the output. Rows are
    copied file by file, in the order the files were given.

    Raises:
        RuntimeError: if an input file's schema (compared as a string)
            differs from the schema of the first input file.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-o", "--output", type=argparse.FileType(mode="wb"), required=True
    )
    parser.add_argument("files", type=argparse.FileType(mode="rb"), nargs="+")
    args = parser.parse_args()

    try:
        # The first file's schema is the reference every other file must match.
        schema = str(pyorc.Reader(args.files[0]).schema)
        with pyorc.Writer(args.output, schema) as writer:
            for i, f in enumerate(args.files):
                reader = pyorc.Reader(f)
                if str(reader.schema) != schema:
                    raise RuntimeError(
                        "Inconsistent ORC schemas.\n"
                        "\tFirst file schema: {}\n"
                        "\tFile #{} schema: {}".format(
                            schema, i, str(reader.schema)
                        )
                    )
                for line in reader:
                    writer.write(line)
    finally:
        # argparse.FileType opens the files but never closes them; release
        # every handle explicitly, whether the merge succeeded or raised.
        for f in args.files:
            f.close()
        args.output.close()


if __name__ == "__main__":
    main()

File Metadata

Mime Type
text/x-python
Expires
Thu, Jul 3, 12:13 PM (3 d, 56 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3239343

Event Timeline