Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9314150
orc-merge
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
1 KB
Subscribers
None
orc-merge
View Options
#!/usr/bin/env python3
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Merge multiple ORC files into a single one."""
import
argparse
import
pyorc
def main():
    """Merge the ORC files named on the command line into a single ORC file.

    Command-line arguments:
        -o/--output: destination file, opened for binary writing (required).
        files: one or more input ORC files, opened for binary reading.

    The schema of the first input file is used for the output file, and the
    rows of every input are appended to it in command-line order.

    Raises:
        RuntimeError: if the schema of an input file (compared as a string)
            differs from the schema of the first input file.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-o", "--output", type=argparse.FileType(mode="wb"), required=True
    )
    parser.add_argument("files", type=argparse.FileType(mode="rb"), nargs="+")
    args = parser.parse_args()

    # argparse.FileType has already opened every file at this point and never
    # closes them itself, so make sure all handles are released on every path
    # (success, schema mismatch, or read/write error).
    try:
        schema = str(pyorc.Reader(args.files[0]).schema)
        with pyorc.Writer(args.output, schema) as writer:
            for i, f in enumerate(args.files):
                # Release each input's descriptor as soon as it is merged.
                with f:
                    reader = pyorc.Reader(f)
                    if str(reader.schema) != schema:
                        raise RuntimeError(
                            "Inconsistent ORC schemas.\n"
                            "\tFirst file schema: {}\n"
                            "\tFile #{} schema: {}".format(
                                schema, i, str(reader.schema)
                            )
                        )
                    for line in reader:
                        writer.write(line)
    finally:
        # Closing an already-closed file is a no-op, so this is safe for the
        # inputs that were merged before an error occurred.
        for f in args.files:
            f.close()
        # Runs after the Writer context has exited, i.e. after the ORC
        # footer has been written to the output file object.
        args.output.close()
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Thu, Jul 3, 12:13 PM (3 d, 56 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3239343
Attached To
rDDATASET Datasets
Event Timeline
Log In to Comment