Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Paste
P345
(An Untitled Masterwork)
Active
Public
Actions
Authored by
seirl
on Jan 7 2019, 7:15 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
None
Subscribers
None
-- DONE
CREATE
TABLE
ctas_dataset_dir_to_rev
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/dir_to_rev/'
,
field_delimiter
=
' '
)
AS
SELECT
to_hex
(
directory
.
id
)
as
source
,
to_hex
(
target
)
as
dest
FROM
directory
CROSS
JOIN
UNNEST
(
rev_entries
)
as
t
(
rev_id
)
INNER
JOIN
directory_entry_rev
ON
directory_entry_rev
.
id
=
rev_id
;
-- TODO: error:
-- Query exhausted resources at this scale factor.
CREATE
TABLE
ctas_dataset_dir_to_file
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/dir_to_file/'
,
field_delimiter
=
' '
)
AS
SELECT
to_hex
(
directory
.
id
)
as
source
,
to_hex
(
target
)
as
dest
FROM
directory
CROSS
JOIN
UNNEST
(
file_entries
)
as
t
(
file_id
)
INNER
JOIN
directory_entry_file
ON
directory_entry_file
.
id
=
file_id
;
-- TODO: error:
-- Query exhausted resources at this scale factor.
CREATE
TABLE
ctas_dataset_dir_to_dir
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/dir_to_dir/'
,
field_delimiter
=
' '
)
AS
SELECT
to_hex
(
directory
.
id
)
as
source
,
to_hex
(
target
)
as
dest
FROM
directory
CROSS
JOIN
UNNEST
(
dir_entries
)
as
t
(
dir_id
)
INNER
JOIN
directory_entry_dir
ON
directory_entry_dir
.
id
=
dir_id
;
-- DONE
CREATE
TABLE
ctas_dataset_rev_to_dir
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/rev_to_dir/'
,
field_delimiter
=
' '
)
AS
SELECT
to_hex
(
id
)
as
source
,
to_hex
(
directory
)
as
dest
FROM
revision
;
-- DONE
CREATE
TABLE
ctas_dataset_revision
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/revision/'
,
field_delimiter
=
' '
)
AS
SELECT
to_hex
(
parent_id
)
as
source
,
to_hex
(
id
)
as
dest
FROM
revision_history
;
-- DONE
CREATE
TABLE
ctas_dataset_release
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/release/'
,
field_delimiter
=
' '
)
AS
SELECT
to_hex
(
id
)
as
source
,
to_hex
(
target
)
as
dest
FROM
release
;
-- DONE
CREATE
TABLE
ctas_dataset_snapshot
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/snapshot/'
,
field_delimiter
=
' '
)
AS
SELECT
to_hex
(
snapshot
.
id
)
as
source
,
to_hex
(
snapshot_branch
.
target
)
as
dest
FROM
snapshot_branch
INNER
JOIN
snapshot_branches
on
snapshot_branches
.
branch_id
=
snapshot_branch
.
object_id
INNER
JOIN
snapshot
on
snapshot_branches
.
snapshot_id
=
snapshot
.
object_id
;
-- TODO: error:
-- HIVE_BAD_DATA: Field snapshot_id's type DOUBLE in parquet is incompatible
-- with type bigint defined in table schema.
CREATE
TABLE
ctas_dataset_origin
WITH
(
format
=
'TEXTFILE'
,
external_location
=
's3://softwareheritage/edges_dataset/origin/'
,
field_delimiter
=
' '
)
AS
SELECT
origin
as
source
,
to_hex
(
snapshot
.
id
)
as
dest
FROM
origin_visit
INNER
JOIN
snapshot
on
snapshot_id
=
snapshot
.
object_id
;
Event Timeline
seirl
created this paste.
Jan 7 2019, 7:15 PM
2019-01-07 19:15:55 (UTC+1)
Log In to Comment