-- Import the Software Heritage License Dataset into a SQLite database -- Sample usage: "sqlite3 licenses.sqlite '.read import-dataset.sql'" -- Related: https://forge.softwareheritage.org/T4683 DROP TABLE IF EXISTS "license_blobs" ; CREATE TABLE "license_blobs" ("swhid" TEXT, "sha1" TEXT, "name" TEXT); .import --csv --skip 1 "| zstdcat license-blobs.csv.zst" "license_blobs" CREATE INDEX idx_license_blobs_sha1 ON license_blobs (sha1); CREATE INDEX idx_license_blobs_swhid ON license_blobs (swhid); DROP TABLE IF EXISTS "blobs_earliest"; CREATE TABLE "blobs_earliest" ("swhid" TEXT, "commit" TEXT, "timestamp" INTEGER, "occurrences" INTEGER); .mode tabs .import --skip 1 "| zstdcat blobs-earliest.csv.zst" "blobs_earliest" .mode list CREATE INDEX idx_blobs_earliest_swhid ON blobs_earliest (swhid); DROP TABLE IF EXISTS "blobs_fileinfo"; CREATE TABLE "blobs_fileinfo" ("sha1" TEXT, "mime_type" TEXT, "encoding" TEXT, "line_count" INTEGER, "word_count" INTEGER, "size" INTEGER); .import --csv --skip 1 "| zstdcat blobs-fileinfo.csv.zst" "blobs_fileinfo" CREATE INDEX idx_blobs_fileinfo_sha1 ON blobs_fileinfo (sha1); DROP TABLE IF EXISTS "blobs_nb_origins"; CREATE TABLE "blobs_nb_origins" ("swhid" TEXT, "count" INTEGER); .mode tabs .import "| zstdcat blobs-nb-origins.csv.zst" "blobs_nb_origins" .mode list CREATE INDEX idx_blobs_nb_origins_swhid ON blobs_nb_origins (swhid); DROP TABLE IF EXISTS "blobs_origins"; CREATE TABLE "blobs_origins" ("swhid" TEXT, "url" TEXT); .mode tabs .import "| zstdcat blobs-origins.csv.zst" "blobs_origins" .mode list CREATE INDEX idx_blobs_origins_swhid ON blobs_origins (swhid); DROP TABLE IF EXISTS "blobs_scancode"; CREATE TABLE "blobs_scancode" ("sha1" TEXT, "license" TEXT, score REAL); .import --csv --skip 1 "| zstdcat blobs-scancode.csv.zst" "blobs_scancode" CREATE INDEX idx_blobs_scancode_sha1 ON blobs_scancode (sha1);