diff --git a/tools/git2graph/Makefile b/tools/git2graph/Makefile index 958ea0d..2b804f7 100644 --- a/tools/git2graph/Makefile +++ b/tools/git2graph/Makefile @@ -1,25 +1,25 @@ CC = gcc LD = gcc BATS = bats -LIBS = libgit2 +LIBS = libgit2 glib-2.0 -CFLAGS = -Wall -Werror $(shell pkg-config --cflags $(LIBS)) +CFLAGS = -Wall -Werror $(shell pkg-config --cflags $(LIBS)) -g LDFLAGS = $(shell pkg-config --libs $(LIBS)) BATS_FLAGS = all: git2graph git2graph: git2graph.o $(LD) -o $@ $^ $(CFLAGS) $(LDFLAGS) %.o: %.c $(CC) $(CFLAGS) -c $< test: all $(BATS) $(BATS_FLAGS) tests/ clean: rm -f *.o git2graph .PHONY: all clean test diff --git a/tools/git2graph/git2graph.c b/tools/git2graph/git2graph.c index 2d058f6..45dcd56 100644 --- a/tools/git2graph/git2graph.c +++ b/tools/git2graph/git2graph.c @@ -1,388 +1,514 @@ /* * Copyright (C) 2019 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU General Public License version 3, or any later version * See top-level LICENSE file for more information */ /* Crawl a Git repository and output it as a graph, i.e., as a pair of textual * files . The nodes file will contain a list of graph nodes as * Software Heritage (SWH) Persistent Identifiers (PIDs); the edges file a list * of graph edges as PID pairs. */ #include #include #include #include #include #include #include #include #include +#include #define SWH_PREFIX "swh:1" #define SWH_DIR "swh:1:dir" #define SWH_REV "swh:1:rev" #define SWH_PIDSZ (GIT_OID_HEXSZ + 10) // size of a SWH PID // line-lengths in nodes and edges file #define NODES_LINELEN (SWH_PIDSZ + 1) #define EDGES_LINELEN (SWH_PIDSZ * 2 + 2) // Output buffer sizes for nodes and edges files. To guarantee atomic and // non-interleaved writes (which matter when used concurrently writing to a // shared FIFO), these sizes must be <= PIPE_BUF and multiples of // {NODES,EDGES}_LINELEN. 
#define NODES_OUTSZ ((PIPE_BUF / NODES_LINELEN) * NODES_LINELEN)
 #define EDGES_OUTSZ ((PIPE_BUF / EDGES_LINELEN) * EDGES_LINELEN)
 
 // GIT_OBJ_* constants extension for non-git objects
 #define SWH_OBJ_SNP 5 // snapshots (swh:1:snp:...)
 #define SWH_OBJ_ORI 6 // origins (swh:1:ori:...)
 #define SWH_OBJ_LOC 7 // lines of code (swh:1:loc:...)
 #define OBJ_TYPES 8
 
+#define ELT_SEP "," // element separator in lists
+#define PAIR_SEP ":" // key/value separator in pairs
+
 /* map from libgit2's git_otype (+ SWH-specific types above) to SWH PID type
  * qualifiers */
 static char *_git_otype2swh[OBJ_TYPES] = {
-	"ERR", // 0 == GIT_OBJ__EXT1 (unused)
+	"*", // 0 == GIT_OBJ__EXT1 (unused in libgit2, used as wildcard here)
 	"rev", // 1 == GIT_OBJ_COMMIT
 	"dir", // 2 == GIT_OBJ_TREE
 	"cnt", // 3 == GIT_OBJ_BLOB
 	"rel", // 4 == GIT_OBJ_TAG
 	"snp", // 5 == SWH_OBJ_SNP
 	"ori", // 6 == SWH_OBJ_ORI
 	"loc", // 7 == SWH_OBJ_LOC
 };
 
+/* Wildcard ("any type") marker: slot 0 of _git_otype2swh, spelled "*".
+ * NOTE(review): libgit2's git_otype appears to declare its own GIT_OBJ_ANY
+ * enumerator (-2); from this point on the macro shadows it. Confirm, and
+ * consider a SWH_-prefixed name to avoid the clash. */
+#define GIT_OBJ_ANY GIT_OBJ__EXT1
+
 /* Convert a git object type (+ SWH-specific types above) to the corresponding
  * SWH PID type. */
 #define git_otype2swh(type) _git_otype2swh[(type)]
 
+/* Parse object type (libgit's + SWH-specific types) from 3-letter type
+ * qualifiers.
+ *
+ * str: NUL-terminated type qualifier ("rev", "dir", ..., or "*"); may be NULL.
+ *
+ * Return either object type (an index into _git_otype2swh), or 0 in case of
+ * "*" wildcard, or -1 in case of parse error (unknown qualifier or NULL
+ * input). Callers must check for -1 before using the result as an index. */
+int parse_otype(const char *str) {
+	if (str == NULL)  // e.g., missing half of a "src:dst" pair
+		return -1;
+
+	for (int i = 0; i < OBJ_TYPES; i++) {
+		if (strcmp(str, _git_otype2swh[i]) == 0)
+			return i;
+	}
+	return -1;
+}
+
 /* Allowed edge types matrix. Each cell denotes whether edges from a given
  * SRC_TYPE to a given DST_TYPE should be produced or not.
*/ static int _allowed_edges[OBJ_TYPES][OBJ_TYPES] = { // TO rev dir cnt rel snp ori loc | // ---------------------------------------------------------------- {true, true, true, true, true, true, true, true}, // | FROM {true, true, true, true, true, true, true, true}, // | rev {true, true, true, true, true, true, true, true}, // | dir {true, true, true, true, true, true, true, true}, // | cnt {true, true, true, true, true, true, true, true}, // | rel {true, true, true, true, true, true, true, true}, // | snp {true, true, true, true, true, true, true, true}, // | ori {true, true, true, true, true, true, true, true}, // | loc }; /* Allowed node type vector. */ static int _allowed_nodes[OBJ_TYPES] = { true, // true, // rev true, // dir true, // cnt true, // rel true, // snp true, // ori true, // loc }; #define is_edge_allowed(src_type, dst_type) _allowed_edges[(src_type)][(dst_type)] #define is_node_allowed(type) _allowed_nodes[(type)] /* extra payload for callback invoked on Git objects */ typedef struct { git_odb *odb; // Git object DB git_repository *repo; // Git repository FILE *nodes_out; // stream to write nodes to, or NULL FILE *edges_out; // stream to write edges to, or NULL } cb_payload; /* Invoke a libgit2 method and exits with an error message in case of * failure. * * Reused from libgit2 examples, specifically common.c, available under CC0. */ void check_lg2(int error, const char *message, const char *extra) { const git_error *lg2err; const char *lg2msg = "", *lg2spacer = ""; if (!error) return; if ((lg2err = giterr_last()) != NULL && lg2err->message != NULL) { lg2msg = lg2err->message; lg2spacer = " - "; } if (extra) fprintf(stderr, "%s '%s' [%d]%s%s\n", message, extra, error, lg2spacer, lg2msg); else fprintf(stderr, "%s [%d]%s%s\n", message, error, lg2spacer, lg2msg); exit(1); } /* Compute allowed node types base on allowed edge types, which is a sane * default. 
The result should be overridden in case one wants to output
  * specific nodes, but not their outgoing edges. */
 void init_allowed_nodes_from_edges(
 	int allowed_edges[OBJ_TYPES][OBJ_TYPES],
 	int allowed_nodes[OBJ_TYPES])
 {
-	for (int src_type = 0; src_type < OBJ_TYPES; src_type++) {
-		allowed_nodes[src_type] = false;
-		for (int dst_type = 0; dst_type < OBJ_TYPES; dst_type++) {
-			allowed_nodes[src_type] = allowed_nodes[src_type] \
-				|| allowed_edges[src_type][dst_type];
-		}
+	for (int i = 0; i < OBJ_TYPES; i++) {
+		allowed_nodes[i] = false;  // disallowed by default
+		// allowed if an edge of any source type can lead to it
+		// (column i of the matrix: i is the destination type)...
+		for (int src_type = 0; src_type < OBJ_TYPES; src_type++)
+			allowed_nodes[i] = allowed_nodes[i] \
+				|| allowed_edges[src_type][i];
+		// ...or if an edge can originate from it
+		// (row i of the matrix: i is the source type)
+		for (int dst_type = 0; dst_type < OBJ_TYPES; dst_type++)
+			allowed_nodes[i] = allowed_nodes[i] \
+				|| allowed_edges[i][dst_type];
 	}
 }
 
 
 /* Emit commit edges. */
 void emit_commit_edges(const git_commit *commit, const char *swhpid, FILE *out) {
 	unsigned int i, max_i;
 	char oidstr[GIT_OID_HEXSZ + 1];  // to PID
 
 	// rev -> dir
 	if (is_edge_allowed(GIT_OBJ_COMMIT, GIT_OBJ_TREE)) {
 		git_oid_tostr(oidstr, sizeof(oidstr),
 			      git_commit_tree_id(commit));
 		fprintf(out, "%s %s:%s\n", swhpid, SWH_DIR, oidstr);
 	}
 
 	// rev -> rev
 	if (is_edge_allowed(GIT_OBJ_COMMIT, GIT_OBJ_COMMIT)) {
 		max_i = (unsigned int)git_commit_parentcount(commit);
 		for (i = 0; i < max_i; ++i) {
 			git_oid_tostr(oidstr, sizeof(oidstr),
 				      git_commit_parent_id(commit, i));
 			fprintf(out, "%s %s:%s\n", swhpid, SWH_REV, oidstr);
 		}
 	}
 }
 
 
 /* Emit tag edges. */
 void emit_tag_edges(const git_tag *tag, const char *swhpid, FILE *out) {
 	char oidstr[GIT_OID_HEXSZ + 1];
 	int target_type;
 
 	// rel -> *
 	target_type = git_tag_target_type(tag);
 	if (is_edge_allowed(GIT_OBJ_TAG, target_type)) {
 		git_oid_tostr(oidstr, sizeof(oidstr), git_tag_target_id(tag));
 		fprintf(out, "%s %s:%s:%s\n", swhpid, SWH_PREFIX,
 			git_otype2swh(target_type), oidstr);
 	}
 }
 
 
 /* Emit tree edges.
*/ void emit_tree_edges(const git_tree *tree, const char *swhpid, FILE *out) { size_t i, max_i = (int)git_tree_entrycount(tree); char oidstr[GIT_OID_HEXSZ + 1]; const git_tree_entry *te; int entry_type; // dir -> * for (i = 0; i < max_i; ++i) { te = git_tree_entry_byindex(tree, i); entry_type = git_tree_entry_type(te); if (is_edge_allowed(GIT_OBJ_TREE, entry_type)) { git_oid_tostr(oidstr, sizeof(oidstr), git_tree_entry_id(te)); fprintf(out, "%s %s:%s:%s\n", swhpid, SWH_PREFIX, git_otype2swh(entry_type), oidstr); } } } /* Emit node and edges for current object. */ int emit_obj(const git_oid *id, void *payload) { char oidstr[GIT_OID_HEXSZ + 1]; char swhpid[SWH_PIDSZ + 1]; size_t len; int obj_type; git_commit *commit; git_tag *tag; git_tree *tree; git_odb *odb = ((cb_payload *) payload)->odb; git_repository *repo = ((cb_payload *) payload)->repo; FILE *nodes_out = ((cb_payload *) payload)->nodes_out; FILE *edges_out = ((cb_payload *) payload)->edges_out; check_lg2(git_odb_read_header(&len, &obj_type, odb, id), "cannot read object header", NULL); if (!is_node_allowed(obj_type)) // no outbound edges allowed, skip node return 0; // emit node sprintf(swhpid, "swh:1:%s:", git_otype2swh(obj_type)); git_oid_tostr(swhpid + 10, sizeof(oidstr), id); if (nodes_out != NULL) fprintf(nodes_out, "%s\n", swhpid); // emit edges if (edges_out != NULL) { switch (obj_type) { case GIT_OBJ_BLOB: // graph leaf: no edges to emit break; case GIT_OBJ_COMMIT: check_lg2(git_commit_lookup(&commit, repo, id), "cannot find commit", NULL); emit_commit_edges(commit, swhpid, edges_out); git_commit_free(commit); break; case GIT_OBJ_TAG: check_lg2(git_tag_lookup(&tag, repo, id), "cannot find tag", NULL); emit_tag_edges(tag, swhpid, edges_out); git_tag_free(tag); break; case GIT_OBJ_TREE: check_lg2(git_tree_lookup(&tree, repo, id), "cannot find tree", NULL); emit_tree_edges(tree, swhpid, edges_out); git_tree_free(tree); break; default: git_oid_tostr(oidstr, sizeof(oidstr), id); fprintf(stderr, "ignoring 
unknown object: %s\n", oidstr);
 			break;
 		}
 	}
 
 	return 0;
 }
 
 
+/* Print an optional error message (when msg is not NULL) followed by the
+ * usage help to stderr, then terminate with EXIT_FAILURE. Never returns. */
 void exit_usage(char *msg) {
 	if (msg != NULL)
 		fprintf(stderr, "Error: %s\n\n", msg);
 
 	fprintf(stderr, "Usage: git2graph [OPTION..] GIT_REPO_DIR\n");
 	fprintf(stderr, "\n");
 	fprintf(stderr, "Options:\n");
 	fprintf(stderr, " -e, --edges-file=PATH file where to store edges\n");
 	fprintf(stderr, " -n, --nodes-file=PATH file where to store nodes\n");
+	fprintf(stderr, " -E, --edges-filter=EDGES_EXPR only emit selected edges\n");
+	fprintf(stderr, " -N, --nodes-filter=NODES_EXPR only emit selected nodes\n");
+	fprintf(stderr, "\n");
+	fprintf(stderr, "EDGES_EXPR is a comma-separated list of src_TYPE:dst_TYPE pairs\n");
+	fprintf(stderr, "NODES_EXPR is a comma-separated list of node TYPEs\n");
+	fprintf(stderr, "TYPE is one of: cnt, dir, loc, ori, rel, rev, snp, or * (any type)\n");
 	fprintf(stderr, "\nNote: you can use \"-\" for stdout in file names.\n");
 	exit(EXIT_FAILURE);
 }
 
 
 /* command line arguments */
 typedef struct {
 	char *nodes_path;
 	char *edges_path;
+	char *nodes_filter;  // raw -N/--nodes-filter expression, or NULL
+	char *edges_filter;  // raw -E/--edges-filter expression, or NULL
 	char *repo_dir;
 } cli_args;
 
 
 cli_args *parse_cli(int argc, char **argv) {
 	int opt;
 
 	cli_args *args = malloc(sizeof(cli_args));
 	if (args == NULL) {
 		perror("Cannot allocate memory.");
 		exit(EXIT_FAILURE);
 	} else {
 		args->nodes_path = NULL;
 		args->edges_path = NULL;
+		args->nodes_filter = NULL;
+		args->edges_filter = NULL;
 		args->repo_dir = NULL;
 	}
 
 	static struct option long_opts[] = {
 		{"edges-file", required_argument, 0, 'e' },
 		{"nodes-file", required_argument, 0, 'n' },
+		{"edges-filter", required_argument, 0, 'E' },
+		{"nodes-filter", required_argument, 0, 'N' },
 		{"help", no_argument, 0, 'h' },
 		{0, 0, 0, 0 }
 	};
 
-	while ((opt = getopt_long(argc, argv, "e:n:h", long_opts,
+	while ((opt = getopt_long(argc, argv, "e:n:E:N:h", long_opts,
 				  NULL)) != -1) {
 		switch (opt) {
 		case 'e': args->edges_path = optarg; break;
 		case 'n': args->nodes_path = optarg; break;
+		case 'E': args->edges_filter = optarg; break;
+		case 'N': args->nodes_filter = optarg; break;
 		case
'h':
 		default:
 			exit_usage(NULL);
 		}
 	}
 	if (argv[optind] == NULL)
 		exit_usage(NULL);
 	args->repo_dir = argv[optind];
 
 	return args;
 }
 
 
 /* open output stream specified on the command line (if at all) */
 FILE *open_out_stream(char *cli_path, char *buf, int bufsiz) {
 	FILE *stream;
 
 	if (cli_path == NULL)
 		stream = NULL;
 	else if (strcmp(cli_path, "-") == 0)
 		stream = stdout;
 	else if((stream = fopen(cli_path, "w")) == NULL) {
 		fprintf(stderr, "can't open file: %s\n", cli_path);
 		exit(EXIT_FAILURE);
 	}
 
 	// ensure atomic and non-interleaved writes
 	if (stream != NULL)
 		setvbuf(stream, buf, _IOFBF, bufsiz);
 
 	return stream;
 }
 
 
+/* Set every cell of an OBJ_TYPES x OBJ_TYPES matrix to val. */
+void fill_matrix(int matrix[OBJ_TYPES][OBJ_TYPES], int val) {
+	for (int i = 0; i < OBJ_TYPES; i++)
+		for (int j = 0; j < OBJ_TYPES; j++)
+			matrix[i][j] = val;
+}
+
+
+/* Set every cell of the given row of matrix to val. The row index is used
+ * as-is: the caller must pass a value in [0, OBJ_TYPES). */
+void fill_row(int matrix[OBJ_TYPES][OBJ_TYPES], int row, int val) {
+	for (int j = 0; j < OBJ_TYPES; j++)
+		matrix[row][j] = val;
+}
+
+
+/* Set every cell of the given column of matrix to val. The column index is
+ * used as-is: the caller must pass a value in [0, OBJ_TYPES). */
+void fill_column(int matrix[OBJ_TYPES][OBJ_TYPES], int col, int val) {
+	for (int i = 0; i < OBJ_TYPES; i++)
+		matrix[i][col] = val;
+}
+
+
+/* Set every element of an OBJ_TYPES-long vector to val. */
+void fill_vector(int vector[OBJ_TYPES], int val) {
+	for (int i = 0; i < OBJ_TYPES; i++)
+		vector[i] = val;
+}
+
+
+/* Dump node/edge filters to a given stream. For debugging purposes.
*/
+void _dump_filters(FILE *out, int matrix[OBJ_TYPES][OBJ_TYPES], int vector[OBJ_TYPES]) {
+	fprintf(out, "TO rev dir cnt rel snp ori loc FROM\n");
+	for(int i = 0; i < OBJ_TYPES; i++) {
+		for(int j = 0; j < OBJ_TYPES; j++)
+			fprintf(out, "%d ", matrix[i][j]);
+		fprintf(out, "%s\n", _git_otype2swh[i]);
+	}
+
+	fprintf(out, " rev dir cnt rel snp ori loc\n");
+	for (int i = 0; i < OBJ_TYPES; i++)
+		fprintf(out, "%d ", vector[i]);
+}
+
+
+/* Set up nodes and edges restrictions, interpreting command line filters.
+ *
+ * edges_filter: ELT_SEP-separated list of src_TYPE PAIR_SEP dst_TYPE pairs,
+ *   or NULL to leave the edge matrix untouched.
+ * nodes_filter: ELT_SEP-separated list of node TYPEs, or NULL to derive the
+ *   allowed nodes from the allowed edges.
+ *
+ * TYPE is any qualifier understood by parse_otype(), "*" being a wildcard.
+ * Malformed pairs (missing PAIR_SEP, empty elements) and unknown types abort
+ * the program through exit_usage(): parse_otype() reports them as -1, which
+ * must never be used as an index into the filter tables. */
+void init_graph_filters(char *nodes_filter, char *edges_filter) {
+	char **filters;
+	char **types;
+	char **ptr;
+	int src_type, dst_type;
+
+	if (edges_filter != NULL) {
+		fill_matrix(_allowed_edges, false);  // nothing allowed by default
+		filters = g_strsplit(edges_filter, ELT_SEP, -1);  // "typ:typ" pairs
+		for (ptr = filters; *ptr; ptr++) {
+			types = g_strsplit(*ptr, PAIR_SEP, 2);  // 2 "typ" fragments
+
+			// an empty element yields no fragment at all, an
+			// element without PAIR_SEP yields only one
+			if (types[0] == NULL || types[1] == NULL)
+				exit_usage("invalid edges filter: expected src_TYPE:dst_TYPE pairs");
+
+			src_type = parse_otype(types[0]);
+			dst_type = parse_otype(types[1]);
+			g_strfreev(types);  // done with fragments; free before any early exit
+			if (src_type < 0 || dst_type < 0)
+				exit_usage("invalid edges filter: unknown object type");
+
+			if (src_type == GIT_OBJ_ANY && dst_type == GIT_OBJ_ANY) {
+				// "*:*" wildcard
+				fill_matrix(_allowed_edges, true);
+				break;  // all edges allowed already
+			} else if (src_type == GIT_OBJ_ANY) {  // "*:typ" wildcard
+				fill_column(_allowed_edges, dst_type, true);
+			} else if (dst_type == GIT_OBJ_ANY) {  // "typ:*" wildcard
+				fill_row(_allowed_edges, src_type, true);
+			} else  // "src_type:dst_type"
+				_allowed_edges[src_type][dst_type] = true;
+		}
+		g_strfreev(filters);
+	}
+
+	if (nodes_filter != NULL) {
+		fill_vector(_allowed_nodes, false);  // nothing allowed by default
+		filters = g_strsplit(nodes_filter, ELT_SEP, -1);  // "typ" fragments
+		for (ptr = filters; *ptr; ptr++) {
+			src_type = parse_otype(*ptr);
+			if (src_type < 0)
+				exit_usage("invalid nodes filter: unknown object type");
+
+			if (src_type == GIT_OBJ_ANY) {  // "*" wildcard
+				fill_vector(_allowed_nodes, true);
+				break;  // all nodes allowed already
+			} else
+				_allowed_nodes[src_type] = true;
+		}
+		g_strfreev(filters);
+	} else {  // no explicit node filtering request, derive allowed nodes
+		  // from allowed edges
+		init_allowed_nodes_from_edges(_allowed_edges, _allowed_nodes);
+	}
+}
+
+
 int main(int argc, char **argv) {
 	git_repository *repo;
 	git_odb *odb;
 	int rc;
 	cli_args *args;
 	cb_payload *payload;
 	FILE *nodes_out, *edges_out;
 	char nodes_buf[NODES_OUTSZ];
 	char edges_buf[EDGES_OUTSZ];
 
 	args = parse_cli(argc, argv);
-	init_allowed_nodes_from_edges(_allowed_edges, _allowed_nodes);
+	init_graph_filters(args->nodes_filter, args->edges_filter);
+	// _dump_filters(stdout, _allowed_edges, _allowed_nodes);
 
 	git_libgit2_init();
 	check_lg2(git_repository_open(&repo, args->repo_dir),
 		  "cannot open repository", NULL);
 	check_lg2(git_repository_odb(&odb, repo),
 		  "cannot get object DB", NULL);
 
 	nodes_out = open_out_stream(args->nodes_path, nodes_buf, NODES_OUTSZ);
 	edges_out = open_out_stream(args->edges_path, edges_buf, EDGES_OUTSZ);
 	assert(NODES_OUTSZ <= PIPE_BUF && (NODES_OUTSZ % NODES_LINELEN == 0));
 	assert(EDGES_OUTSZ <= PIPE_BUF && (EDGES_OUTSZ % EDGES_LINELEN == 0));
 
 	payload = malloc(sizeof(cb_payload));
 	payload->odb = odb;
 	payload->repo = repo;
 	payload->nodes_out = nodes_out;
 	payload->edges_out = edges_out;
 
 	rc = git_odb_foreach(odb, emit_obj, payload);
 	check_lg2(rc, "failure during object iteration", NULL);
 
 	git_odb_free(odb);
 	git_repository_free(repo);
 	free(payload);
 	exit(rc);
 }
diff --git a/tools/git2graph/tests/data/graphs/directories/edges.csv b/tools/git2graph/tests/data/graphs/directories/edges.csv
new file mode 100644
index 0000000..50a3f55
--- /dev/null
+++ b/tools/git2graph/tests/data/graphs/directories/edges.csv
@@ -0,0 +1,16 @@
+swh:1:dir:0f9566327353acd6cba286508a56e71376fcfda3 swh:1:cnt:100b0dec8c53a40e4de7714b2c612dad5fad9985
+swh:1:dir:0f9566327353acd6cba286508a56e71376fcfda3 swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99
+swh:1:dir:0f9566327353acd6cba286508a56e71376fcfda3 swh:1:cnt:5716ca5987cbf97d6bb54920bea6adde242d87e6
+swh:1:dir:205f6b799e7d5c2524468ca006a0131aa57ecce7 swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99
+swh:1:dir:2312eb97a90b5e561508b4197c89f092f8fd5ef8 swh:1:cnt:5716ca5987cbf97d6bb54920bea6adde242d87e6 +swh:1:dir:2312eb97a90b5e561508b4197c89f092f8fd5ef8 swh:1:cnt:b210800439ffe3f2db0d47d9aab1969b38a770a5 +swh:1:dir:5917a22fb466d2088d926749b7362836f3f05687 swh:1:cnt:1fe912cdd835ae6be5feb79acafaa5fa8ea60f23 +swh:1:dir:5917a22fb466d2088d926749b7362836f3f05687 swh:1:cnt:b210800439ffe3f2db0d47d9aab1969b38a770a5 +swh:1:dir:89ff1a2aefcbff0f09197f0fd8beeb19a7b6e51c swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99 +swh:1:dir:89ff1a2aefcbff0f09197f0fd8beeb19a7b6e51c swh:1:cnt:5716ca5987cbf97d6bb54920bea6adde242d87e6 +swh:1:dir:a83dd64716d4b1afeb9821d2018ade21696a6d9c swh:1:cnt:1fe912cdd835ae6be5feb79acafaa5fa8ea60f23 +swh:1:dir:a83dd64716d4b1afeb9821d2018ade21696a6d9c swh:1:cnt:76018072e09c5d31c8c6e3113b8aa0fe625195ca +swh:1:dir:a83dd64716d4b1afeb9821d2018ade21696a6d9c swh:1:cnt:b210800439ffe3f2db0d47d9aab1969b38a770a5 +swh:1:dir:e03c0f3158ec6b1432c83e2c093a8a293a4f58e5 swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99 +swh:1:dir:e03c0f3158ec6b1432c83e2c093a8a293a4f58e5 swh:1:cnt:5716ca5987cbf97d6bb54920bea6adde242d87e6 +swh:1:dir:e03c0f3158ec6b1432c83e2c093a8a293a4f58e5 swh:1:cnt:76018072e09c5d31c8c6e3113b8aa0fe625195ca diff --git a/tools/git2graph/tests/data/graphs/directories/nodes.csv b/tools/git2graph/tests/data/graphs/directories/nodes.csv new file mode 100644 index 0000000..505096b --- /dev/null +++ b/tools/git2graph/tests/data/graphs/directories/nodes.csv @@ -0,0 +1,13 @@ +swh:1:cnt:100b0dec8c53a40e4de7714b2c612dad5fad9985 +swh:1:cnt:1fe912cdd835ae6be5feb79acafaa5fa8ea60f23 +swh:1:cnt:257cc5642cb1a054f08cc83f2d943e56fd3ebe99 +swh:1:cnt:5716ca5987cbf97d6bb54920bea6adde242d87e6 +swh:1:cnt:76018072e09c5d31c8c6e3113b8aa0fe625195ca +swh:1:cnt:b210800439ffe3f2db0d47d9aab1969b38a770a5 +swh:1:dir:0f9566327353acd6cba286508a56e71376fcfda3 +swh:1:dir:205f6b799e7d5c2524468ca006a0131aa57ecce7 +swh:1:dir:2312eb97a90b5e561508b4197c89f092f8fd5ef8 
+swh:1:dir:5917a22fb466d2088d926749b7362836f3f05687 +swh:1:dir:89ff1a2aefcbff0f09197f0fd8beeb19a7b6e51c +swh:1:dir:a83dd64716d4b1afeb9821d2018ade21696a6d9c +swh:1:dir:e03c0f3158ec6b1432c83e2c093a8a293a4f58e5 diff --git a/tools/git2graph/tests/data/graphs/releases/edges.csv b/tools/git2graph/tests/data/graphs/releases/edges.csv new file mode 100644 index 0000000..1e3aab4 --- /dev/null +++ b/tools/git2graph/tests/data/graphs/releases/edges.csv @@ -0,0 +1,2 @@ +swh:1:rel:1720af781051a8cafdf3cf134c263ec5c5e72412 swh:1:rev:4d267d40bc0dbbfaf1f5096de9873ca42ae03039 +swh:1:rel:d48ad9915be780fcfa296985f69df35e144864a5 swh:1:rev:945cc4759b4cc02c7ed57bcafeea82f3656f7bc6 diff --git a/tools/git2graph/tests/data/graphs/releases/nodes.csv b/tools/git2graph/tests/data/graphs/releases/nodes.csv new file mode 100644 index 0000000..5e13ab1 --- /dev/null +++ b/tools/git2graph/tests/data/graphs/releases/nodes.csv @@ -0,0 +1,4 @@ +swh:1:rel:1720af781051a8cafdf3cf134c263ec5c5e72412 +swh:1:rel:d48ad9915be780fcfa296985f69df35e144864a5 +swh:1:rev:4d267d40bc0dbbfaf1f5096de9873ca42ae03039 +swh:1:rev:945cc4759b4cc02c7ed57bcafeea82f3656f7bc6 diff --git a/tools/git2graph/tests/data/graphs/revisions/edges.csv b/tools/git2graph/tests/data/graphs/revisions/edges.csv new file mode 100644 index 0000000..aaadab9 --- /dev/null +++ b/tools/git2graph/tests/data/graphs/revisions/edges.csv @@ -0,0 +1,7 @@ +swh:1:rev:20cca959bae94594f60450f339b408581f1b401f swh:1:rev:4d267d40bc0dbbfaf1f5096de9873ca42ae03039 +swh:1:rev:261586c455130b4bf10a5be7ffb0bf4077581b56 swh:1:rev:4d267d40bc0dbbfaf1f5096de9873ca42ae03039 +swh:1:rev:4d267d40bc0dbbfaf1f5096de9873ca42ae03039 swh:1:rev:bfbf4af79c903a8b2d8eacfacddef16467062fd9 +swh:1:rev:8fcfd562b8abe4573313d02e864b7df7d31537f6 swh:1:rev:945cc4759b4cc02c7ed57bcafeea82f3656f7bc6 +swh:1:rev:945cc4759b4cc02c7ed57bcafeea82f3656f7bc6 swh:1:rev:4d267d40bc0dbbfaf1f5096de9873ca42ae03039 +swh:1:rev:9bf3ce249cf3d74ef57d5a1fb4227e26818553f0 
swh:1:rev:261586c455130b4bf10a5be7ffb0bf4077581b56
+swh:1:rev:9bf3ce249cf3d74ef57d5a1fb4227e26818553f0 swh:1:rev:8fcfd562b8abe4573313d02e864b7df7d31537f6
diff --git a/tools/git2graph/tests/data/graphs/revisions/nodes.csv b/tools/git2graph/tests/data/graphs/revisions/nodes.csv
new file mode 100644
index 0000000..e324159
--- /dev/null
+++ b/tools/git2graph/tests/data/graphs/revisions/nodes.csv
@@ -0,0 +1,7 @@
+swh:1:rev:20cca959bae94594f60450f339b408581f1b401f
+swh:1:rev:261586c455130b4bf10a5be7ffb0bf4077581b56
+swh:1:rev:4d267d40bc0dbbfaf1f5096de9873ca42ae03039
+swh:1:rev:8fcfd562b8abe4573313d02e864b7df7d31537f6
+swh:1:rev:945cc4759b4cc02c7ed57bcafeea82f3656f7bc6
+swh:1:rev:9bf3ce249cf3d74ef57d5a1fb4227e26818553f0
+swh:1:rev:bfbf4af79c903a8b2d8eacfacddef16467062fd9
diff --git a/tools/git2graph/tests/filters.bats b/tools/git2graph/tests/filters.bats
new file mode 100644
index 0000000..6a010ce
--- /dev/null
+++ b/tools/git2graph/tests/filters.bats
@@ -0,0 +1,18 @@
+#!/usr/bin/env bats
+
+load repo_helper
+
+@test "export revisions" {
+    run_git2graph "$TEST_REPO_DIR" "$TEST_TMPDIR" -E "rev:rev" -N "rev"
+    assert_equal_graphs "${DATA_DIR}/graphs/revisions" "${TEST_TMPDIR}"
+}
+
+@test "export directories" {
+    run_git2graph "$TEST_REPO_DIR" "$TEST_TMPDIR" -E "dir:*" -N "cnt,dir"
+    assert_equal_graphs "${DATA_DIR}/graphs/directories" "${TEST_TMPDIR}"
+}
+
+@test "export releases" {
+    run_git2graph "$TEST_REPO_DIR" "$TEST_TMPDIR" -E "rel:*" -N "rel,rev"
+    assert_equal_graphs "${DATA_DIR}/graphs/releases" "${TEST_TMPDIR}"
+}
diff --git a/tools/git2graph/tests/repo_helper.bash b/tools/git2graph/tests/repo_helper.bash
index 3be800d..0d2c5dc 100644
--- a/tools/git2graph/tests/repo_helper.bash
+++ b/tools/git2graph/tests/repo_helper.bash
@@ -1,39 +1,41 @@
 
 DATA_DIR="${BATS_TEST_DIRNAME}/data"
 TEST_REPO_TGZ="${DATA_DIR}/sample-repo.tgz"
 
 setup () {
     TEST_TMPDIR=$(mktemp -td swh-graph-test.XXXXXXXXXX)
     (cd "$TEST_TMPDIR" ; tar xaf "$TEST_REPO_TGZ")
     TEST_REPO_DIR="${TEST_TMPDIR}/sample-repo"
 }
 
 teardown () {
rm -rf "$TEST_TMPDIR" } # Invoke git2graph (SUT) on the given repo_dir and store its results in the CSV # files nodes.csv and edges.csv located under the given dest_dir. run_git2graph () { repo_dir="$1" dest_dir="$2" + shift 2 + nodes_file="${dest_dir}/nodes.csv" edges_file="${dest_dir}/edges.csv" if [ ! -d "$dest_dir" ] ; then mkdir -p "$dest_dir" fi - ./git2graph -n >(sort > "$nodes_file") -e >(sort > "$edges_file") "$repo_dir" + ./git2graph "$@" -n >(sort > "$nodes_file") -e >(sort > "$edges_file") "$repo_dir" } # Ensure that two graphs, each specified as a dir that should contain a pair of # sorted, textual files called nodes.csv and edges.csv. Comparison is done # using diff. assert_equal_graphs () { dir_1="$1" dir_2="$2" diff "${dir_1}/nodes.csv" "${dir_2}/nodes.csv" && diff "${dir_1}/edges.csv" "${dir_2}/edges.csv" }