Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9125311
ConnectedComponents.java
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
ConnectedComponents.java
View Options
/*
* Copyright (c) 2020 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU General Public License version 3, or any later version
* See top-level LICENSE file for more information
*/
package
org.softwareheritage.graph.experiments.topology
;
import
com.google.common.primitives.Longs
;
import
com.martiansoftware.jsap.*
;
import
it.unimi.dsi.big.webgraph.LazyLongIterator
;
import
it.unimi.dsi.bits.LongArrayBitVector
;
import
it.unimi.dsi.fastutil.Arrays
;
import
it.unimi.dsi.io.ByteDiskQueue
;
import
it.unimi.dsi.logging.ProgressLogger
;
import
org.softwareheritage.graph.*
;
import
java.io.File
;
import
java.io.FileWriter
;
import
java.io.IOException
;
import
java.io.PrintWriter
;
import
java.util.*
;
public
class
ConnectedComponents
{
private
Subgraph
graph
;
private
void
load_graph
(
String
graphBasename
,
String
nodeTypes
)
throws
IOException
{
System
.
err
.
println
(
"Loading graph "
+
graphBasename
+
" ..."
);
var
underlyingGraph
=
SwhBidirectionalGraph
.
loadMapped
(
graphBasename
);
var
underlyingGraphSym
=
underlyingGraph
.
symmetrize
();
graph
=
new
Subgraph
(
underlyingGraphSym
,
new
AllowedNodes
(
nodeTypes
));
System
.
err
.
println
(
"Graph loaded."
);
}
private
static
JSAPResult
parse_args
(
String
[]
args
)
{
JSAPResult
config
=
null
;
try
{
SimpleJSAP
jsap
=
new
SimpleJSAP
(
ConnectedComponents
.
class
.
getName
(),
""
,
new
Parameter
[]{
new
FlaggedOption
(
"graphPath"
,
JSAP
.
STRING_PARSER
,
JSAP
.
NO_DEFAULT
,
JSAP
.
REQUIRED
,
'g'
,
"graph"
,
"Basename of the compressed graph"
),
new
FlaggedOption
(
"outdir"
,
JSAP
.
STRING_PARSER
,
JSAP
.
NO_DEFAULT
,
JSAP
.
REQUIRED
,
'o'
,
"outdir"
,
"Directory where to put the results"
),
new
Switch
(
"byOrigins"
,
JSAP
.
NO_SHORTFLAG
,
"by-origins"
,
"Compute size of components by number of origins"
),
new
FlaggedOption
(
"nodeTypes"
,
JSAP
.
STRING_PARSER
,
JSAP
.
NO_DEFAULT
,
JSAP
.
REQUIRED
,
'n'
,
"nodetypes"
,
"Allowed node types (comma-separated)"
),});
config
=
jsap
.
parse
(
args
);
if
(
jsap
.
messagePrinted
())
{
System
.
exit
(
1
);
}
}
catch
(
JSAPException
e
)
{
e
.
printStackTrace
();
}
return
config
;
}
private
HashMap
<
Long
,
Long
>
/* ArrayList<ArrayList<Long>> */
compute
(
ProgressLogger
pl
,
boolean
byOrigin
)
throws
IOException
{
final
long
n
=
graph
.
numNodes
();
final
long
maxN
=
graph
.
maxNodeNumber
();
// Allow enough memory to behave like in-memory queue
int
bufferSize
=
(
int
)
Math
.
min
(
Arrays
.
MAX_ARRAY_SIZE
&
~
0x7
,
8L
*
maxN
);
// Use a disk based queue to store BFS frontier
final
File
queueFile
=
File
.
createTempFile
(
ConnectedComponents
.
class
.
getSimpleName
(),
"queue"
);
final
ByteDiskQueue
queue
=
ByteDiskQueue
.
createNew
(
queueFile
,
bufferSize
,
true
);
final
byte
[]
byteBuf
=
new
byte
[
Long
.
BYTES
];
// WARNING: no 64-bit version of this data-structure, but it can support
// indices up to 2^37
LongArrayBitVector
visited
=
LongArrayBitVector
.
ofLength
(
maxN
);
pl
.
expectedUpdates
=
n
;
pl
.
itemsName
=
"nodes"
;
pl
.
start
(
"Starting connected components visit..."
);
// ArrayList<ArrayList<Long>> components = new ArrayList<>();
HashMap
<
Long
,
Long
>
componentDistribution
=
new
HashMap
<>();
var
it
=
graph
.
nodeIterator
();
while
(
it
.
hasNext
())
{
long
i
=
it
.
nextLong
();
if
(
visited
.
getBoolean
(
i
))
continue
;
// ArrayList<Long> component = new ArrayList<>();
long
componentNodes
=
0
;
queue
.
enqueue
(
Longs
.
toByteArray
(
i
));
visited
.
set
(
i
);
while
(!
queue
.
isEmpty
())
{
queue
.
dequeue
(
byteBuf
);
final
long
currentNode
=
Longs
.
fromByteArray
(
byteBuf
);
// component.add(currentNode);
if
(!
byOrigin
||
graph
.
getNodeType
(
currentNode
)
==
SwhType
.
ORI
)
componentNodes
+=
1
;
final
LazyLongIterator
iterator
=
graph
.
successors
(
currentNode
);
long
succ
;
while
((
succ
=
iterator
.
nextLong
())
!=
-
1
)
{
if
(
visited
.
getBoolean
(
succ
))
continue
;
visited
.
set
(
succ
);
queue
.
enqueue
(
Longs
.
toByteArray
(
succ
));
}
pl
.
update
();
}
/*
* if (component.size() > 0) { components.add(component); }
*/
if
(
componentNodes
>
0
)
componentDistribution
.
merge
(
componentNodes
,
1L
,
Long:
:
sum
);
}
pl
.
done
();
// return components;
return
componentDistribution
;
}
private
static
void
printDistribution
(
ArrayList
<
ArrayList
<
Long
>>
components
,
Formatter
out
)
{
TreeMap
<
Long
,
Long
>
distribution
=
new
TreeMap
<>();
for
(
ArrayList
<
Long
>
component
:
components
)
{
distribution
.
merge
((
long
)
component
.
size
(),
1L
,
Long:
:
sum
);
}
for
(
Map
.
Entry
<
Long
,
Long
>
entry
:
distribution
.
entrySet
())
{
out
.
format
(
"%d %d\n"
,
entry
.
getKey
(),
entry
.
getValue
());
}
out
.
close
();
}
private
static
void
printLargestComponent
(
ArrayList
<
ArrayList
<
Long
>>
components
,
Formatter
out
)
{
int
indexLargest
=
0
;
for
(
int
i
=
1
;
i
<
components
.
size
();
++
i
)
{
if
(
components
.
get
(
i
).
size
()
>
components
.
get
(
indexLargest
).
size
())
indexLargest
=
i
;
}
ArrayList
<
Long
>
component
=
components
.
get
(
indexLargest
);
for
(
Long
node
:
component
)
{
out
.
format
(
"%d\n"
,
node
);
}
out
.
close
();
}
private
static
void
printAllComponents
(
ArrayList
<
ArrayList
<
Long
>>
components
,
Formatter
out
)
{
for
(
int
i
=
1
;
i
<
components
.
size
();
++
i
)
{
ArrayList
<
Long
>
component
=
components
.
get
(
i
);
for
(
Long
node
:
component
)
{
out
.
format
(
"%d "
,
node
);
}
out
.
format
(
"\n"
);
}
out
.
close
();
}
public
static
void
main
(
String
[]
args
)
{
JSAPResult
config
=
parse_args
(
args
);
String
graphPath
=
config
.
getString
(
"graphPath"
);
String
outdirPath
=
config
.
getString
(
"outdir"
);
String
nodeTypes
=
config
.
getString
(
"nodeTypes"
);
boolean
byOrigin
=
config
.
getBoolean
(
"byOrigins"
);
ConnectedComponents
connectedComponents
=
new
ConnectedComponents
();
try
{
connectedComponents
.
load_graph
(
graphPath
,
nodeTypes
);
}
catch
(
IOException
e
)
{
System
.
out
.
println
(
"Could not load graph: "
+
e
);
System
.
exit
(
2
);
}
ProgressLogger
logger
=
new
ProgressLogger
();
// noinspection ResultOfMethodCallIgnored
new
File
(
outdirPath
).
mkdirs
();
try
{
// ArrayList<ArrayList<Long>> components = connectedComponents.compute(logger);
// components.sort(Comparator.comparing(ArrayList<Long>::size).reversed());
// printDistribution(components, new Formatter(outdirPath + "/distribution.txt"));
// printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt"));
// printAllComponents(components, new Formatter(outdirPath + "/all_components.txt"));
HashMap
<
Long
,
Long
>
componentDistribution
=
connectedComponents
.
compute
(
logger
,
byOrigin
);
PrintWriter
f
=
new
PrintWriter
(
new
FileWriter
(
outdirPath
+
"/distribution.txt"
));
TreeMap
<
Long
,
Long
>
sortedDistribution
=
new
TreeMap
<>(
componentDistribution
);
for
(
Map
.
Entry
<
Long
,
Long
>
entry
:
sortedDistribution
.
entrySet
())
{
f
.
println
(
entry
.
getKey
()
+
" "
+
entry
.
getValue
());
}
f
.
close
();
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
logger
.
done
();
}
}
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Sat, Jun 21, 8:28 PM (3 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3363202
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment