Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9348485
ClusteringCoefficient.java
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
ClusteringCoefficient.java
View Options
package
org.softwareheritage.graph.experiments.topology
;
import
com.martiansoftware.jsap.*
;
import
it.unimi.dsi.Util
;
import
it.unimi.dsi.big.webgraph.ImmutableGraph
;
import
it.unimi.dsi.big.webgraph.LazyLongIterator
;
import
it.unimi.dsi.fastutil.BigArrays
;
import
it.unimi.dsi.fastutil.longs.LongBigArrays
;
import
it.unimi.dsi.logging.ProgressLogger
;
import
it.unimi.dsi.util.XoRoShiRo128PlusRandom
;
import
org.softwareheritage.graph.Graph
;
import
org.softwareheritage.graph.Node
;
import
java.io.*
;
import
java.util.*
;
import
java.util.concurrent.*
;
public
class
ClusteringCoefficient
{
private
final
Graph
graph
;
private
final
String
outdirPath
;
private
final
ConcurrentHashMap
<
Long
,
Long
>
result_full
;
private
final
ConcurrentHashMap
<
Long
,
Long
>
result_dircnt
;
private
final
ConcurrentHashMap
<
Long
,
Long
>
result_rev
;
private
final
ConcurrentHashMap
<
Long
,
Long
>
result_revrel
;
private
final
ConcurrentHashMap
<
Long
,
Long
>
result_orisnp
;
public
ClusteringCoefficient
(
String
graphBasename
,
String
outdirPath
)
throws
IOException
{
this
.
outdirPath
=
outdirPath
;
System
.
err
.
println
(
"Loading graph "
+
graphBasename
+
" ..."
);
Graph
directedGraph
=
Graph
.
loadMapped
(
graphBasename
);
this
.
graph
=
directedGraph
.
symmetrize
();
System
.
err
.
println
(
"Graph loaded."
);
result_full
=
new
ConcurrentHashMap
<>();
result_dircnt
=
new
ConcurrentHashMap
<>();
result_rev
=
new
ConcurrentHashMap
<>();
result_revrel
=
new
ConcurrentHashMap
<>();
result_orisnp
=
new
ConcurrentHashMap
<>();
}
private
static
JSAPResult
parse_args
(
String
[]
args
)
{
JSAPResult
config
=
null
;
try
{
SimpleJSAP
jsap
=
new
SimpleJSAP
(
AveragePaths
.
class
.
getName
(),
""
,
new
Parameter
[]{
new
FlaggedOption
(
"graphPath"
,
JSAP
.
STRING_PARSER
,
JSAP
.
NO_DEFAULT
,
JSAP
.
REQUIRED
,
'g'
,
"graph"
,
"Basename of the compressed graph"
),
new
FlaggedOption
(
"outdir"
,
JSAP
.
STRING_PARSER
,
JSAP
.
NO_DEFAULT
,
JSAP
.
REQUIRED
,
'o'
,
"outdir"
,
"Directory where to put the results"
),
new
FlaggedOption
(
"numThreads"
,
JSAP
.
INTEGER_PARSER
,
"32"
,
JSAP
.
NOT_REQUIRED
,
't'
,
"numthreads"
,
"Number of threads"
),});
config
=
jsap
.
parse
(
args
);
if
(
jsap
.
messagePrinted
())
{
System
.
exit
(
1
);
}
}
catch
(
JSAPException
e
)
{
e
.
printStackTrace
();
}
return
config
;
}
private
void
run
(
int
numThreads
)
throws
InterruptedException
{
final
long
END_OF_QUEUE
=
-
1L
;
ArrayBlockingQueue
<
Long
>
queue
=
new
ArrayBlockingQueue
<>(
numThreads
);
ExecutorService
service
=
Executors
.
newFixedThreadPool
(
numThreads
+
1
);
service
.
submit
(()
->
{
try
{
Graph
thread_graph
=
graph
.
copy
();
long
[][]
randomPerm
=
Util
.
identity
(
thread_graph
.
numNodes
());
LongBigArrays
.
shuffle
(
randomPerm
,
new
XoRoShiRo128PlusRandom
());
long
n
=
thread_graph
.
numNodes
();
ProgressLogger
pl
=
new
ProgressLogger
();
pl
.
expectedUpdates
=
n
;
pl
.
itemsName
=
"nodes"
;
pl
.
start
(
"Filling processor queue..."
);
for
(
long
j
=
0
;
j
<
n
;
++
j
)
{
long
node
=
BigArrays
.
get
(
randomPerm
,
j
);
queue
.
put
(
node
);
if
(
j
%
10000
==
0
)
{
printResult
();
}
pl
.
update
();
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
finally
{
for
(
int
i
=
0
;
i
<
numThreads
;
++
i
)
{
try
{
queue
.
put
(
END_OF_QUEUE
);
}
catch
(
InterruptedException
e
)
{
e
.
printStackTrace
();
}
}
}
});
for
(
int
i
=
0
;
i
<
numThreads
;
++
i
)
{
service
.
submit
(()
->
{
try
{
Graph
thread_graph
=
graph
.
copy
();
while
(
true
)
{
Long
node
=
null
;
try
{
node
=
queue
.
take
();
}
catch
(
InterruptedException
e
)
{
e
.
printStackTrace
();
}
if
(
node
==
null
||
node
==
END_OF_QUEUE
)
{
return
;
}
computeAt
(
thread_graph
,
node
);
}
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
}
});
}
service
.
shutdown
();
service
.
awaitTermination
(
365
,
TimeUnit
.
DAYS
);
}
private
void
computeAt
(
Graph
graph
,
long
node
)
{
long
d
=
graph
.
outdegree
(
node
);
if
(
d
<
2
)
{
return
;
}
Node
.
Type
nodeType
=
graph
.
getNodeType
(
node
);
HashSet
<
Long
>
neighborhood
=
new
HashSet
<>();
long
succ
;
final
LazyLongIterator
iterator
=
graph
.
successors
(
node
);
while
((
succ
=
iterator
.
nextLong
())
!=
-
1
)
{
neighborhood
.
add
(
succ
);
}
long
triangles_full
=
0
;
long
triangles_dircnt
=
0
;
long
triangles_rev
=
0
;
long
triangles_revrel
=
0
;
long
triangles_orisnp
=
0
;
for
(
Long
neighbor
:
neighborhood
)
{
Node
.
Type
neighborNodeType
=
graph
.
getNodeType
(
neighbor
);
final
LazyLongIterator
it
=
graph
.
successors
(
neighbor
);
while
((
succ
=
it
.
nextLong
())
!=
-
1
)
{
if
(
neighborhood
.
contains
(
succ
))
{
Node
.
Type
succNodeType
=
graph
.
getNodeType
(
succ
);
triangles_full
++;
if
((
nodeType
==
Node
.
Type
.
DIR
||
nodeType
==
Node
.
Type
.
CNT
)
&&
(
neighborNodeType
==
Node
.
Type
.
DIR
||
neighborNodeType
==
Node
.
Type
.
CNT
)
&&
(
succNodeType
==
Node
.
Type
.
DIR
||
succNodeType
==
Node
.
Type
.
CNT
))
{
triangles_dircnt
++;
}
else
if
((
nodeType
==
Node
.
Type
.
REV
||
nodeType
==
Node
.
Type
.
REL
)
&&
(
neighborNodeType
==
Node
.
Type
.
REV
||
neighborNodeType
==
Node
.
Type
.
REL
)
&&
(
succNodeType
==
Node
.
Type
.
REV
||
succNodeType
==
Node
.
Type
.
REL
))
{
triangles_revrel
++;
if
(
nodeType
==
Node
.
Type
.
REV
&&
neighborNodeType
==
Node
.
Type
.
REV
&&
succNodeType
==
Node
.
Type
.
REV
)
triangles_rev
++;
}
else
if
((
nodeType
==
Node
.
Type
.
ORI
||
nodeType
==
Node
.
Type
.
SNP
)
&&
(
neighborNodeType
==
Node
.
Type
.
ORI
||
neighborNodeType
==
Node
.
Type
.
SNP
)
&&
(
succNodeType
==
Node
.
Type
.
ORI
||
succNodeType
==
Node
.
Type
.
SNP
))
{
triangles_orisnp
++;
}
}
}
}
result_full
.
merge
(
triangles_full
,
1L
,
Long:
:
sum
);
result_dircnt
.
merge
(
triangles_dircnt
,
1L
,
Long:
:
sum
);
result_rev
.
merge
(
triangles_rev
,
1L
,
Long:
:
sum
);
result_revrel
.
merge
(
triangles_revrel
,
1L
,
Long:
:
sum
);
result_orisnp
.
merge
(
triangles_orisnp
,
1L
,
Long:
:
sum
);
}
public
void
printSortedDistribution
(
String
distribPath
,
Map
<
Long
,
Long
>
distrib
)
throws
IOException
{
PrintWriter
f
=
new
PrintWriter
(
new
FileWriter
(
distribPath
));
TreeMap
<
Long
,
Long
>
sortedDistribution
=
new
TreeMap
<>(
distrib
);
for
(
Map
.
Entry
<
Long
,
Long
>
entry
:
sortedDistribution
.
entrySet
())
{
f
.
println
(
entry
.
getKey
()
+
" "
+
entry
.
getValue
());
}
f
.
close
();
}
public
void
printResult
()
throws
IOException
{
new
File
(
outdirPath
).
mkdirs
();
printSortedDistribution
(
outdirPath
+
"/distribution-full.txt"
,
result_full
);
printSortedDistribution
(
outdirPath
+
"/distribution-dircnt.txt"
,
result_dircnt
);
printSortedDistribution
(
outdirPath
+
"/distribution-rev.txt"
,
result_rev
);
printSortedDistribution
(
outdirPath
+
"/distribution-relrev.txt"
,
result_revrel
);
printSortedDistribution
(
outdirPath
+
"/distribution-orisnp.txt"
,
result_orisnp
);
}
public
static
void
main
(
String
[]
args
)
throws
IOException
,
InterruptedException
{
JSAPResult
config
=
parse_args
(
args
);
String
graphPath
=
config
.
getString
(
"graphPath"
);
String
outdir
=
config
.
getString
(
"outdir"
);
int
numThreads
=
config
.
getInt
(
"numThreads"
);
ClusteringCoefficient
cc
=
new
ClusteringCoefficient
(
graphPath
,
outdir
);
cc
.
run
(
numThreads
);
cc
.
printResult
();
}
// Old unused functions
private
long
oppositeEdges
(
ImmutableGraph
graph
,
long
node
)
{
HashSet
<
Long
>
neighborhood
=
new
HashSet
<>();
long
succ
;
final
LazyLongIterator
iterator
=
graph
.
successors
(
node
);
while
((
succ
=
iterator
.
nextLong
())
!=
-
1
)
{
neighborhood
.
add
(
succ
);
}
long
closed_triplets
=
0
;
for
(
Long
neighbor
:
neighborhood
)
{
final
LazyLongIterator
it
=
graph
.
successors
(
neighbor
);
while
((
succ
=
it
.
nextLong
())
!=
-
1
)
{
if
(
neighborhood
.
contains
(
succ
))
{
closed_triplets
++;
}
}
}
return
closed_triplets
;
}
public
void
compute
(
ProgressLogger
pl
,
Formatter
out_local
,
Formatter
out_global
)
{
final
long
n
=
this
.
graph
.
numNodes
();
pl
.
expectedUpdates
=
n
;
pl
.
itemsName
=
"nodes"
;
long
nodes_d2
=
0
;
double
cum_lcc
=
0
;
double
cum_lcc_c0
=
0
;
double
cum_lcc_c1
=
0
;
HashMap
<
Double
,
Long
>
distribution
=
new
HashMap
<>();
for
(
long
node
=
0
;
node
<
n
;
node
++)
{
long
d
=
graph
.
outdegree
(
node
);
if
(
d
>=
2
)
{
double
t
=
(
d
*
(
d
-
1
));
double
m
=
oppositeEdges
(
graph
,
node
);
double
lcc
=
m
/
t
;
distribution
.
merge
(
lcc
,
1L
,
Long:
:
sum
);
cum_lcc
+=
lcc
;
nodes_d2
++;
}
else
{
cum_lcc_c1
++;
}
pl
.
update
();
}
pl
.
done
();
for
(
Map
.
Entry
<
Double
,
Long
>
entry
:
distribution
.
entrySet
())
{
out_local
.
format
(
"%f %d\n"
,
entry
.
getKey
(),
entry
.
getValue
());
}
double
gC
=
cum_lcc
/
nodes_d2
;
double
gC0
=
cum_lcc_c0
/
n
;
double
gC1
=
cum_lcc_c1
/
n
;
out_global
.
format
(
"C: %f\n"
,
gC
);
out_global
.
format
(
"C0: %f\n"
,
gC0
);
out_global
.
format
(
"C1: %f\n"
,
gC1
);
}
public
void
compute_approx
(
Formatter
out_global
)
{
final
long
n
=
this
.
graph
.
numNodes
();
long
trials
=
0
;
long
triangles
=
0
;
while
(
true
)
{
long
node
=
ThreadLocalRandom
.
current
().
nextLong
(
0
,
n
);
long
d
=
graph
.
outdegree
(
node
);
if
(
d
>=
2
)
{
Long
u
=
null
;
Long
v
=
null
;
long
u_idx
=
ThreadLocalRandom
.
current
().
nextLong
(
0
,
d
);
long
v_idx
=
ThreadLocalRandom
.
current
().
nextLong
(
0
,
d
-
1
);
if
(
v_idx
>=
u_idx
)
{
v_idx
++;
}
long
succ
;
final
LazyLongIterator
node_iterator
=
graph
.
successors
(
node
);
for
(
long
succ_idx
=
0
;
(
succ
=
node_iterator
.
nextLong
())
!=
-
1
;
succ_idx
++)
{
if
(
succ_idx
==
u_idx
)
{
u
=
succ
;
}
if
(
succ_idx
==
v_idx
)
{
v
=
succ
;
}
}
final
LazyLongIterator
u_iterator
=
graph
.
successors
(
u
);
while
((
succ
=
u_iterator
.
nextLong
())
!=
-
1
)
{
if
(
succ
==
v
)
triangles
++;
}
}
trials
++;
if
(
trials
%
100
==
0
||
true
)
{
double
gC
=
(
double
)
triangles
/
(
double
)
trials
;
out_global
.
format
(
"C: %f (triangles: %d, trials: %d)\n"
,
gC
,
triangles
,
trials
);
System
.
out
.
format
(
"C: %f (triangles: %d, trials: %d)\n"
,
gC
,
triangles
,
trials
);
}
}
}
}
File Metadata
Details
Attached
Mime Type
text/x-java
Expires
Jul 4 2025, 6:33 PM (5 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3287509
Attached To
rDGRPH Compressed graph representation
Event Timeline
Log In to Comment