Fixed database typo and removed unnecessary class identifier.

Batuhan Berk Başoğlu 2020-10-14 10:10:37 -04:00
parent 00ad49a143
commit 45fb349a7d
5098 changed files with 952558 additions and 85 deletions

@@ -0,0 +1,2 @@
Algorithms
----------

@@ -0,0 +1,338 @@
# source target
1 2
1 10
2 1
2 10
3 7
4 7
4 209
5 132
6 150
7 3
7 4
7 9
8 106
8 115
9 1
9 2
9 7
10 1
10 2
11 133
11 218
12 88
13 214
14 24
14 52
16 10
16 19
17 64
17 78
18 55
18 103
18 163
19 18
20 64
20 180
21 16
21 22
22 21
22 64
22 106
23 20
23 22
23 64
24 14
24 31
24 122
27 115
28 29
29 28
30 19
31 24
31 32
31 122
31 147
31 233
32 31
32 86
34 35
34 37
35 34
35 43
36 132
36 187
37 38
37 90
37 282
38 42
38 43
38 210
40 20
42 15
42 38
43 34
43 35
43 38
45 107
46 61
46 72
48 23
49 30
49 64
49 108
49 115
49 243
50 30
50 47
50 55
50 125
50 163
52 218
52 224
54 111
54 210
55 65
55 67
55 105
55 108
55 222
56 18
56 64
57 65
57 125
58 20
58 30
58 50
58 103
58 180
59 164
63 125
64 8
64 50
64 70
64 256
66 20
66 84
66 106
66 125
67 22
67 50
67 113
68 50
70 50
70 64
71 72
74 29
74 75
74 215
75 74
75 215
76 58
76 104
77 103
78 64
78 68
80 207
80 210
82 8
82 77
82 83
82 97
82 163
83 82
83 226
83 243
84 29
84 154
87 101
87 189
89 90
90 89
90 94
91 86
92 19
92 30
92 106
94 72
94 89
94 90
95 30
96 75
96 256
97 80
97 128
98 86
100 86
101 87
103 77
103 104
104 58
104 77
104 103
106 22
107 38
107 114
107 122
108 49
108 55
111 121
111 128
111 210
113 253
114 107
116 30
116 140
118 129
118 138
120 88
121 128
122 31
123 32
124 244
125 132
126 163
126 180
128 38
128 111
129 118
132 29
132 30
133 30
134 135
134 150
135 134
137 144
138 118
138 129
139 142
141 157
141 163
142 139
143 2
144 137
145 151
146 137
146 165
146 169
146 171
147 31
147 128
148 146
148 169
148 171
148 282
149 128
149 148
149 172
150 86
151 145
152 4
153 134
154 155
156 161
157 141
161 156
165 144
165 148
167 149
169 15
169 148
169 171
170 115
170 173
170 183
170 202
171 72
171 148
171 169
173 170
175 100
176 10
178 181
181 178
182 38
182 171
183 96
185 50
186 127
187 50
187 65
188 30
188 50
189 87
189 89
190 35
190 38
190 122
190 182
191 54
191 118
191 129
191 172
192 149
192 167
195 75
197 50
197 188
198 218
198 221
198 222
200 65
200 220
201 113
202 156
203 232
204 194
207 38
207 122
207 124
208 30
208 50
210 38
210 207
211 37
213 35
213 38
214 13
214 14
214 171
214 213
215 75
217 39
218 68
218 222
221 198
222 198
222 218
223 39
225 3
226 22
229 65
230 68
231 43
232 95
232 203
233 99
234 68
234 230
237 244
238 145
242 3
242 113
244 237
249 96
250 156
252 65
254 65
258 113
268 4
270 183
272 6
275 96
280 183
280 206
282 37
285 75
290 285
293 290

@@ -0,0 +1,109 @@
"""
===========
Beam Search
===========
Beam search with dynamic beam width.
The progressive widening beam search repeatedly executes a beam search
with increasing beam width until the target node is found.
"""
import math
import matplotlib.pyplot as plt
import networkx as nx


def progressive_widening_search(G, source, value, condition, initial_width=1):
"""Progressive widening beam search to find a node.
The progressive widening beam search involves a repeated beam
search, starting with a small beam width then extending to
progressively larger beam widths if the target node is not
found. This implementation simply returns the first node found that
matches the termination condition.
`G` is a NetworkX graph.
`source` is a node in the graph. The search for the node of interest
begins here and extends only to those nodes in the (weakly)
connected component of this node.
`value` is a function that returns a real number indicating how good
a potential neighbor node is when deciding which neighbor nodes to
enqueue in the breadth-first search. Only the best nodes within the
current beam width will be enqueued at each step.
`condition` is the termination condition for the search. This is a
function that takes a node as input and return a Boolean indicating
whether the node is the target. If no node matches the termination
condition, this function raises :exc:`NodeNotFound`.
`initial_width` is the starting beam width for the beam search (the
default is one). If no node matching the `condition` is found with
this beam width, the beam search is restarted from the `source` node
with a beam width that is twice as large (so the beam width
increases exponentially). The search terminates after the beam width
exceeds the number of nodes in the graph.
"""
# Check for the special case in which the source node satisfies the
# termination condition.
if condition(source):
return source
# The largest possible value of `i` in this range yields a width at
# least the number of nodes in the graph, so the final invocation of
# `bfs_beam_edges` is equivalent to a plain old breadth-first
# search. Therefore, all nodes will eventually be visited.
log_m = math.ceil(math.log2(len(G)))
    for i in range(log_m + 1):
width = initial_width * pow(2, i)
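        # Each restart doubles the beam width, so at most O(log n) beam
        # searches are run; each visits at most m edges, for O(m log n)
        # total work in the worst case.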
# Since we are always starting from the same source node, this
# search may visit the same nodes many times (depending on the
# implementation of the `value` function).
for u, v in nx.bfs_beam_edges(G, source, value, width):
if condition(v):
return v
# At this point, since all nodes have been visited, we know that
# none of the nodes satisfied the termination condition.
    raise nx.NodeNotFound("no node satisfied the termination condition")


###############################################################################
# Search for a node with high centrality.
# ---------------------------------------
#
# We generate a random graph, compute the centrality of each node, then perform
# the progressive widening search in order to find a node of high centrality.
G = nx.gnp_random_graph(100, 0.5)
centrality = nx.eigenvector_centrality(G)
avg_centrality = sum(centrality.values()) / len(G)
def has_high_centrality(v):
return centrality[v] >= avg_centrality
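
# Rank candidate neighbors by their precomputed eigenvector centrality and
# stop at the first node whose centrality is at least the graph average.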
source = 0
value = centrality.get
condition = has_high_centrality
found_node = progressive_widening_search(G, source, value, condition)
c = centrality[found_node]
print(f"found node {found_node} with centrality {c}")
# Draw graph
pos = nx.spring_layout(G)
options = {
"node_color": "blue",
"node_size": 20,
"edge_color": "grey",
"linewidths": 0,
"width": 0.1,
}
nx.draw(G, pos, **options)
# Draw node with high centrality as large and red
nx.draw_networkx_nodes(G, pos, nodelist=[found_node], node_size=100, node_color="r")
plt.show()

@@ -0,0 +1,79 @@
"""
==========
Blockmodel
==========
Example of creating a block model using the quotient_graph function in NX. Data
used is the Hartford, CT drug users network::
@article{weeks2002social,
title={Social networks of drug users in high-risk sites: Finding the connections},
url = {https://doi.org/10.1023/A:1015457400897},
doi = {10.1023/A:1015457400897},
author={Weeks, Margaret R and Clair, Scott and Borgatti, Stephen P and Radda, Kim and Schensul, Jean J},
journal={{AIDS and Behavior}},
volume={6},
number={2},
pages={193--206},
year={2002},
publisher={Springer}
}
"""
from collections import defaultdict
import matplotlib.pyplot as plt
import networkx as nx
import numpy
from scipy.cluster import hierarchy
from scipy.spatial import distance


def create_hc(G):
"""Creates hierarchical cluster of graph G from distance matrix"""
path_length = nx.all_pairs_shortest_path_length(G)
distances = numpy.zeros((len(G), len(G)))
for u, p in path_length:
for v, d in p.items():
distances[u][v] = d
# Create hierarchical cluster
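    # `squareform` condenses the symmetric n-by-n distance matrix into the
    # flat vector form expected by SciPy's linkage routines.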
Y = distance.squareform(distances)
Z = hierarchy.complete(Y) # Creates HC using farthest point linkage
    # This partition selection is arbitrary, for illustrative purposes;
    # `fcluster` cuts the dendrogram using its default 'inconsistent' criterion.
membership = list(hierarchy.fcluster(Z, t=1.15))
# Create collection of lists for blockmodel
partition = defaultdict(list)
    for n, p in enumerate(membership):
partition[p].append(n)
    return list(partition.values())


G = nx.read_edgelist("hartford_drug.edgelist")
# Extract largest connected component into graph H
H = G.subgraph(max(nx.connected_components(G), key=len))
# Makes life easier to have consecutively labeled integer nodes
H = nx.convert_node_labels_to_integers(H)
# Create partitions with hierarchical clustering
partitions = create_hc(H)
# Build blockmodel graph
BM = nx.quotient_graph(H, partitions, relabel=True)
# Draw original graph
pos = nx.spring_layout(H, iterations=100)
plt.subplot(211)
nx.draw(H, pos, with_labels=False, node_size=10)
# Draw block model with weighted edges and nodes sized by number of internal nodes
node_size = [BM.nodes[x]["nnodes"] * 10 for x in BM.nodes()]
edge_width = [(2 * d["weight"]) for (u, v, d) in BM.edges(data=True)]
# Set positions to mean of positions of internal nodes from original graph
posBM = {}
for n in BM:
xy = numpy.array([pos[u] for u in BM.nodes[n]["graph"]])
posBM[n] = xy.mean(axis=0)
plt.subplot(212)
nx.draw(BM, posBM, node_size=node_size, width=edge_width, with_labels=False)
plt.axis("off")
plt.show()

@@ -0,0 +1,42 @@
"""
==========
Davis Club
==========
Davis Southern Club Women
Shows how to make unipartite projections of the graph and compute the
properties of those graphs.
These data were collected by Davis et al. in the 1930s.
They represent observed attendance at 14 social events by 18 Southern women.
The graph is bipartite (clubs, women).
"""
import matplotlib.pyplot as plt
import networkx as nx
import networkx.algorithms.bipartite as bipartite
G = nx.davis_southern_women_graph()
women = G.graph["top"]
clubs = G.graph["bottom"]
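# `biadjacency_matrix` returns a SciPy sparse matrix with one row per woman
# and one column per club; printing it lists the nonzero coordinates.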
print("Biadjacency matrix")
print(bipartite.biadjacency_matrix(G, women, clubs))
# project bipartite graph onto women nodes
W = bipartite.projected_graph(G, women)
print()
print("#Friends, Member")
for w in women:
print(f"{W.degree(w)} {w}")
# project bipartite graph onto women nodes keeping number of co-occurrences;
# the degree computed is weighted and counts the total number of shared contacts
W = bipartite.weighted_projected_graph(G, women)
print()
print("#Friend meetings, Member")
for w in women:
print(f"{W.degree(w, weight='weight')} {w}")
nx.draw(G)
plt.show()

@@ -0,0 +1,40 @@
"""
=============
Decomposition
=============
Example of creating a junction tree from a directed graph.
"""
import networkx as nx
from networkx.algorithms import moral
from networkx.algorithms.tree.decomposition import junction_tree
from networkx.drawing.nx_agraph import graphviz_layout as layout
import matplotlib.pyplot as plt
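
# Build the example Bayesian network as a DAG; edges point from parent to child.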
B = nx.DiGraph()
B.add_nodes_from(["A", "B", "C", "D", "E", "F"])
B.add_edges_from(
[("A", "B"), ("A", "C"), ("B", "D"), ("B", "F"), ("C", "E"), ("E", "F")]
)
options = {"with_labels": True, "node_color": "white", "edgecolors": "blue"}
bayes_pos = layout(B, prog="neato")
ax1 = plt.subplot(1, 3, 1)
plt.title("Bayesian Network")
nx.draw_networkx(B, pos=bayes_pos, **options)
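# Moralization "marries" (connects) the parents of each node and then drops
# edge directions, giving an undirected graph.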
mg = moral.moral_graph(B)
plt.subplot(1, 3, 2, sharex=ax1, sharey=ax1)
plt.title("Moralized Graph")
nx.draw_networkx(mg, pos=bayes_pos, **options)
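# Each junction tree node is a clique of the triangulated moral graph;
# below, node sizes are scaled by the number of variables in each clique.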
jt = junction_tree(B)
plt.subplot(1, 3, 3)
plt.title("Junction Tree")
nsize = [2000 * len(n) for n in list(jt.nodes())]
nx.draw_networkx(jt, pos=layout(jt, prog="neato"), node_size=nsize, **options)
plt.tight_layout()
plt.show()

@@ -0,0 +1,30 @@
"""
=====================
Krackhardt Centrality
=====================
Centrality measures of Krackhardt social network.
"""
import matplotlib.pyplot as plt
import networkx as nx
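
# The Krackhardt kite is a classic 10-node social network in which different
# nodes attain the maximum degree, closeness, and betweenness.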
G = nx.krackhardt_kite_graph()
print("Betweenness")
b = nx.betweenness_centrality(G)
for v in G.nodes():
print(f"{v:2} {b[v]:.3f}")
print("Degree centrality")
d = nx.degree_centrality(G)
for v in G.nodes():
print(f"{v:2} {d[v]:.3f}")
print("Closeness centrality")
c = nx.closeness_centrality(G)
for v in G.nodes():
print(f"{v:2} {c[v]:.3f}")
nx.draw(G)
plt.show()

@@ -0,0 +1,35 @@
"""
===
Rcm
===
Cuthill-McKee ordering of matrices
The reverse Cuthill-McKee algorithm gives a sparse matrix ordering that
reduces the matrix bandwidth.
"""
import networkx as nx
from networkx.utils import reverse_cuthill_mckee_ordering
import numpy as np
# Build a graph whose sparse Laplacian we will reorder to reduce its bandwidth
G = nx.grid_2d_graph(3, 3)
rcm = list(reverse_cuthill_mckee_ordering(G))
print("ordering", rcm)
print("unordered Laplacian matrix")
A = nx.laplacian_matrix(G)
x, y = np.nonzero(A)
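# For a nonzero entry at row x, column y, (y - x).max() is the upper
# bandwidth and (x - y).max() the lower; the full bandwidth also counts
# the diagonal.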
# print(f"lower bandwidth: {(y - x).max()}")
# print(f"upper bandwidth: {(x - y).max()}")
print(f"bandwidth: {(y - x).max() + (x - y).max() + 1}")
print(A)
B = nx.laplacian_matrix(G, nodelist=rcm)
print("low-bandwidth Laplacian matrix")
x, y = np.nonzero(B)
# print(f"lower bandwidth: {(y - x).max()}")
# print(f"upper bandwidth: {(x - y).max()}")
print(f"bandwidth: {(y - x).max() + (x - y).max() + 1}")
print(B)