Reputation: 117
# Sample edge list of a small undirected graph: position i of each list
# describes one edge (node1[i], node2[i]) carrying weight[i].
data1 = dict(
    node1=[1, 1, 1, 2],
    node2=[2, 3, 5, 4],
    weight=[1, 1, 1, 1],
)
# Column order is given explicitly so it does not depend on dict ordering.
df1 = pd.DataFrame(data=data1, columns=['node1', 'node2', 'weight'])
I want to create an adjacency matrix from a pandas DataFrame. The DataFrame holds the edge list of an undirected graph.
OUTPUT:
0 1 1 0 1
1 0 0 1 0
1 0 0 0 0
0 1 0 0 0
1 0 0 0 0
My code:
def adjmat(path='training.csv'):
    """Build a dense, symmetric adjacency matrix from a CSV edge list.

    The file must have one header row followed by data rows whose columns
    1, 2 and 3 (0-based) hold the first node id, the second node id and the
    integer edge weight. Column 0 is ignored. Node ids are 1-based, so node
    ``k`` maps to row/column ``k - 1``. The graph is treated as undirected:
    every edge is written in both orientations. If the same pair of nodes
    appears on several rows, the weight of the last such row is kept.

    Args:
        path: CSV file to read. Defaults to ``'training.csv'``.

    Returns:
        A float array of shape ``(n, n)``, where ``n`` is the largest node id
        found in the file, or an empty ``(0, 0)`` array if the file has no
        data rows. The result is dense, so it needs O(n**2) memory.

    Raises:
        ValueError: if any node id is smaller than 1.
    """
    print('begun creating adjen mat')
    # ndmin=2 keeps a file with a single data row two-dimensional, so the
    # column slicing below works for any number of edges.
    table = np.loadtxt(path, dtype=str, delimiter=',', skiprows=1,
                       usecols=(1, 2, 3), ndmin=2)
    if table.size == 0:
        return np.zeros((0, 0))
    table = table.astype(int)
    first, second, weight = table[:, 0], table[:, 1], table[:, 2]
    if min(first.min(), second.min()) < 1:
        # An id of 0 or below would wrap around via negative indexing and
        # silently write the edge into the wrong cell.
        raise ValueError('node ids must be >= 1')
    n = max(first.max(), second.max())
    Amat = np.zeros((n, n))
    # Normalise each edge to (lower id, higher id) and write both
    # orientations. This keeps edges listed as (high, low), which the old
    # "copy upper triangle over lower triangle" step erased, and replaces
    # the per-edge Python loop with two vectorised assignments.
    lo = np.minimum(first, second) - 1
    hi = np.maximum(first, second) - 1
    Amat[lo, hi] = weight
    Amat[hi, lo] = weight
    return Amat
I am looking for code that will scale. Right now I am dealing with a dataset that has 100,000 nodes, and this code is not able to handle such a large dataset.
Upvotes: 2
Views: 2820
Reputation: 2365
Using networkx:
# Build the adjacency matrix of an undirected graph from a pandas edge list
# using networkx.
data1 = { 'node1': [1,1,1,2],
'node2': [2,3,5,4],
'weight': [1,1,1,1], }
df1 = pd.DataFrame(data1, columns = ['node1','node2','weight'])
# from_pandas_dataframe was removed in networkx 2.0; from_pandas_edgelist is
# its replacement and takes the same (df, source, target, edge_attr) arguments.
G = nx.from_pandas_edgelist(df1, 'node1', 'node2', 'weight')
# Without nodelist the rows follow node insertion order (1, 2, 3, 5, 4 for
# this edge list); sorting makes row/column i correspond to the i-th smallest
# node id.
Adjtraining = nx.adjacency_matrix(G, nodelist=sorted(G.nodes()))
print(Adjtraining.todense())
output
[[0 1 1 0 1]
[1 0 0 1 0]
[1 0 0 0 0]
[0 1 0 0 0]
[1 0 0 0 0]]
Upvotes: 1