Reputation: 3635
I have a 2D grid of ones and zeros. Cluster is defined as nondiagonal set of neighboring ones. For example, if we look at a grid:
[[0 0 0 0 0]
[1 1 1 1 1]
[1 0 0 0 1]
[0 1 0 0 1]
[1 1 1 1 0]]
One cluster would be set of coordinates (actually I use lists for this, but its not important):
c1=[[1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [2, 1], [2, 4], [3, 4]]
The other cluster in this grid is given by:
c2=[[3,1], [4, 0], [4, 1], [4, 2], [4, 3]]
Now, I have made a method that for a given starting coordinate (if it's value is 1), returns a cluster to which that point belongs (for example, if I choose a [1,1] coordinate it would return c1).
For testing I'll choose a point (1, 1)
and a small grid. This is the output when result is good:
Number of recursions: 10
Length of cluster: 10
[[1 1 1 0 1]
[1 1 0 1 1]
[0 1 0 0 1]
[1 1 1 0 0]
[0 1 0 1 1]]
[[1 1 1 0 0]
[1 1 0 0 0]
[0 1 0 0 0]
[1 1 1 0 0]
[0 1 0 0 0]]
I was trying to get some idea how fast my algorithm is when cluster size is getting larger. If I run the program and then rerun it, and do that many times, it always gives the good result. If I use a loop, it starts giving wrong results. Here is one possible output test scenario:
Number of recursions: 10
Length of cluster: 10
[[1 1 1 0 1]
[1 1 0 1 1]
[0 1 0 0 1]
[1 1 1 0 0]
[0 1 0 1 1]]
[[1 1 1 0 0]
[1 1 0 0 0]
[0 1 0 0 0]
[1 1 1 0 0]
[0 1 0 0 0]]
Number of recursions: 8
Length of cluster: 8
[[0 1 1 1 0]
[1 1 1 0 0]
[1 0 0 0 0]
[1 1 1 0 1]
[1 1 0 0 0]]
[[0 0 0 0 0] - the first one is always good, this one already has an error
[1 1 0 0 0]
[1 0 0 0 0]
[1 1 1 0 0]
[1 1 0 0 0]]
Number of recursions: 1
Length of cluster: 1
[[1 1 1 1 1]
[0 1 0 1 0]
[0 1 0 0 0]
[0 1 0 0 0]
[0 1 1 0 1]]
[[0 0 0 0 0] - till end
[0 1 0 0 0]
[0 0 0 0 0]
[0 0 0 0 0]
[0 0 0 0 0]]
Number of recursions: 1
Length of cluster: 1
[[1 1 1 1 1]
[0 1 1 0 0]
[1 0 1 1 1]
[1 1 0 1 0]
[0 1 1 1 0]]
[[0 0 0 0 0]
[0 1 0 0 0]
[0 0 0 0 0]
[0 0 0 0 0]
[0 0 0 0 0]]
... till end
I will give the code for loop (it's no problem giving you all code, but it's too big, and the error is probably due to something I do inside a loop):
import numpy as np
from time import time
def test(N, p, testTime, length):
assert N>0
x=1
y=1
a=PercolationGrid(N) #this is a class that creates a grid
a.useFixedProbability(p) #the probability that given point will be 1
a.grid[x,y]=1 #I put the starting point as 1 manually
cluster=Cluster(a)
t0=time()
cluster.getCluster(x,y) #this is what I'm testing how fast is it
t1=time()
stats=cluster.getStats() #get the length of cluster and some other data
testTime.append(t1-t0)
testTime.sort()
length.append(stats[1]) #[1] is the length stat that interests me
length.sort() #both sorts are so I can use plot later
print a.getGrid() #show whole grid
clusterGrid=np.zeros(N*N, dtype='int8').reshape(N, N) #create zero grid where I'll "put" the cluster of interest
c1=cluster.getClusterCoordinates() #this is recursive method (if it has any importance)
for xy in c1:
k=xy[0]
m=xy[1]
clusterGrid[k, m]=1
print clusterGrid
del a, cluster, clusterGrid
testTime=[]
length=[]
p=0.59
N=35
np.set_printoptions(threshold='nan') #so the output doesn't shrink
for i in range(10):
test(N, p, testTime, length)
I assume that I do something wrong with freeing memory or something (if it's not some trivial error in loop I can't see)? I use python 2.7.3 on 64bit Linux.
EDIT:
I'm aware that people here should not review whole codes, but specific problems, but I can't find what's happening, the only suggestion is that maybe I have some static variables, but it seems to me that that is not the case. So, if someone has good will and energy you can browse through a code and maybe you'll see something. I started using classes not while ago, so be prepared for lot of bad stuff.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import time
class ProbabilityGrid(object):
"""
This class gives 2D quadratic array (a grid) which is filled with
float values from 0-1, which in many cases represent probabilities
"""
def __init__(self, size=2, dataType='float16'):
"""initialization of a grid with 0. values"""
assert size>1
assert dataType=='float64' or dataType=='float32' or dataType=='float16'
self.n=size
self.dataType=dataType
self.grid=np.zeros((size, size), dtype=dataType)
def getGrid(self):
"""returns a 2D probability array"""
return self.grid
def getSize(self):
"""returns a size of a 2D array"""
return self.size
def fillRandom(self):
"""fills the grid with uniformly random values from 0 to 1"""
n=self.n
self.grid=np.random.rand(n, n)
def fixedProbabilities(self, p):
"""fills the grid with fixed value from 0 to 1"""
assert p<1.0
self.grid=p*np.ones((self.n, self.n))
class PercolationGrid(object):
"""
percolation quadratic grid filled with 1 and 0, int8
which represent a state.
Percolation grid is closly connected to probabilies grid.
ProbabilityGrid gives the starting probabilities will the [i,j] spot
be filled or not. All functions change the PercolationGrid.grid when
ProbabilityGrid.grid changes, so in a way their values are connected
"""
def __init__(self, size=2, dataType='int8'):
"""
initialization of PercolationGrid, sets uniformly 0 and 1 to grid
"""
assert size>1
assert dataType=='int64' or dataType=='int32' or dataType=='int8'
self.n=size
self.dataType=dataType
self.grid=np.zeros((size, size), dtype=dataType)
self.pGrid=ProbabilityGrid(self.n)
self.pGrid.fillRandom()
self.useProbabilityGrid()
#def fillRandom(self, min=0, max=1, distribution='uniform'):
# n=self.n
# self.grid=np.random.random_integers(min, max, n*n).reshape(n, n)
def getGrid(self):
"""returns a 2D percolation array"""
return self.grid
def useProbabilityGrid(self): #use probability grid to get Percolation grid of 0s and 1es
"""
this method fills the PercolationGrid.grid according to probabilities
from Probability.grid
"""
comparisonGrid=np.random.rand(self.n, self.n)
self.grid=np.array(np.floor(self.pGrid.grid-comparisonGrid)+1, dtype=self.dataType)
# Here I used a trick. To simulate whether 1 will apear with probability p,
# we can use uniform random generator which returns values from 0 to 1. If
# the value<p then we get 1, if value>p it's 0.
# But instead looping over each element, it's much faster to make same sized
# grid of random, uniform values from 0 to 1, calculate the difference, add 1
# and use floor function which round everything larger than 1 to 1, and lower
# to 0. Then value-p+1 will give 0 if value<p, 1 if value>p. The result is
# converted to data type of percolation array.
def useFixedProbability(self, p):
"""
this method fills the PercolationGrid according to fixed probabilities
of being filled, for example, a large grid with parameter p set to 0.33
should, aproximatly have one third of places filed with ones and 2/3 with 0
"""
self.pGrid.fixedProbabilities(p)
self.useProbabilityGrid()
def probabilityCheck(self):
""" this method checks the number of ones vs number of elements,
good for checking if the filling of a grid was close to probability
we had in mind. Of course, the accuracy is larger as grid size grows.
For smaller grid sizes you can still check the probability by
running the test multiple times.
"""
sum=self.grid.sum()
print float(sum)/float(self.n*self.n)
#this works because values can only be 0 or 1, so the sum/size gives
#the ratio of ones vs size
def setGrid(self, grid):
shape=grid.shape
i,j=shape[0], shape[1]
assert i>1 and j>1
if i!=j:
print ("The grid needs to be NxN shape, N>1")
self.grid=grid
def setProbabilities(self, grid):
shape=grid.shape
i,j=shape[0], shape[1]
assert i>1 and j>1
if i!=j:
print ("The grid needs to be NxN shape, N>1")
self.pGrid.grid=grid
self.useProbabilityGrid()
def showPercolations(self):
fig1=plt.figure()
fig2=plt.figure()
ax1=fig1.add_subplot(111)
ax2=fig2.add_subplot(111)
myColors=[(1.0, 1.0, 1.0, 1.0), (1.0, 0.0, 0.0, 1.0)]
mycmap=mpl.colors.ListedColormap(myColors)
subplt1=ax1.matshow(self.pGrid.grid, cmap='jet')
cbar1=fig1.colorbar(subplt1)
subplt2=ax2.matshow(self.grid, cmap=mycmap)
cbar2=fig2.colorbar(subplt2, ticks=[0.25,0.75])
cbar2.ax.set_yticklabels(['None', 'Percolated'], rotation='vertical')
class Cluster(object):
"""This is a class of percolation clusters"""
def __init__(self, array):
self.grid=array.getGrid()
self.N=len(self.grid[0,])
self.cluster={}
self.numOfSteps=0
#next 4 functions return True if field next to given field is 1 or False if it's 0
def moveLeft(self, i, j):
moveLeft=False
assert i<self.N
assert j<self.N
if j>0 and self.grid[i, j-1]==1:
moveLeft=True
return moveLeft
def moveRight(self, i, j):
moveRight=False
assert i<self.N
assert j<self.N
if j<N-1 and self.grid[i, j+1]==1:
moveRight=True
return moveRight
def moveDown(self, i, j):
moveDown=False
assert i<self.N
assert j<self.N
if i<N-1 and self.grid[i+1, j]==1:
moveDown=True
return moveDown
def moveUp(self, i, j):
moveUp=False
assert i<self.N
assert j<self.N
if i>0 and self.grid[i-1, j]==1:
moveUp=True
return moveUp
def listOfOnes(self):
"""nested list of connected ones in each row"""
outlist=[]
for i in xrange(self.N):
outlist.append([])
helplist=[]
for j in xrange(self.N):
if self.grid[i, j]==0:
if (j>0 and self.grid[i, j-1]==0) or (j==0 and self.grid[i, j]==0):
continue # condition needed because of edges
outlist[i].append(helplist)
helplist=[]
continue
helplist.append((i, j))
if self.grid[i, j]==1 and j==self.N-1:
outlist[i].append(helplist)
return outlist
def getCluster(self, i=0, j=0, moveD=[1, 1, 1, 1]):
#(left, right, up, down)
#moveD short for moveDirections, 1 means that it tries to move it to that side, 0 so it doesn't try
self.numOfSteps=self.numOfSteps+1
if self.grid[i, j]==1:
self.cluster[(i, j)]=True
else:
print "the starting coordinate is not in any cluster"
return
if moveD[0]==1:
try: #if it comes to same point from different directions we'd get an infinite recursion, checking if it already been on that point prevents that
self.cluster[(i, j-1)]
moveD[0]=0
except:
if self.moveLeft(i, j)==False: #check if 0 or 1 is left to (i, j)
moveD[0]=0
else:
self.getCluster(i, j-1, [1, 0, 1, 1]) #right is 0, because we came from left
if moveD[1]==1:
try:
self.cluster[(i, j+1)]
moveD[1]=0
except:
if self.moveRight(i, j)==False:
moveD[1]=0
else:
self.getCluster(i, j+1, [0, 1, 1, 1])
if moveD[2]==1:
try:
self.cluster[(i-1, j)]
moveD[2]=0
except:
if self.moveUp(i, j)==False:
moveD[2]=0
else:
self.getCluster(i-1, j, [1, 1, 1, 0])
if moveD[3]==1:
try:
self.cluster[(i+1, j)]
moveD[3]=0
except:
if self.moveDown(i, j)==False:
moveD[3]=0
else:
self.getCluster(i+1, j, [1, 1, 0, 1])
if moveD==(0, 0, 0, 0):
return
def getClusterCoordinates(self):
return self.cluster
def getStats(self):
print "Number of recursions:", self.numOfSteps
print "Length of cluster:", len(self.cluster)
return (self.numOfSteps, len(self.cluster))
Upvotes: 4
Views: 307
Reputation: 1141
Your error is coming from the getCluster method. When setting moveD to [1,1,1,1] you are essentially setting a static variable(do not quote me on this). This is causing the information from the previous executions to carry over.
Here is a link to a blog post that shows an example of this.
Below is a working version of the getCluster method that both fixes the default arguement problem and removes the extraneous moveD assignments that manifested the problematic behavior.
def getCluster(self, i=0, j=0, moveD=None):
#(left, right, up, down)
#moveD short for moveDirections, 1 means that it tries to move it to that side, 0 so it doesn't try
if moveD == None: moveD = [1, 1, 1, 1]
self.numOfSteps=self.numOfSteps+1
if self.grid[i, j]==1:
self.cluster[(i, j)]=True
else:
print "the starting coordinate is not in any cluster"
return
if moveD[0]==1:
try: #if it comes to same point from different directions we'd get an infinite recursion, checking if it already been on that point prevents that
self.cluster[(i, j-1)]
except:
if self.moveLeft(i, j)==True: #check if 0 or 1 is left to (i, j)
self.getCluster(i, j-1, [1, 0, 1, 1]) #right is 0, because we came from left
if moveD[1]==1:
try:
self.cluster[(i, j+1)]
except:
if self.moveRight(i, j)==True:
self.getCluster(i, j+1, [0, 1, 1, 1])
if moveD[2]==1:
try:
self.cluster[(i-1, j)]
except:
if self.moveUp(i, j)==True:
self.getCluster(i-1, j, [1, 1, 1, 0])
if moveD[3]==1:
try:
self.cluster[(i+1, j)]
except:
if self.moveDown(i, j)==True:
self.getCluster(i+1, j, [1, 1, 0, 1])
Upvotes: 1