Reputation: 853
I have multiple .txt files in a directory, say d:\memdump\0.txt, 1.txt, ..., 10.txt. A sample text file is given below:
Applications Memory Usage (kB):
Uptime: 7857410 Realtime: 7857410
** MEMINFO in pid 23875 [com.example.twolibs] **
Shared Private Heap Heap Heap
Pss Dirty Dirty Size Alloc Free
------ ------ ------ ------ ------ ------
Native 0 0 0 13504 10836 459
Dalvik 6806 7740 6580 24076 18523 5553
Stack 80 0 80
Cursor 0 0 0
Ashmem 0 0 0
Other dev 14741 836 1028
.so mmap 1367 448 1028
.jar mmap 0 0 0
.apk mmap 225 0 0
.ttf mmap 0 0 0
.dex mmap 1225 340 16
Other mmap 5 8 4
Unknown 3473 564 3432
TOTAL 27922 9936 12168 37580 29359 6012
Objects
Views: 62 ViewRootImpl: 2
AppContexts: 5 Activities: 2
Assets: 3 AssetManagers: 3
Local Binders: 9 Proxy Binders: 18
Death Recipients: 0
OpenSSL Sockets: 0
SQL
MEMORY_USED: 0
PAGECACHE_OVERFLOW: 0 MALLOC_SIZE: 0
I have to parse these files to get values of PID, Native Heap Size, Native Heap Alloc size, Dalvik Heap Size, Dalvik Heap Alloc size and plot a graph with these heap sizes as below
I am using the following code to achieve this:
import glob
import os
import re
import numpy as np
import matplotlib.pyplot as plt
# Work from the directory that holds the memory dump files.
# Raw string: the backslashes are Windows path separators, not escape
# sequences ("\P", "\M" and "\D" are invalid escapes in a normal string).
os.chdir(r"D:\Python_Trainings\MemInfo\Data")
# Module-level accumulators filled by parse_dumpFiles(): one entry is appended
# per matching line, in the order the files are read. pkg_name_arr holds each
# distinct package name only once.
pid_arr = []
native_heapsize_arr = []
dalvik_heapsize_arr = []
native_heapalloc_arr = []
dalvik_heapalloc_arr = []
pkg_name_arr = []

# Patterns are compiled once instead of on every line of every file.
_PID_RE = re.compile(r"pid\s+(\d+)")
# Dotted name inside square brackets; any number of segments (two or more).
_PKG_NAME_RE = re.compile(r"\[(\w+(?:\.+\w+)+)\]")
# Row layout in the dump: <name> Pss SharedDirty PrivateDirty HeapSize HeapAlloc ...
# Group 1 is the heap size; group 2 (optional) is the heap alloc.
_NATIVE_HEAP_RE = re.compile(r"Native+\s+\d+\s+\d+\s+\d+\s+(\d+)(?:\s+(\d+))?")
_DALVIK_HEAP_RE = re.compile(r"Dalvik+\s+\d+\s+\d+\s+\d+\s+(\d+)(?:\s+(\d+))?")
_DIGIT_RUN_RE = re.compile(r"(\d+)")


def _natural_key(file_name):
    """Return a sort key that orders embedded numbers numerically.

    Splitting on a capturing group always yields text at even positions and
    digit runs at odd positions, so keys compare element-wise without mixing
    str and int ("2.txt" sorts before "10.txt").
    """
    parts = _DIGIT_RUN_RE.split(file_name)
    return [int(part) if index % 2 else part.lower()
            for index, part in enumerate(parts)]


def _record_heap(match, size_arr, alloc_arr):
    """Append heap size (and heap alloc, when present) from a heap-row match."""
    if not match:
        return
    size_arr.append(int(match.group(1)))
    if match.group(2) is not None:
        alloc_arr.append(int(match.group(2)))


#Method to parse the memory dump files
def parse_dumpFiles():
    """Parse every "*.txt" file in the current directory into the module lists.

    For each line of each file the PID, the Native/Dalvik heap size and heap
    alloc columns, and the bracketed package name are extracted when present
    and appended to pid_arr, native_heapsize_arr, native_heapalloc_arr,
    dalvik_heapsize_arr, dalvik_heapalloc_arr and pkg_name_arr (package names
    are stored once each). All numeric values are stored as int.

    Files are visited in natural numeric order because glob.glob() gives no
    ordering guarantee and the samples are plotted in list order.

    A file that cannot be opened or read is reported on stdout and skipped.
    """
    for data_file in sorted(glob.glob("*.txt"), key=_natural_key):
        try:
            # "with" closes the file on every path and never touches an
            # unbound or stale handle when open() itself fails.
            with open(data_file, "r") as fo:
                for line in fo:
                    pid_match = _PID_RE.search(line)
                    if pid_match:
                        pid_arr.append(int(pid_match.group(1)))
                    _record_heap(_NATIVE_HEAP_RE.search(line),
                                 native_heapsize_arr, native_heapalloc_arr)
                    _record_heap(_DALVIK_HEAP_RE.search(line),
                                 dalvik_heapsize_arr, dalvik_heapalloc_arr)
                    pkg_name_match = _PKG_NAME_RE.search(line)
                    if pkg_name_match and pkg_name_match.group(1) not in pkg_name_arr:
                        pkg_name_arr.append(pkg_name_match.group(1))
        except IOError:
            print("Error: can't find file or read data")
#end of parse_dumpFiles() Method
#Method to plot from Memory Dumps
def plt_MemDump(pid_arr, native_heapsize_arr, dalvik_heapsize_arr, native_heapalloc_arr, dalvik_heapalloc_arr, pkg_name_arr):
    """Plot the heap series as lines with a value table underneath.

    Parameters are parallel sequences with one entry per memory dump sample
    (values may be ints or numeric strings); pkg_name_arr supplies the plot
    and window title from its first entry.

    The table has one row per heap series plus a PID row, and one column per
    sample labelled 0, 10, 20, ... Every data cell of a column whose PID
    differs from the first sample's PID is coloured red.

    Raises:
        ValueError: if pid_arr is empty, or if the sequences cannot be
            converted to a rectangular integer array.
    """
    if not pid_arr:
        raise ValueError("no memory dump samples to plot")
    pkg_name = pkg_name_arr[0] if pkg_name_arr else "unknown package"

    # (row label, values, marker) in the order the lines are drawn. The table
    # rows are built from the same tuple so labels and data cannot drift apart.
    series = (
        ('Native Heap Size', native_heapsize_arr, "o"),
        ('Dalvik Heap Size', dalvik_heapsize_arr, "D"),
        ('Native Heap Allocated', native_heapalloc_arr, "^"),
        ('Dalvik Heap Allocated', dalvik_heapalloc_arr, "h"),
    )
    rowLabels = tuple(label for label, _, _ in series) + ('PID',)
    #Table data as a numpy array
    tableData = np.asarray([values for _, values, _ in series] + [pid_arr], dtype=int)

    # One column per sample, spaced 10 apart (0, 10, ..., 100 for 11 samples).
    sample_count = tableData.shape[1]
    xAxis_val = [10 * index for index in range(sample_count)]
    colLabels = tuple(str(x_val) for x_val in xAxis_val)

    #Create a figure and axes with room for the table
    fig = plt.figure()
    ax = plt.axes([0.2, 0.2, 0.7, 0.7])

    #Make some line plots; plot the integer rows so numeric strings are never
    #handed to matplotlib, and remember each line's colour for its table row
    colors = []
    for (label, _, marker), values in zip(series, tableData):
        line, = ax.plot(xAxis_val, values, linewidth=2.5, marker=marker, label=label)
        colors.append(line.get_color())
    colors.append('w')  # the PID row has no line of its own

    #Show the table
    the_table = plt.table(cellText=tableData, rowLabels=rowLabels, rowColours=colors, colLabels=colLabels, loc='bottom')

    #show legend
    plt.legend(loc='upper right', fontsize=10)

    #set the column color where PID is different from 1st PID
    cells = the_table.get_celld()
    first_pid = tableData[-1, 0]
    for col in range(sample_count):
        if tableData[-1, col] != first_pid:
            # Row 0 is the column-header row; data rows start at 1.
            for row in range(1, len(rowLabels) + 1):
                cells[(row, col)].set_color('r')

    #Turn off x-axis ticks and show the plot
    plt.xticks([])
    #Configure Y axis
    plt.ylim(0,60000)
    plt.yticks([10000,20000,30000,40000,50000,60000])
    plt.grid(True)
    #Setting the name of the window title of the plot (via the figure manager;
    #a canvas without a manager has no window to title)
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None:
        manager.set_window_title(pkg_name + "- Memory Dump Plot")
    #Setting the Title of the plot
    plt.title(pkg_name,color='r',fontsize=20)
    #Setting Y Label
    plt.ylabel('Heap Size', fontsize=14, color='r')
    #show plot
    plt.show()
#end of plt_MemDump() Method
# Fill the module-level lists from the dump files in the current directory.
parse_dumpFiles()

# Draw the heap-size lines and the value table from the collected data.
plt_MemDump(
    pid_arr,
    native_heapsize_arr,
    dalvik_heapsize_arr,
    native_heapalloc_arr,
    dalvik_heapalloc_arr,
    pkg_name_arr,
)
Now I want to highlight, in some color, the columns of the table where the PID value differs from the first PID value (e.g., columns 30, 60 and 100).
Can anybody help me to achieve this?
Upvotes: 9
Views: 14216
Reputation: 8668
`matplotlib.pyplot.table` already gives you the tools to do what you need:
- `cellColours` (of the same shape as `cellText`): lets you choose the colour for each cell
- `colColours`: works like `rowColours`, but for the column headers
If you want all the cells in a column to have a specific colour, you can do something like this:
# One colour per table cell. An object array from np.empty_like starts out
# filled with None, which is not a colour, so default every cell to white.
cellcolours = np.empty_like(tableData, dtype='object')
cellcolours[:] = 'w'
for i, cl in enumerate(colLabels):
    # colLabels holds strings such as '60'; compare them numerically.
    if int(cl) > 50:
        cellcolours[:, i] = 'r'
and then call `plt.table` (although I suggest you switch to `ax.table`), adding the `cellColours=cellcolours` keyword.
If you also want the column headers coloured, build a `colColours` list in the same way and pass it as well.
If you want to be able to change the cells after you create the table, table.get_celld()
returns a dictionary of cells, whose keys are tuples of the position of each cell, eg:
{(0, 0): <matplotlib.table.Cell at 0x5d750d0>, # these are column headers
(0, 1): <matplotlib.table.Cell at 0x5d75290>,
(0, 2): <matplotlib.table.Cell at 0x5d75450>,
(0, 3): <matplotlib.table.Cell at 0x5d75610>,
[...]
(1, -1): <matplotlib.table.Cell at 0x5d757d0>, # this is a row header
(1, 0): <matplotlib.table.Cell at 0x5a58110>, # this is a cell
(1, 1): <matplotlib.table.Cell at 0x5d74150>,
(1, 2): <matplotlib.table.Cell at 0x5d74290>,
(1, 3): <matplotlib.table.Cell at 0x5d743d0>,
[...]
(2, -1): <matplotlib.table.Cell at 0x5d75990>, # this is a row header
[...] }
You can access each cell using the tuple, e.g.
c=table.get_celld()[(1,1)]
You can set the cell color with `c.set_color()` and inspect the text in the cell with `c.get_text().get_text()` (the first `get_text` returns a `Text` instance, the second the string in it).
Upvotes: 14