How to set background color of a column in a matplotlib table

Question

I have multile .txt files in a directory, say, d:\memdump\0.txt,1.txt,...10.txt sample text file is given below:

 Applications Memory Usage (kB):
 Uptime: 7857410 Realtime: 7857410
** MEMINFO in pid 23875 [com.example.twolibs] **
                 Shared  Private     Heap     Heap     Heap
               Pss    Dirty     Dirty     Size    Alloc     Free
              ------   ------   ------   ------   ------   ------
  Native        0        0        0       13504    10836      459
  Dalvik     6806     7740     6580       24076    18523     5553
  Stack       80        0       80                           
  Cursor        0        0        0                           
  Ashmem        0        0        0                           
  Other dev    14741      836     1028                           
 .so mmap     1367      448     1028                           
 .jar mmap        0        0        0                           
 .apk mmap      225        0        0                           
 .ttf mmap        0        0        0                           
 .dex mmap     1225      340       16                           
  Other mmap        5        8        4                           
  Unknown     3473      564     3432                           
  TOTAL    27922     9936    12168    37580    29359     6012
  Objects
       Views:       62         ViewRootImpl:        2
  AppContexts:        5           Activities:        2
      Assets:        3        AssetManagers:        3
  Local Binders:        9        Proxy Binders:       18
  Death Recipients:        0
  OpenSSL Sockets:        0
  SQL
    MEMORY_USED:        0
   PAGECACHE_OVERFLOW:        0          MALLOC_SIZE:        0

I have to parse these files to get values of PID, Native Heap Size, Native Heap Alloc size, Dalvik Heap Size, Dalvik Heap Alloc size and plot a graph with these heap sizes as below

enter image description here

I am using the following code to achieve this:

import glob
import os
import re
import numpy as np
import matplotlib.pyplot as plt

os.chdir("D:\Python_Trainings\MemInfo\Data")
pid_arr = []
native_heapsize_arr = []
dalvik_heapsize_arr = []
native_heapalloc_arr = []
dalvik_heapalloc_arr = []
pkg_name_arr = []

#Method to parse the memory dump files
def parse_dumpFiles():
  for data_file in glob.glob("*.txt"):
    try:
      fo = open(data_file,"r")
      for line in fo:
        pid_match = re.search('pid\s+(\d+)',line)
        pkg_name_match = re.search("$$(\w+\.+\w+\.+\w+)$$",line)
        native_heapsize_match = re.search('(Native+\s+\d+\s+\d+\s+\d+\s+)+(\d+)',line)
        dalvik_heapsize_match = re.search('(Dalvik+\s+\d+\s+\d+\s+\d+\s+)+(\d+)',line)
        native_heapalloc_match = re.search('(Native+\s+\d+\s+\d+\s+\d+\s+\d+\s+)+(\d+)',line)
        dalvik_heapalloc_match = re.search('(Dalvik+\s+\d+\s+\d+\s+\d+\s+\d+\s+)+(\d+)',line)
        if pid_match:
          pid_arr.append(int(pid_match.group(1)))
        if native_heapsize_match:
          native_heapsize_arr.append(native_heapsize_match.group(2))
        if dalvik_heapsize_match:
          dalvik_heapsize_arr.append(dalvik_heapsize_match.group(2))
        if native_heapalloc_match:
          native_heapalloc_arr.append(native_heapalloc_match.group(2))
        if dalvik_heapalloc_match:
          dalvik_heapalloc_arr.append(dalvik_heapalloc_match.group(2))
        if pkg_name_match:
          if pkg_name_match.group(1) not in pkg_name_arr:
            pkg_name_arr.append(pkg_name_match.group(1))

    except IOError:
      print "Error: can\'t find file or read data"
    finally:
      fo.close()
#end of parse_dumpFiles() Method

#Method to plot from Memory Dumps
def plt_MemDump(pid_arr, native_heapsize_arr, dalvik_heapsize_arr,   native_heapalloc_arr, dalvik_heapalloc_arr, pkg_name_arr):

#Create a figure and axes with room for the table
  fig = plt.figure()
  ax = plt.axes([0.2, 0.2, 0.7, 0.7])

#Create labels for the rows and columns as tuples
  colLabels = ('0','10', '20', '30', '40', '50', '60', '70', '80', '90', '100')
  rowLabels = ('Native Heap Size','Native Heap Allocated','Dalvik Heap Size','Dalvik Heap Allocated','PID')

#Table data as a numpy array
  tableData = np.asarray([native_heapsize_arr,dalvik_heapsize_arr,native_heapalloc_arr,dalvik_heapalloc_arr,pid_arr],dtype=int)

#Get the current color cycle as a list, then reset the cycle to be at the beginning
  colors = []     
  while True:
    colors.append(ax._get_lines.color_cycle.next())
    if colors[0] == colors[-1] and len(colors)>1:
      colors.pop(-1)
      break

  for i in xrange(len(colors)-1):
    ax._get_lines.color_cycle.next()

#Show the table
  the_table = plt.table(cellText=tableData, rowLabels=rowLabels, rowColours=colors,    colLabels=colLabels, loc='bottom')

#Make some line plots
  xAxis_val = [0,10,20,30,40,50,60,70,80,90,100]
  ax.plot(xAxis_val,native_heapsize_arr, linewidth=2.5, marker="o", label="Native Heap Size")
  ax.plot(xAxis_val,dalvik_heapsize_arr, linewidth=2.5, marker="D", label="Dalvik Heap Size")
  ax.plot(xAxis_val,native_heapalloc_arr, linewidth=2.5, marker="^",label="Native Heap Allocated")
  ax.plot(xAxis_val,dalvik_heapalloc_arr, linewidth=2.5, marker="h", label="Dalvik Heap Allocated")

#show legend
  plt.legend(loc='upper right', fontsize=10)

#set the column color where PID is different from 1st PID 
  c=the_table.get_celld()[(5,3)]
  c.set_color('r')
  i=0
  while i<=10:
    c=the_table.get_celld()[(5,i)]
if(c.get_text().get_text()!=((the_table.get_celld()[(5,0)]).get_text().get_text())):
  c.set_color('r')
  (the_table.get_celld()[(4,i)]).set_color('r')
  (the_table.get_celld()[(3,i)]).set_color('r')
  (the_table.get_celld()[(2,i)]).set_color('r')
  (the_table.get_celld()[(1,i)]).set_color('r')
  i+=1

#Turn off x-axis ticks and show the plot              
  plt.xticks([])

#Configure Y axis
  plt.ylim(0,60000)
  plt.yticks([10000,20000,30000,40000,50000,60000])
  plt.grid(True)

#Setting the name of the window title of the plot  
  fig.canvas.set_window_title(pkg_name_arr[0] + "- Memory Dump Plot")

#Setting the Title of the plot
  plt.title(pkg_name_arr[0],color='r',fontsize=20)

#Setting Y Label 
  plt.ylabel('Heap Size', fontsize=14, color='r')

#show plot
  plt.show()
#end of plt_MemDump() Method

parse_dumpFiles()
plt_MemDump(pid_arr, native_heapsize_arr, dalvik_heapsize_arr, native_heapalloc_arr, dalvik_heapalloc_arr, pkg_name_arr)

Now I want to mark the columns of the table with some color where PID value differ with 1st PID value.(eg,column 30,60 & 100).

Can anybody help me to achieve this?

Francesco Montesano · Accepted Answer

matplotlib.pyplot.table gives you already the tool to do what you need:

cellColours (of the same shape of cellText): let you chose the colour for each cell
colColours: works as rowColours, but for the column headers

If you want all the cells in a column to have a specific colour you can do something like this

cellcolours = np.empty_like(tableData, dtype='object')
for i, cl in enumerate(colLabels):
    if cl > 50:
        cellcolours[:,i] = 'r'

and then call plt.table (although I suggest you to change to ax.table) adding the cellColours=cellcolours keyword.

If you want also the column headers coloured, just do something like above

If you want to be able to change the cells after you create the table, table.get_celld() returns a dictionary of cells, whose keys are tuples of the position of each cell, eg:

{(0, 0): ,  # these are column headers
 (0, 1): ,
 (0, 2): ,
 (0, 3): ,
 [...]
 (1, -1): , # this is a row header
 (1, 0): ,  # this is a cell
 (1, 1): ,
 (1, 2): ,
 (1, 3): ,
 [...]
 (2, -1): , # this is a row header
 [...] }

You can access each cell using the tuple, e.g.

c=table.get_celld()[(1,1)]

You can set the cell color with c.set_color() and inspect the text in the cell with c.get_text().get_text() (the first get_text returns a Text instance, the second the string in it)

How to set background color of a column in a matplotlib table

Answers (1)

Related Questions