Saturday 27 February 2016

Collating and Plotting Perfmon data

We've been using Perfmon to collect information about the various apps we have for quite a while now and it was always a massive pain to present the information in a meaningful manner.

The problem is that each file contains a day's worth of data, I realize that it's possible to keep a counter for longer but the comparison problem remains the same and we wanted to be able to compare performance counters on different days.

I thought I would try to use Python and the pandas library for this.

I installed the pandas library by installing Anaconda, in particular I'm using version 2.3 for Python 2.7, yes it's all a bit confusing and no, I'm not using Visual Studio but Spyder, which isn't' great but mostly works

The script below is very much tailor made to my current needs, so there are few quirks:
  1. It assumes all files will be full day files at 15 seconds intervals.
  2. It assumes that all files will have the same counters in the same order. It should be relatively trivial to change the script to take a config file with the counters needed.
  3. It calculates a total CPU usage of all processes except SQL Server by subtracting SQL server usage from total cpu usage.
There are several command line options that should, hopefully, be self explanatory

Without further ado here's the script:

Needs a bit more work - 
#way to limit the number of days to be plotted, 
#explore different plots perhaps for different counters
#Actually get the files from the servers

from os import listdir, mkdir
from os.path import isfile, join, exists
import pandas as pd
import matplotlib.pyplot as plt
import re
import sys

def RenameColumns (current, fileName):
    current = current[:-1]
    current.append(fileName)
    return current
    
def Process (allFiles, numberToPlot, category, currentFile):
    ##Add the date column. This will not work if different files start at different times and have different intervals
    df = pd.DataFrame(allFiles[0][1][allFiles[0][1].columns[0]])
    #Rename the column to date
    df.columns = ['date']
    #We only need the time, not the date.
    df['date'] = df.date.apply(lambda x:x.split(' ')[1])
    #Get the last file number
    lastFile = currentFile + numberToPlot
    if lastFile > len(allFiles):
        lastFile = len(allFiles)        
    #Process each file into a dataframe
    for fi in range(currentFile, lastFile):
        fileName = allFiles[fi][0]
        df = df.join(allFiles[fi][1][allFiles[fi][1].columns[counter+1]])
        df.columns = RenameColumns(list(df.columns), fileName)
        #This is not ideal but to provide adjacent values would have to loop through i guess.
  #I'm sure there are better ways of dealing with missing values than this
        df[fileName] = df[fileName].apply(lambda x: 0.0 if x ==' ' else x ).astype(float)
    #Return a tuple with name and Dataframe.
    return (allFiles[currentFile][0] +"-" + allFiles[lastFile-1][0] + "-" + category, df)       


def GetCPUUsageRest (allFiles, numberToPlot,numberOfCPUs,  currentFile):
     #Is this really needed...Test you say? ...    
    numberOfCPUs = float(numberOfCPUs)
     ##Add the date column
    df = pd.DataFrame(allFiles[0][1][allFiles[0][1].columns[0]])
    #Rename the column to date
    df.columns = ['date']
    #Get the time
    df['date'] = df.date.apply(lambda x:x.split(' ')[1])
    #Get the last file
    lastFile = currentFile + numberToPlot
    if lastFile > len(allFiles):
        lastFile = len(allFiles)        
    #Process each file into a dataframe
    for fi in range(currentFile, lastFile):
        fileName = allFiles[fi][0]
        #Hardcoding FTW because 
        total= pd.DataFrame(DataFrames[0][1])
        sql = pd.DataFrame(DataFrames[25][1])
        sql[fileName] = sql[fileName].apply (lambda x:x/numberOfCPUs)
        df = df.join(pd.DataFrame(total[fileName] - sql[fileName]))
        df.columns = RenameColumns(list(df.columns), fileName)
        #This is not ideal but to provide adjacent values would have to loop through i guess
        df[fileName] = df[fileName].apply(lambda x: 0.0 if x < 0 else x ).astype(float)
    #Return a tuple with file name and Dataframe
    return (allFiles[currentFile][0] +"-" + allFiles[lastFile-1][0] + "-" + "Rest-Process % CPU" , df)      
    
#These are the counters used. The files need to contain them all otherwise the plots will be all wrong
#I suppose that we could load a list of these from a config file to accomodate changes rather than this
#hard coding.    
counters=[]
counters.append('Processor(_Total)-% Processor Time')
counters.append('Memory-Available KBytes')
counters.append('.NET CLR Exceptions(_Global_)-# of Exceps Thrown  per  sec')
counters.append('.NET CLR Memory(_Global_)-% Time in GC')
counters.append('ASP.NET-Application Restarts')
counters.append('ASP.NET-Request Wait Time')
counters.append('ASP.NET-Requests Queued')
counters.append('ASP.NET-Request Execution Time')
counters.append('ASP.NET Applications(__Total__)-Requests per Sec')
counters.append('Memory-Pages per sec')
counters.append('PhysicalDisk(_Total)-% Disk Read Time')
counters.append('System-Processor Queue Length')
counters.append('Web Service(_Total)-Get Requests per sec')
counters.append('Web Service(_Total)-Post Requests per sec')
counters.append('Web Service(_Total)-Current Connections')
counters.append('SQLServer-Access Methods-Page Splits per sec')
counters.append('SQLServer-Buffer Manager-Buffer cache hit ratio')
counters.append('SQLServer-Buffer Manager-Page life expectancy')
counters.append('SQLServer-Buffer Manager-Checkpoint pages per sec')
counters.append('SQLServer-General Statistics-User Connections')
counters.append('SQLServer-General Statistics-Processes blocked')
counters.append('SQLServer-SQL Statistics-Batch Requests per sec')
counters.append('SQLServer-SQL Statistics-SQL Compilations per sec')
counters.append('SQLServer-SQL Statistics-SQL Re-Compilations per sec')
counters.append('Network Interface-Bytes Total per sec')
counters.append('SQLServer-Process % CPU')

#This is regex for dates in the format yyyyMMdd. It doesn't quite validate as it would happily allow a day of 39
regexDate = '(20\d{2}[0-2]\d{1}[0-3]\d{1})'
#Set plotting style
plt.style.use('ggplot')

cmdargs = str(sys.argv)

if len (sys.argv) != 6 :
    print "This script takes five arguments"
    print "First Argument should be path where csv files to be processed are"
    print "Second Argument should be path where plots will be stored"
    print "Third Argument should be server name"
    print "Forth Argument should be number of Days to Plot"
    print "Fifth Argument should be number of CPUs"
    sys.exit(0)

path = sys.argv[1].rstrip('\\"')
dest = sys.argv[2]
server = sys.argv[3]
numberOfDaysToPlot = int(sys.argv[4])
numberOfCPUs = int(sys.argv[5])

    
if not exists(dest):
    mkdir(dest)

DataFrames = []
allFiles=[]
#Read the files for the input path. 
files = [f for f in listdir(path) if isfile(join(path,f)) and f.endswith('csv')]
for f in files:    
    name = re.search(regexDate, f).group()
    allFiles.append((name, pd.read_csv(join(path,f))))

remainingFiles = len(files)
#It looks like perfmon adds an extra column at the end of the file, which isn't a counter, hence the - 2
for counter in range(len(allFiles[0][1].columns)-2):
    currentFile = 0 
    while remainingFiles > 0:
        DataFrames.append(Process(allFiles, numberOfDaysToPlot, counters[counter], currentFile))
        #if we've reached the last counter, then we can look at the rest
        if counter == len(allFiles[0][1].columns)-3:
            DataFrames.append(GetCPUUsageRest(allFiles, numberOfDaysToPlot, numberOfCPUs, currentFile))
        remainingFiles = remainingFiles - numberOfDaysToPlot
        currentFile = currentFile + numberOfDaysToPlot
    remainingFiles = len(files)

CPUylim = ()     
#All plotting is done here.
for d in DataFrames:
    p=d[1].plot(x='date', title=d[0])
    #Get Y Axis Limit for Total CPU
    if d[0].find('Processor(_Total)-% Processor Time') != -1:
        CPUylim = p.get_ylim()        
    if d[0].find('SQLServer-Process % CPU') != -1:
        p.set_ylim( CPUylim)
    if d[0].find('Rest-Process % CPU') != -1:
        p.set_ylim(CPUylim)   
    #These two set the x ticks and labels make sure that they roughly match up.
    p.set_xticks([750.0,1500.0,2250.0,3000.0,3750.0,4500.0, 5250.0])
    p.set_xticklabels(['03:07:30','06:15:00','09:22:30','12:30:00','15:37:30','18:45:00','21:52:31'])
    fig = p.get_figure()
    fig.set_size_inches(21,13)
    fig.savefig(join (dest, d[0] + ".png"))
    plt.close()
A sample plot generated

No comments:

Post a Comment