Maxim
Maxim

Reputation: 119

g++ optimization for overloading operator

I wrote header for arr class in arr.h with overloaded = operator for 1D-array and checked for the time needed for

A=B, where A, B arrays.

Time was comparable to the time in the simple loop for double array (without class arr) for optimization level less than 3.

#!/bin/bash 

FLAG=$1
echo $FLAG

g++ -o main $FLAG -std=c++11 main.cpp   -DARR && main
g++ -o main $FLAG -std=c++11 main.cpp         && main

# end of the script

CMP -O0 ; CMP -O1 ; CMP -O2 ; CMP -O3 ; CMP -Ofast

gives

-O0

-O1

-O2

-O3

-Ofast

But for -O3 and -Ofast simple loop demonstrates dramatic decrease of time (second column with numbers). How can I get the same result for arr class?

main.cpp

#include <iostream>
#include <iomanip>
#include <time.h>

#ifdef ARR
#include "arr.h"
#endif

using namespace std;

int main() {
    int n=1e8;
    register int i; 

    clock_t time1=clock(),time2,time3=0;

#ifdef ARR
    arr<double> A(n),B(n);

    for (i=0; i<n; i++)
        B(i)=i;
#else
    double * A = new double [n];
    double * B = new double [n];

    for (i=0; i<n; i++)
        B[i]=i;
#endif

    time2=clock();

#ifdef ARR
    A=B;
#else
    for (i=0; i<n; i++)
        A[i]=B[i];
#endif

    time3=clock();

#ifdef ARR
    cout<<fixed<<setw(16)<< scientific <<setprecision(16) <<" with    ARR:    "<<(time2-time1)/ (double)CLOCKS_PER_SEC<< " "<< (time3-time2)/ (double)CLOCKS_PER_SEC<<  " "<< (time3-time1)/ (double)CLOCKS_PER_SEC<<endl;
#else
    cout<<fixed<<setw(16)<< scientific <<setprecision(16)<<" without ARR:    "<<(time2-time1)/ (double)CLOCKS_PER_SEC<< " "<< (time3-time2)/ (double)CLOCKS_PER_SEC<<  " "<< (time3-time1)/ (double)CLOCKS_PER_SEC<<endl<<endl;
#endif

    return 0;
}

arr.h

#ifndef ARR_H
#define ARR_H

using namespace std;

template <class T> class arr
{
public:
    T* data;
    int size, dim=0;
    int dim1=0, dim2=0, dim3=0, dim4=0;
    int shape[4];

    arr() { }   

    arr(const int & isize) {   //  constructor
        dim=1;
        size=isize;
        dim1=size;
        data = new T[size];
        shape[0]=size;

        register int i;
        for (i=0; i<size; i++)
            data[i]=0.;
    }

    ~arr() {  delete [] data; }

    T &operator()(const int & index) { 
        return data[index];
    }

    arr &operator=(const arr & A) {
        register int i;
        for (i=0; i<size; i++)
            data[i]=A.data[i];
        return *this;
    }

    arr &operator=(const T & A) {
        register int i;
        for (i=0; i<size; i++)
            data[i]=A;
        return *this;
    }
};

#endif /*ARR_H */

Upvotes: 0

Views: 64

Answers (1)

1201ProgramAlarm
1201ProgramAlarm

Reputation: 32732

Since you don't do anything with A after copying the array, the optimizer is completely removing the loop. You need to use some value in the A array (for example, output one element of the array in your cout statement).

Upvotes: 1

Related Questions