Reputation: 1057
I need to read a file with matrix data and create a matrix within my program. The matrix file format looks similar to this:
# Matrix made by matblas from blosum62.iij
# * column uses minimum score
# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
# Blocks Database = /data/blocks_5.0/blocks.dat
# Cluster Percentage: >= 62
# Entropy = 0.6979, Expected = -0.5209
A R N D C Q E G H I L K M F P S T W Y V B Z X *
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
The letters have fixed positions, so since I am interested in only four of them, I could assign x
and y
in the program. What I need is only a matrix in which I can look up values by passing 'x' and 'y' to a function 'GetValue'.
Here is my code. It is only one part of a class, defined in a header file, which holds the matrix and searches it for values. Maybe it is not an elegant way to do that, but I don't have much time, so right now I want to do it fast. Later I will have more time, so I will do it in a better way.
/*
* algorytm.cpp
* implementacja algorytmu
*
* Autor: Mateusz
*
*/
#include <cstdlib>
#include <iostream>
#include <string>
#include <stdio.h>
#include <fstream>
#include <vector>
#include "matryca_sub.h"
#include <sstream>
#include <istream>
#include <sstream>
using namespace std;
/*
 * Builds the score matrix from a file and looks up a single entry.
 *
 * Allocates a 30x30 buffer through CreateMatrix, fills it with ReadMatrix and
 * returns whatever GetValue reports for (x, y).
 *
 * mat_file - path of the matrix file, forwarded to ReadMatrix
 * x, y     - indices forwarded unchanged to GetValue
 * returns  - the value GetValue returns for (x, y)
 */
int ScoreMatrix::MainMatrix(char *mat_file, int x, int y)
{
    cout << "Main matrix function start" << endl;
    CreateMatrix(30);
    ReadMatrix(mat_file);
    const int val = GetValue(x, y);
    // Log before returning; placed after the return statement this line could never run.
    cout << "Main matrix function end" << endl;
    return val;
}
void ScoreMatrix::CreateMatrix(int edge)
{
cout << "Creating sub matrix start" << endl;
//int** scores = new int* [*edge-1];
//for (int i=0; i<=23; i++) scores[i] = new int[*edge-1];
if( scores != 0 ) delete [] scores;
scores = new int [edge*edge];
cols = edge;
cout << "Sub matrix created" << endl;
}
/*
 * Stores val in the flat score buffer at offset x + y*cols.
 *
 * No range checking is performed on x or y. A progress message is written to
 * standard output before and after the store.
 */
void ScoreMatrix::SetValue(int x, int y, int val)
{
    cout << "write to sub matrix start" << endl;
    const int offset = x + (y * cols);
    *(scores + offset) = val;
    cout << "write to sub matrix end" << endl;
}
/*
 * Returns the value stored in the flat score buffer at offset cols*y + x.
 *
 * This is the same offset formula SetValue uses, so GetValue(x, y) reads back
 * exactly what SetValue(x, y, val) wrote. No range checking is performed on
 * x or y.
 */
int ScoreMatrix::GetValue(int x, int y)
{
    return scores[(cols * y) + x];
}
void ScoreMatrix::ReadMatrix(char *mat_file)
{
cout << "start reading matrix from file" << endl;
int row=0;
ifstream mfile;
mfile.open(mat_file);
mfile.precision(2);
mfile.setf(ios::fixed, ios::showpoint);
while(!mfile.eof())
{
for (row=0; row<=23; row++)
{
string line;
getline( mfile, line);
istringstream iss(line);
if (line[0] !='#' && line[0] != ' ')
{
int s;
iss >> s;
for (int i=1; !iss.eof(); i++)
{
iss >> s;
SetValue(i, row, s);
}
}
}
}
cout << "end reading matrix from file" << endl;
}
Header file included:
/*
* File: mat_sub.h
* Author: mateusz
*
* Created on 6 luty 2011, 14:44
*/
#ifndef MAT_SUB_H
#define MAT_SUB_H
#include <cstdlib>
#include <iostream>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <vector>
#include <algorithm>
#include <string.h>
using namespace std;
/*
 * Square integer score matrix kept in one flat, heap-allocated buffer.
 *
 * MainMatrix is the public entry point: it allocates the buffer, fills it
 * from a file and returns one entry. The object owns `scores` and releases it
 * in its destructor.
 */
class ScoreMatrix
{
public:
    // Starts empty: no buffer, zero dimensions.
    ScoreMatrix():
        mat_file ('\0')
        ,cols (0)
        ,rows (0)
        ,scores (0)
    {}
    // Releases the buffer; delete[] on a null pointer is a no-op.
    ~ScoreMatrix() { delete [] scores; }
    char mat_file;
    int MainMatrix(char *mat_file, int x, int y);
    int GetValue(int x, int y);
private:
    // Declared but intentionally not defined: a member-wise copy would make
    // two objects delete the same buffer.
    ScoreMatrix(const ScoreMatrix&);
    ScoreMatrix& operator=(const ScoreMatrix&);
    int cols, rows;   // matrix dimensions
    int* scores;      // flat buffer owned by this object, or 0 when empty
    void CreateMatrix(int edge);
    void SetValue(int x, int y, int val);
    void ReadMatrix(char *mat_file);
};
#endif /* MAT_SUB_H */
Before the segfault, the program prints one of many "start reading matrix from file" messages.
Upvotes: 2
Views: 2325
Reputation: 490058
Oh my. I hope you'll forgive me if I sound harsh, but this code has a lot of problems, going all the way from the basic design all the way to the details of things like the loops you use to read data from the file.
First of all, it seems to me that your "matrix" class needs a healthy dose of "single responsibility" applied. I'd strip it down to being a 2D matrix, nothing else.
Second, I'd get rid of handling the memory management on your own. All you really need is a std::vector
, with enough of a front-end to provide 2D addressing.
Third, I'd get rid of the "getValue"/"setValue", and use subscripting like every decently-designed matrix since scientific programmers converted from assembly language to Fortran in the 1950s.
Taking those into account, we get a simplified matrix class, something like this:
// Minimal 2D matrix stored row by row in a single std::vector.
// Elements are addressed as m(x, y), with x the column and y the row.
// No range checking is performed.
template <class T>
class matrix {
    // Declared in the order the constructor initialises them.
    std::size_t cols;
    std::vector<T> data;
public:
    // Creates a matrix with x columns and y rows; elements are value-initialised.
    matrix(int x, int y) : cols(x), data(static_cast<std::size_t>(x) * y) {}

    // Read/write access to the element in column x of row y.
    T &operator()(int x, int y) {
        return data[cols * y + x];
    }

    // Read-only access, so a matrix can be used through a const reference.
    T const &operator()(int x, int y) const {
        return data[cols * y + x];
    }
};
Note that for simplicity, when you subscript this, you use ()
roughly like you would in Fortran or BASIC, rather than using []
like you normally would in C or C++. You can support the latter, but it takes a lot more (and uglier) code. Also note that you do not want cols * (y+x)
, you want (cols * y) + x
if you insist on using parentheses (though I think it's pretty silly, given that the relative precedence of multiplication and addition is quite well known). Finally, I've made this a template simply because it's easy to do so -- if you want to specify the type directly, that's obviously pretty easy to do.
Finally, I'd make reading the data from the file into a matrix into a free function. In doing so, I'd get rid of all the while (!whatever.eof())
, because those are pretty much guaranteed to work incorrectly.
// Reads whitespace-separated integers from a text file into m.
//
// Lines that are empty or start with '#' or ' ' are skipped. On every other
// line the first character (the row letter) is skipped and each integer that
// follows is stored as m(x, y), with x restarting at 0 on every line. y
// advances only for lines that yielded at least one value, so a header line
// made of letters does not use up a row.
//
// m must already be large enough for the data; no bounds are checked here.
template <class T>
void read_matrix(std::string const &filename, matrix<T> &m) {
    std::ifstream infile(filename);
    std::string line;
    int y = 0;

    while (std::getline(infile, line)) {
        if (line.empty() || line[0] == '#' || line[0] == ' ')
            continue;

        std::istringstream converter(&line[1]); // &line[1] to skip leading letter
        int value;
        int x = 0; // column index starts over on each row
        while (converter >> value)
            m(x++, y) = value;

        if (x > 0)
            ++y;
    }
}
Technically, the &line[1]
isn't guaranteed to work in C++03, but C++11 does guarantee it, largely because it works with all known implementations.
Upvotes: 3
Reputation: 4254
(In case your code didn't work) Here is C code that reads this (and only this) format:
// Maps a one-letter code to its index in the matrix it is read into.
// Upper- and lower-case forms of the same letter share an index (0..19);
// any other character yields -1.
int base2num(char b)
{
    // Position in these tables is the index returned for that letter.
    static const char upper[] = "ACGTPVLIMFYWHKRQNEDS";
    static const char lower[] = "acgtpvlimfywhkrqneds";
    int idx;

    // Only the 20 letters are scanned, never the terminating '\0'.
    for (idx = 0; idx < 20; idx++)
    {
        if (b == upper[idx] || b == lower[idx])
            return idx;
    }
    return -1;
}
// Parses the numbers of one data row into row i of the 20-column matrix mat.
//
// line - text of the row; its first character (the row letter) is skipped
// i    - destination row index in mat
// mat  - flat matrix with 20 ints per row
// arr  - column letters in file order; the first 23 entries are consulted
//
// The m-th number on the line belongs to column letter arr[m]. Every number is
// consumed even when its letter has no index (base2num returns -1), so the
// remaining columns stay aligned. Parsing stops early when the line runs out
// of numbers.
void str2int(char *line, int i, int *mat, char *arr)
{
    int m,k;
    long val;
    char *cur;
    char *next;
    cur=line+1;
    for (m=0;m<23;m++)
    {
        val=strtol(cur,&next,0);
        if (next==cur) // strtol converted nothing: no more numbers on this line
            break;
        cur=next;
        k=base2num(arr[m]);
        if (k==-1)
            continue;
        mat[i*20+k]=(int)val;
    }
}
// Reads the matrix file line by line and fills mat through str2int.
//
// fp  - open stream positioned at the start of the matrix file
// mat - flat matrix with 20 ints per row, written by str2int
//
// Lines starting with '#' are skipped. A line whose character at index 3 is
// one of the letters known to base2num is taken as the header row: every
// third character from index 3 up to the '*' is stored as a column letter.
// Any other line is a data row if its first character is a known letter;
// rows for other letters are skipped.
void parseinp(FILE *fp, int *mat)
{
    int i,j,step;
    char line[256];
    // Zero-filled so that a data row seen before the header maps to no column
    // (base2num returns -1 for '\0') instead of reading indeterminate bytes.
    char arr[23]={0};
    while (fgets(line,256,fp)!=NULL)
    {
        if (line[0]=='#')
            continue;
        // Only inspect line[3] when the line really is that long.
        if (line[0]!='\0' && line[1]!='\0' && line[2]!='\0'
            && base2num(line[3])!=-1)
        //assumption: the first character is
        //part of the 20 bases
        {
            i=3;
            j=0;
            // this is the first line of the table,
            // that which contains A R N D etc.
            // store the chars in arr, never writing past arr
            // or reading past the end of the line
            while (j<23 && line[i]!='\0' && line[i]!='*')
            {
                arr[j]=line[i];
                j++;
                // move three characters on, stopping at the terminator
                for (step=0; step<3 && line[i]!='\0'; step++)
                    i++;
            }
            continue;
        }
        i=base2num(line[0]);
        if (i<0) // not one of the 20 core bases
        {
            continue;
        }
        else
        {
            str2int(line,i,mat,arr);
        }
    }
}
// Loads the matrix from the file "BLOSUM62" in the current directory.
//
// Returns a zero-initialised array of 400 ints filled by parseinp, which the
// caller must release with free(), or NULL if the allocation or opening the
// file fails (the reason is reported with perror).
int * get_blosum()
{
    FILE *fp;
    int *mat;
    // The matrix holds ints, so allocate ints rather than doubles.
    mat=(int *) calloc(400, sizeof(int));
    if (mat==NULL)
    {
        perror("failed to allocate protein matrix!");
        return NULL;
    }
    fp =fopen("BLOSUM62","r");
    if (fp==NULL)
    {
        perror("failed to open protein matrix file!");
        // Do not hand a NULL stream to parseinp/fclose.
        free(mat);
        return NULL;
    }
    parseinp(fp, mat);
    fclose(fp);
    return mat;
}
Upvotes: 1
Reputation: 272467
This:
scores = new int[[*edge] [*edge]];
is not valid C++ syntax. Do you mean this?:
scores = new int[*edge * *edge];
Similarly, when you attempt to access an element of scores
, you should do something like:
scores[(cols * y) + x] = val;
Incidentally, there's no point passing an int
by pointer into a function if you don't intend to modify the original value. I would rewrite CreateMatrix()
as:
// Allocates a square buffer of edge*edge ints and records the row width.
// A buffer left over from an earlier call is released first.
void ScoreMatrix::CreateMatrix(int edge)
{
    delete [] scores; // no-op when scores is still null
    scores = new int[edge*edge];
    cols = edge;
}
Upvotes: 1
Reputation: 131789
To have something evaluated before something else, you don't use []
but parentheses. It should look like this:
// Allocates a square buffer of (*edge)*(*edge) ints and records the row width.
// A buffer left over from an earlier call is released first.
void ScoreMatrix::CreateMatrix(int *edge)
{
    delete [] scores; // no-op when scores is still null
    scores = new int[(*edge) * (*edge)];
    cols = *edge;
}
// Stores val at column x of row y.
// Row-major offset: y full rows of `cols` elements, then x within the row.
void ScoreMatrix::SetValue(int x, int y, int val)
{
    scores[(cols * y) + x] = val;
}
// Returns the value at column x of row y.
// scores[...] already yields an int, so it is returned directly, using the
// row-major offset cols*y + x.
int ScoreMatrix::GetValue(int x, int y)
{
    return scores[(cols * y) + x];
}
Upvotes: 0
Reputation: 5300
It looks like you are using the []
syntax incorrectly. I think you want to change the following lines:
scores = new int[[*edge] [*edge]];
scores[cols *[y + x]] = val;
return *scores[cols *[x + y]];
to
scores = new int[(*edge) * (*edge)];
// row-major offset: y full rows of cols elements, then x within the row
scores[(cols * y) + x] = val;
// scores[...] is already an int, so no dereference is applied
return scores[(cols * y) + x];
Notice the use of parentheses ()
instead of []
.
Upvotes: 0