Reputation: 1
I'm trying to apply some previously written code to some DNA sequencing data that I have collected. I would like to match my sequencing reads to a document containing specific barcodes and output the number of hits for each match. I am having some very basic trouble with calling the two files to compare. The code compiles, however, when I try to call the file "MOBYtest" in line 77, the output is 0, which indicates that the file is not being called appropriately. Does anyone have any insight into how I can incorporate the two files into the following code?
####################################################################################################
#### Supplementary File 2, barcodebin.c ############################################################
####
#### Barcode assigned to a gene using standard binary search
####
#### Can Alkan
#### 2010
####################################################################################################
*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
typedef struct spair{
char *name;
char *seq;
int count;
}_spair;
int binSearch(char *seq, int s, int e);
void revcomp(char *s, char *r);
static int maxfunc(const void *p1, const void *p2);
struct spair **seqs;
int seqlen;
int main(int argc, char **argv){
FILE *fp;
int i;
char fname[100];
int nseq;
char sequence[100]; char name[100];
char sname[100];
char revseq[100];
int retCode=100000;
int s, e; int olds;
int found;
int crop;
int len;
int longest_barcode=0;
int fastq=0;
char line[1000];
int loop_done=0;
int crop_lim;
int min_barcode = 5;
int verbose = 0;
fname[0]=0; nseq = 0; sname[0]=0;
for (i=1;i<argc;i++){
if (!strcmp(argv[i], "-i"))
strcpy(fname, argv[i+1]);
else if (!strcmp(argv[i], "-s"))
strcpy(sname, argv[i+1]);
else if (!strcmp(argv[i], "-n"))
nseq = atoi(argv[i+1]);
else if (!strcmp(argv[i], "-m"))
min_barcode = atoi(argv[i+1]);
else if (!strcmp(argv[i], "-v"))
verbose = 1;
}
if (fname[0]==0 || nseq==0 || sname[0]==0) return 0;
fp = fopen(fname, "MOBYtest");
if ( !fp ) {
fprintf(stderr, "Could not open file %s\n", fname);
return 255;
}
i=0;
seqs = (struct spair ** ) malloc (sizeof (struct spair *) * nseq);
if (verbose) {
fprintf(stderr, "Loading %d sequences ..", nseq);
}
while (fscanf(fp, ">%s\n%s\n", name, sequence) > 0 ){
seqlen=strlen(sequence);
if (seqlen > longest_barcode) longest_barcode = seqlen;
if (seqlen>=3){
seqs[i] = (struct spair * ) malloc (sizeof (struct spair));
seqs[i]->name = (char *) malloc (sizeof (char) * (strlen (name)+1));
seqs[i]->seq = (char *) malloc (sizeof (char) * (2*seqlen+1));
seqs[i]->count = 0;
strcpy(seqs[i]->seq, sequence);
strcpy(seqs[i]->name, name);
i++;
}
}
nseq = i;
qsort(seqs, nseq, sizeof(struct spair *), maxfunc);
if (verbose) {
fprintf(stderr, " done. %d left. Longest barcode: %d bp\n", nseq, longest_barcode);
}
fclose(fp);
fp = fopen(sname, "r");
if ( !fp ) {
fprintf(stderr, "Could not open file %s\n", sname);
return 255;
}
while (fscanf(fp, "%s\n%s\n", name, sequence) > 0){
if (name[0]=='@'){
fgets(line, 1000, fp); // + line
fgets(line, 1000, fp); // quality line
}
sequence[longest_barcode]=0;
strcpy(revseq, sequence);
s = 0, e = nseq; retCode = 10000; olds=-10;
found=0;
crop = 0;
len = strlen(revseq);
crop_lim = longest_barcode - min_barcode + 1; // ad hoc
while (crop<crop_lim){
revseq[len-crop]=0;
while (s<e && retCode!=0){
retCode=binSearch(revseq, s, e);
if (retCode < 0){e = (s+e)/2;}
else if (retCode > 0){olds=s; s = (s+e)/2;}
else if (retCode == 0){
seqs[(s+e)/2]->count++;
crop = crop_lim+1;
break;
}
if (s>=e || s==olds){
crop++;
s = 0, e = nseq; retCode = 10000; olds=-10;
break;
}
}
}
}
if (verbose) {
fprintf(stderr, " done.\n");
}
fprintf(stdout, "Barcode_Name\tBarcode_Length\tFrequency\n");
for (i=0;i<nseq;i++)
fprintf(stdout, "%s\t%d\t%d\n", seqs[i]->name, strlen(seqs[i]->seq), seqs[i]->count);
}
int binSearch(char *seq, int s, int e){
int med = (s+e)/2;
int ret;
ret = strcmp(seq, seqs[med]->seq);
return ret;
}
void revcomp(char *s, char *r){
int len = strlen(s);
int i;
for (i=0;i<len;i++){
switch(s[i]){
case 'A': r[len-i-1] = 'T'; break;
case 'T': r[len-i-1] = 'A'; break;
case 'C': r[len-i-1] = 'G'; break;
case 'G': r[len-i-1] = 'C'; break;
default: r[len-i-1] = s[i]; break;
}
}
r[len]=0;
}
static int maxfunc(const void *p1, const void *p2){
struct spair *a, *b;
int ret;
a = *((struct spair **)p1);
b = *((struct spair **)p2);
ret = strcmp(a->seq, b->seq);
return ret;
}
Upvotes: 0
Views: 77
Reputation: 132108
Your problem is here:
fp = fopen(fname, "MOBYtest");
that's not how you call fopen()
. If you look at the man page for fopen()
you'll see the signature:
FILE *fopen(const char *pathname, const char *mode);
So, you don't "plug" your filename into the program, you pass it when invoking, i.e. if you compiled this file as my_prog
, you would execute something like:
my_prog -i "MOBYtest" -s "my_list_of_barcodes_I_guess" -n 100
and the MOBYtest
string would come into fopen()
via the pathname
parameter. Now, the mode for opening the file is one or "r", "w", a" etc., which correspond to "read", "write", "append" etc. Again, see the man page.
Some more advice:
Upvotes: 1