Reputation: 197
I am required to read the following text from the keyboard (stdin). Please note that it will be entered by the user from the keyboard in this format only.
#the total size of physical memory (units are B, KB, MB, GB)
512MB 2 #the following are memory allocations
{
abc = alloc(1KB);
{
y_ = alloc(128MB);
x1= alloc(128MB);
y_ = alloc(32MB);
for (i = 0; i < 256; i++) abc[i] =alloc(512kB);
x1 = alloc(32MB); x2 = alloc(32MB); x3 = alloc(32MB);
x1.next = x2, x2.next = x3, x3.next = x1;
}
abc = alloc(256MB);
}
So basically, let me break it down. A line beginning with a # sign is considered a comment and is ignored. The first two values are the physical memory size and the number of generations. A global bracket will then be opened, and it may be followed by a line such as
abc = alloc(1KB);
where abc is the object name, 1KB is the memory size allocated.
x1.next = x2,
here x1 points to x2.
for (i = 0; i < 256; i++) abc[i] =alloc(512kB);
The for loop is entered in this format; it can have a command on the same line, or it can have nested for loops.
I have the following code that takes care of this somewhat. I want to know how to improve on it. Please do help.
And my code is this:
#include <stdio.h>
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>
using namespace std;
using std::stringstream;
// File-scope state shared between main() and the helper functions below.
// pMem / sGen       : raw tokens read for the memory size and the generation count.
// comment           : receives the rest of a skipped comment line.
// val               : unit suffix written by extract().
// input / input_for : current line in main()'s outer loop / in the for-body loop.
// id / id_size      : object name and size text written by func_insert()/func_insert_for().
// init_str1, init_str2, inc_str : pieces of a parsed for-header (set in main()).
// id_dummy, sss     : scratch strings used by func_insert_for().
// s_out, id_dummy1  : not referenced in the code visible here.
string pMem,sGen, comment,val,input,input_for,id_size,id,init_str1, init_str2, inc_str, id_dummy,s_out,sss, id_dummy1;
// gen / pMem_int    : numeric generation count and numeric part of the memory size.
// gBrckt, oBrckt, cBrckt   : global-brace flag and counts of "{" / "}" lines in main().
// for_oBrckt, for_cBrckt   : counts of for-body lines containing '{' / '}'.
// v1, v2            : start and bound taken from a for-header; y, y1 : loop counter and step
//                     used by func_insert_for().
// i                 : loop counter used by extract(); g is not referenced in the code visible here.
int gen=0, pMem_int=0,i=0, gBrckt =0,cBrckt=0, oBrckt=0, id_size_int,v1,v2, for_oBrckt=0,for_cBrckt=0,y=0, y1=0, g=0;
// pMem_ulong is written by every toByte() call; id_size_ulong holds the last allocation size.
unsigned long pMem_ulong =0, id_size_ulong;
// Fixed-size sscanf destinations: t/m for "name = alloc(size)", init1/init2/inc for a for-header.
char t[20], m[256], init1[10],init2[10],inc[10];
// String positions. NOTE(review): these are `unsigned` while std::string::find returns
// std::string::size_type, so values are narrowed where the two types differ in width.
unsigned pos_start, pos,pos_strt=0,pos_end=0;
// Forward declarations of the helpers defined after main().
string extract(string pMem_extract);
unsigned long toByte(int pMem_int_func, string val);
void commentIgnore(string& input);
void func_insert();
void func_insert_for();
// Not referenced in the code visible here.
stringstream out;
void commentIgnore_for(string& input_for);
// Entry point. Reads the physical-memory size and the generation count from
// stdin, expects a line holding the global "{", then processes the remaining
// input one line at a time until the global brace is balanced or input ends.
// All results are left in the file-scope globals.
int main()
{
/* Reading the input main memory and num of generations */
/* Ignoring comment line */
// Each of the three identical blocks below discards one comment line, and only
// when "#" arrives as a separate whitespace-delimited token. A comment written
// as "#text" (no space) is not recognised, and at most three are skipped.
cin >> pMem;
if(pMem == "#") {
cin.clear();
pMem.clear();
getline(cin,comment);
cin >> pMem;
}
if(pMem == "#") {
cin.clear();
pMem.clear();
getline(cin,comment);
cin >> pMem;
}
if(pMem == "#") {
cin.clear();
pMem.clear();
getline(cin,comment);
cin >> pMem;
}
/* Reading input generations */
// Same three-times comment skip as above, applied to the generation token.
cin>> sGen;
if(sGen == "#") {
cin.clear();
sGen.clear();
getline(cin,comment);
cin >> sGen;
}
if(sGen == "#") {
cin.clear();
sGen.clear();
getline(cin,comment);
cin >> sGen;
}
if(sGen == "#") {
cin.clear();
sGen.clear();
getline(cin,comment);
cin >> sGen;
}
/* Convert sGen and physical memory to int and report error if not a number */
// atoi yields 0 for non-numeric text, so a literal generation count of 0 is
// rejected as well. NOTE(review): exit(0) reports success to the caller even
// though this is an error path.
gen = atoi(sGen.c_str());
if(gen ==0) {
cerr << "Generation must be a number"<<endl;
exit(0);
}
pMem_int = atoi(pMem.c_str());
// cout<< gen<<" "<<pMem_int<<endl;
/* Now that the number from pMem is removed, get its unit B,MB,KB */
// The return value is ignored; extract() also stores the unit in the global val.
extract(pMem); /* returns val(string) */
/* convert the given physical memory to Byte. input: pMem_int*/
// The return value is ignored; toByte() also stores the result in pMem_ulong.
toByte(pMem_int, val); /* return(pMem_ulong)*/
// move pMem_ulond to another location to keep address intact
/* read rest of the inputs */
/* Ignore comment lines before the global bracket */
// Same three-times comment skip as above, applied to the token that should be "{".
cin >> input;
if(input == "#"){
cin.clear();
input.clear();
getline(cin,comment);
cin >> input;
}
if(input == "#"){
cin.clear();
input.clear();
getline(cin,comment);
cin >> input;
}
if(input == "#"){
cin.clear();
input.clear();
getline(cin,comment);
cin >> input;
}
// The token must be exactly "{"; anything else (including "{" glued to other
// text) is reported as a syntax error.
if(input.compare("{") ==0)
gBrckt=1;
else {
cerr<< "Syntax error\n";
exit(0);
}
/* Clearing the input stream for next input */
// Drops whatever follows the "{" token on the same line.
cin.ignore(numeric_limits<streamsize>::max(), '\n');
cin.clear();
input.clear();
//cout<<"input: "<<input<<endl;
// Main loop: one iteration per input line.
while( getline(cin,input)) {
// Stops when the line is literally the text CTRL-D; real end-of-input ends
// the loop through getline() failing.
if(input == "CTRL-D")
break;
// Cut the line at the first '#'.
commentIgnore(input);
//cout<<"inputloop: "<<input<<endl;
/* If input = '{' or '}'*/
// Only lines that consist of exactly "{" or "}" are counted.
if(input.compare("{") ==0)
oBrckt = oBrckt + 1;
if (input.compare("}") ==0)
cBrckt = cBrckt + 1;
// Plain allocation line: "alloc" occurs and lies before any "for". When the
// line has no "for", find() returns npos, which compares greater than any
// real position, so the second test passes.
if (((input.find("alloc"))!= string::npos) && (input.find("alloc") < input.find("for"))) {
func_insert();
//call the allocate function here with name: id, size: id_size_ulong
}
// Line containing "for": parse the loop header, then consume the loop body.
if ((input.find("for")) != string::npos) {
// Extracts loop variable, start value, compared variable, bound and increment text.
// NOTE(review): init1/init2/inc are 10-byte buffers and the conversions carry
// no field width; the sscanf return value is not checked.
sscanf(input.c_str(), "for (%s = %d; %s < %d; %[^)])", init1, &v1, init2, &v2, inc);
init_str1 = init1, init_str2 = init2, inc_str = inc;
cout<<init1<<" ="<< v1<<" "<<init_str1<<" < " << v2<< " "<< inc_str<<endl;
cout << input <<endl;
// The initialised variable and the compared variable must have the same name.
if(init_str1 != init_str2) {
cerr << "Error!\n";
exit(0);
}
// Allocation written on the same line as the for header.
if ((input.find("alloc"))!= string::npos) {
// unsigned pos = (input.find("alloc"));
if((input.find(";")) != string::npos) {
// Text after the first ')' on the line; it is only printed.
pos_start = (input.find(")")+1);
string alloc_substr = input.substr(pos_start);
cout<<"Substring alloc: "<< alloc_substr<<endl;
// NOTE(review): func_insert() parses the global `input` (the whole for line),
// not alloc_substr.
func_insert();
//call the allocate function here with name: id, size: id_size_ulong
}
else {
cerr << "ERROR: SYNTAX\n";
exit(0);
}
}
// cin.ignore();
// Reads the following lines as the loop body. This runs even when the
// allocation was on the header line, so at least one further line is consumed.
while(getline(cin,input_for)) {
commentIgnore_for(input_for);
// A line containing '{' / '}' bumps the matching counter once, however many
// braces it holds; the substr() results are unused.
if ((input_for.find("{") != string::npos)) {
pos = input_for.find("{");
for_oBrckt = for_oBrckt+1;
string for_brckt = input_for.substr(pos,pos);
cout<< "Found: " << for_oBrckt<<endl;
}
if ((input_for.find("}") != string::npos)) {
pos = input_for.find("}");
for_cBrckt = for_cBrckt+1;
string for_brckt = input_for.substr(pos,pos);
cout<< "Found: " << for_cBrckt<<endl;
}
if (((input_for.find("alloc"))!= string::npos) && (input_for.find("alloc") < input_for.find("for"))) {
func_insert_for();
//call the allocate function here with name: id, size: id_size_ulong
}
// The body ends once the two counters agree. They both start at 0 and are
// never reset, so a first body line without braces ends the body at once.
if(for_oBrckt == for_cBrckt)
break;
}
cout<<"out of break"<<endl;
}
// Pointer-assignment line ("a.next = b"): currently routed to func_insert().
if (((input.find(".next"))!= string::npos) && (input.find(".next") < input.find("for"))) {
func_insert();
//call the allocate function here with name: id, size: id_size_ulong
}
// gBrckt is 1 here, so this leaves the loop once the "}" lines outnumber the
// "{" lines by one, i.e. when the global brace has been closed.
if(((cBrckt-oBrckt)) == gBrckt)
break;
}
}
/*---------------------- Function definitions --------------------------------*/
/* Function to extract the string part of physical memory */
string extract(string pMem_extract) {
i=0;
const char *p = pMem_extract.c_str();
for(i=0; i<=(pMem_extract.length()); i++) {
if (*p=='0'|| *p=='1'|| *p=='2'|| *p=='3'|| *p =='4'|| *p=='5'|| *p=='6'|| *p=='7'|| *p=='8'|| *p=='9')
*p++;
else {
val = pMem_extract.substr(i);
return(val);
}
}
}
/*
 * Convert a size given as <count><unit> into bytes.
 *
 * pMem_int_func : numeric part of the size; must not be negative.
 * val           : unit suffix - B, KB, MB or GB. Case is ignored, so the "kB"
 *                 spelling used in the sample input is accepted too.
 * Returns the size in bytes. The same value is stored in the global
 * pMem_ulong, because callers read it from there.
 *
 * On an unknown unit, a negative count, or a result that does not fit in
 * unsigned long, a message is written to cerr and the process exits with a
 * failure status.
 */
unsigned long toByte(int pMem_int_func, std::string val)
{
    // `val` is a by-value copy, so normalising its case does not affect the caller.
    for (std::string::size_type k = 0; k < val.size(); ++k)
        val[k] = static_cast<char>(std::toupper(static_cast<unsigned char>(val[k])));

    unsigned long multiplier = 0;
    if (val == "B")
        multiplier = 1UL;
    else if (val == "KB")
        multiplier = 1UL << 10;
    else if (val == "MB")
        multiplier = 1UL << 20;
    else if (val == "GB")
        multiplier = 1UL << 30;
    else {
        std::cerr<<"Missing the value in memory, B, KB, MB, GB\n";
        std::exit(EXIT_FAILURE);
    }

    if (pMem_int_func < 0) {
        std::cerr<<"Memory size must not be negative\n";
        std::exit(EXIT_FAILURE);
    }

    const unsigned long count = static_cast<unsigned long>(pMem_int_func);
    // Guard the multiplication: unsigned long is only 32 bits wide on some platforms.
    if (count > std::numeric_limits<unsigned long>::max() / multiplier) {
        std::cerr<<"Memory size is too large\n";
        std::exit(EXIT_FAILURE);
    }

    pMem_ulong = count * multiplier;
    return pMem_ulong;
}
/*
 * Strip a trailing comment from a line.
 *
 * input : line to clean, modified in place. Everything from the first '#'
 *         to the end of the line is removed; a line without '#' is left
 *         untouched.
 *
 * The position is kept as std::string::size_type. Storing it in a narrower
 * `unsigned` makes the comparison with npos always true where size_type is
 * 64 bits wide, and erase() then throws std::out_of_range for every line
 * that has no comment.
 */
void commentIgnore(std::string& input)
{
    const std::string::size_type hash_pos = input.find('#');
    if (hash_pos != std::string::npos)
        input.erase(hash_pos);
}
/*
 * Parse one "name = alloc(SIZE);" statement from the global line `input`.
 *
 * On success the globals are updated as follows:
 *   id            - object name (text before '=')
 *   id_size       - size text between the parentheses, e.g. "128MB"
 *   id_size_int   - numeric part of the size
 *   id_size_ulong - size in bytes
 * When the line does not start with such a statement (for example a
 * ".next" assignment or a for-header) nothing is modified and nothing is
 * printed, so stale values from an earlier line are never reported again.
 *
 * Only the first statement on the line is parsed.
 */
void func_insert() {
    // The widths 19 and 255 are one less than the sizes of t[20] and m[256], so
    // an over-long token cannot overrun the buffers. The name is read with a
    // scanset that stops at '=', so "x1= alloc(...)" parses as well as
    // "x1 = alloc(...)".
    if (sscanf(input.c_str(), " %19[^= \t] = alloc(%255[^)])", t, m) != 2)
        return;

    id = t;
    id_size = m;
    std::cout<<"Tag: "<<id <<" Memory: "<<id_size<<std::endl;

    extract(id_size); /* leaves the unit (B, KB, MB, GB) in val */
    id_size_int = std::atoi(id_size.c_str());

    // toByte() also writes its result to pMem_ulong, which holds the total
    // physical memory; restore it so an allocation does not overwrite that total.
    const unsigned long physical_bytes = pMem_ulong;
    id_size_ulong = toByte(id_size_int, val);
    pMem_ulong = physical_bytes;
}
/*
 * Parse one "name = alloc(SIZE);" statement from a for-loop body line held
 * in the global `input_for`.
 *
 * Plain target ("x = alloc(1KB);"): id, id_size, id_size_int and
 * id_size_ulong are set exactly as for a statement outside a loop.
 *
 * Indexed target ("abc[i] = alloc(512KB);"): the index is stripped so that id
 * holds the bare name, the loop step is derived from the increment text in
 * inc_str ("i++", "++i", "i += N" and "i = i + N" are understood), and the
 * range v1 .. v2 is walked in steps of y1 as the place where the per-element
 * allocation belongs.
 *
 * When the line does not start with an alloc statement nothing is modified.
 * An increment that does not yield a positive step is reported on cerr and
 * ends the program, because stepping by zero would never terminate.
 */
void func_insert_for() {
    // The widths 19 and 255 are one less than the sizes of t[20] and m[256]; the
    // name scanset stops at '=', so a missing space before '=' is tolerated.
    if (sscanf(input_for.c_str(), " %19[^= \t] = alloc(%255[^)])", t, m) != 2)
        return;

    id = t;
    id_size = m;

    // find() reports "not found" as npos, which is non-zero; both positions must
    // therefore be compared with npos explicitly instead of being used as booleans.
    const std::string::size_type open_pos = id.find('[');
    const std::string::size_type close_pos = id.find(']');
    const bool indexed = open_pos != std::string::npos &&
                         close_pos != std::string::npos &&
                         open_pos < close_pos;

    if (indexed) {
        int step = 0;
        if (inc_str.find("++") != std::string::npos) {
            step = 1;
        } else {
            // The step is the number after the last '+', skipping the '=' of "+=".
            const std::string::size_type plus_pos = inc_str.rfind('+');
            if (plus_pos != std::string::npos) {
                std::string::size_type digits_pos = plus_pos + 1;
                if (digits_pos < inc_str.size() && inc_str[digits_pos] == '=')
                    ++digits_pos;
                sss = inc_str.substr(digits_pos);
                step = std::atoi(sss.c_str());
            }
        }
        if (step <= 0) {
            std::cerr << "ERROR: unsupported for-loop increment\n";
            std::exit(EXIT_FAILURE);
        }
        y1 = step;
        std::cout<<"y1:"<<y1<<std::endl;

        pos_strt = static_cast<unsigned>(open_pos);
        pos_end = static_cast<unsigned>(close_pos - 1);
        std::cout<<"Positions start and ebd: " << pos_strt<<pos_end<<std::endl;

        id_dummy = id.substr(0, open_pos);
        id = id_dummy;
    }

    std::cout<<"Tag: "<<id <<" Memory: "<<id_size<<std::endl;
    extract(id_size); /* leaves the unit (B, KB, MB, GB) in val */
    id_size_int = std::atoi(id_size.c_str());

    // toByte() also writes its result to pMem_ulong, which holds the total
    // physical memory; restore it so an allocation does not overwrite that total.
    const unsigned long physical_bytes = pMem_ulong;
    id_size_ulong = toByte(id_size_int, val);
    pMem_ulong = physical_bytes;

    if (indexed) {
        std::cout<<"v1: " << v1 << " " << v2<<std::endl;
        for (y = v1; y < v2; y = y + y1) {
            // allocate here
        }
    }
}
/*
 * Strip a trailing comment from a for-loop body line.
 *
 * input_for : line to clean, modified in place. Everything from the first
 *             '#' to the end of the line is removed; a line without '#' is
 *             left untouched.
 *
 * The position is kept as std::string::size_type. Storing it in a narrower
 * `unsigned` makes the comparison with npos always true where size_type is
 * 64 bits wide, and erase() then throws std::out_of_range for every line
 * that has no comment.
 */
void commentIgnore_for(std::string& input_for)
{
    const std::string::size_type hash_pos = input_for.find('#');
    if (hash_pos != std::string::npos)
        input_for.erase(hash_pos);
}
Also, I am required to make it whitespace-compatible. This means that the input can be entered on one line as well, for example two allocations on one line. I have not been able to take care of that, and I need help with it.
Upvotes: 1
Views: 1531
Reputation: 153899
IIUC, the input is not line oriented, so the usual rule of
using std::getline
may not apply. My own approach would be:
Insert a filtering streambuf to remove the comments.
Use some sort of regular expression based lexer (e.g. flex
)
to break the input up into tokens.
Define the grammar; I'd use bison
once I'd gotten this far,
in order to generate the code to parse the grammar, but a simple
recursive descent parser shouldn't be too hard to write.
You don't say what you must do with the information once you have parsed it, so it's hard to give more precise advice.
Upvotes: 1
Reputation: 129314
My suggestion would be that you write a proper tokenizer - a piece of code that understands what belongs together (such as "words"), and where to split (e.g. "(, ), {, }"). The tokenizer would return an enum, something like this:
// Kinds of token the tokenizer returns; one enumerator per punctuation mark
// plus one for words.
enum Token {
Token_Unknown, // Error indication.
Token_LeftParen,
Token_RightParen,
Token_LeftBracket, // presumably '{' (the surrounding text lists "(, ), {, }" as split points) - confirm
Token_RightBracket, // presumably '}' - confirm
Token_Comma,
Token_Semicolon,
Token_Equal,
Token_Word, // Sequence of alphanumerics
};
Once you have a token, you need to understand what that token is. It may make sense to have a table of "known" tokens (aka "keywords"), such as "mem", "alloc", "for", and so on. If something isn't a keyword, it's the name of a symbol, such as a variable. You store those in a table, so that you can refer to them later.
You will have to use some sort of stack of where you are, so you can get back to where you came from when you finish.
Writing a generic parser isn't terribly hard, and you will most likely end up with a lot less code than your current code in doing so.
Certainly, you'd be able to get rid of all the:
if(input == "#"){
cin.clear();
input.clear();
getline(cin,comment);
cin >> input;
}
Just let the parser check if there is a '#'
as the input, and skip over to the end of the line (completing the current token if you have one, if not, just keep going).
Upvotes: 1