Reputation: 112
When breaking up a string within C++ into individual lines and parameters (as a 2D vector) it can create the interesting problem of access violations when attempting to parse the vectors between functions. Within the code example, there have been many attempts to ensure that data is passed to and from the functions are independent objects and in no way a reference.
segregate.hpp
#pragma once
#include <vector>
#include <string>
/*
Purpose:
To to take a whole file as a string,
and break it up into individual words
*/
namespace Segregate {
// Module types
typedef std::vector< std::string > ParamArray;
struct StrCommand{
unsigned long line;
ParamArray param;
};
typedef std::vector< StrCommand > StrCommands;
bool IsParamBreak(char val);
bool IsLineBreak(char val);
ParamArray Parameterize(std::string str);
StrCommands Fragment(std::string str);
}
#include "./segregate.cpp"
segregate.cpp
#include "./segregate.hpp"
namespace Segregate{
bool IsParamBreak(char val){
if (val == ' '){
return true;
}else if (val == '\t'){
return true;
}
return false;
};
bool IsLineBreak(char val){
if (val == '\n'){
return true;
}
return false;
};
// Splits a single line into individual parameters
ParamArray Parameterize(std::string str){
str.append(" "); // Ensures that the loop will cover all segments
unsigned long length = str.size();
unsigned long comStart = 0;
ParamArray res;
// Ignore carrage returns
// Windows artifact
if (str[0] == '\r'){
comStart = 1;
}
// Ignore indentation
// Find the start of actual content
while (comStart < length && IsParamBreak(str[comStart])){
comStart++;
}
// Count the number of parameters
unsigned long vecLen = 0;
for (unsigned long i=comStart; i<length; i++){
if ( IsParamBreak(str[i]) ){
vecLen++;
}
}
res.reserve(vecLen);
// Scan will fail if there is no data
if (length == 0){
return res;
}
// Slice the the string into parts
unsigned long toIndex = 0;
unsigned long cursor = comStart;
for (unsigned long i=cursor; i<length; i++){
if (IsParamBreak(str[i]) == true){
// Transfer the sub-string to the vector,
// Ensure that the data is it's own, and not a reference
res[toIndex].reserve(i-cursor);
// Error here
res[toIndex].assign( str.substr(cursor, i-cursor) );
cursor = i+1;
toIndex++;
}
}
return res;
};
StrCommands Fragment(std::string str){
str.append("\n"); // Ensures that the loop will cover all segments
unsigned long length = str.size();
// Result
StrCommands res;
// Count lines
// Ignoring empty lines
unsigned long vecLen = 1;
for (unsigned long i=0; i<length; i++){
if (IsLineBreak(str[i])){
vecLen++;
}
}
res.reserve(vecLen);
// Ignore 'empty' strings as they may cause errors
if (vecLen == 0){
return res;
}
// Read lines
unsigned long toIndex = 0;
unsigned long cursor = 0;
for (unsigned long i=0; i<length; i++){
if (IsLineBreak(str[i])){
// Error here
res[toIndex].param = ParamArray( Parameterize( std::string(str.substr(cursor, i-cursor)) ) );
res[toIndex].line = i+1;
// Ignore blank lines
if (res[toIndex].param.size() == 0){
vecLen--;
}else{
toIndex++;
}
cursor = i+1;
}
}
// Shrink the result due to undersizing for blank lines
res.reserve(vecLen);
return res;
};
}
Memory access violations normally occur on lines 66 & 108 (when the element data is stored locally within a vector). It appears to occur during the assignment phase, as deduced by using an intermidiate temporary variable to store the result directly after it's parsing. The error can also occur during vector::reserve(), tho that occurs less often.
Note: On Windows there is no direct error message:
Exception thrown at 0x00A20462 in fiber.exe: 0xC0000005: Access violation reading location 0xBAADF009.
is only seen when using 'C/C++ Extension for Visual Studio Code' debugging, not within normal terminal execution.
However on Ubuntu it outputs:
Segmenation fault (core dump)
Upvotes: 0
Views: 1694
Reputation: 36379
You are calling reserve
on your vector which allocates memory to store your objects but doesn't construct them. When you then try and use the methods of the objects which you haven't constructed it's likely to crash.
There are 2 possible solutions, either call resize
instead of reserve
or call push_back
to construct new objects at the end of the vector.
Upvotes: 4