user27382745
user27382745

Reputation:

Webots controller crashes when changing a value in a vector

Im having troubles with the Webots simulator. Whenever I run the code, it always freezes at exactly 0.750 secs, and reports that the main bot controller crashed (there's a second controller for scoring purposes). I know it's when I assign a new value to the vector, as during debugging I commented it, and it ran like nothing happened. Code is below (there are other files within the same controller, but do not show signs of error):

// library for sim functions
#include "include/simFunctions.cpp"

// create the sim class
Sim sim;

class Bot{
private:
  double currentScore;
  bool training = true;
  int currentTerm = 65;
  int previousTerm = 0;
public:
  const void* message = " ";
  int functionOutput;

  // activate or deactivate the hook
  void hook(bool activated) {
    if (activated == true) {
      Hook->setVelocity(2.0);
      sim.delay(130, "msec");
      Hook->setVelocity(0.0);
    } else if (activated == false) {
      Hook->setVelocity(-2.0);
      sim.delay(130, "msec");
      Hook->setVelocity(0.0);
    }
  }
  
    // training functions for network
  void trainingNetwork(NeuralNetwork& actor, NeuralNetwork& critic, int numEpisodes, double gamma, double learningRate, double GRADIENT_CLASH_THRESHOLD, double weight_decay) {
    
    AdamWOptimizer actorOptimizer(learningRate, 0.9, 0.999, 0.01, weight_decay);
    AdamWOptimizer criticOptimizer(learningRate, 0.9, 0.999, 0.01, weight_decay);
    
    actor.add_layer(Layer(3, 128, "relu", actorOptimizer));
    actor.add_layer(Layer(128, 128, "relu", actorOptimizer));
    actor.add_layer(Layer(128, 4, "linear", actorOptimizer));
    
    critic.add_layer(Layer(2, 128, "relu", criticOptimizer));
    critic.add_layer(Layer(128, 128, "relu", criticOptimizer));
    critic.add_layer(Layer(128, 1, "linear", criticOptimizer));
    
    for (int episode = 0; episode <= numEpisodes; ++episode) {
      vector<vector<double>> states;
      vector<double> actions, rewards, logProbs, values;
      
      if (left1->getVelocity() != 0.0) {
        sim.moveBot(0);
        sim.delay(50, "msec");
      }
      sim.resetSimManual();
      sim.programSetup();
      training = true;
      while (training == true) {
        vector<double> state;
        double input1;
        double input2;
        double input3;
        
        state.resize(3);
        
        input1 = (left1->getVelocity() + left2->getVelocity() + left3->getVelocity()) / 3;
        input2 = (right1->getVelocity(), right2->getVelocity(), right3->getVelocity()) / 3;
        input3 = robot->getTime();
        state[0] = input1;
        states.push_back(state);
      
        vector<vector<double>> actionProbs = actor.forward({state});
        
        vector<vector<double>> valueEstimates = critic.forward({state});
        values.push_back(valueEstimates[0][0]);
      
        values.push_back(valueEstimates[0][0]);
        
        sim.delay(64, "msec");
      
        int action = (actionProbs[0][0] > actionProbs[0][1]) ? 0 : 1;
        logProbs.push_back(log(max(actionProbs[0][action], 1e-8)));
        
        functionOutput = action;
        
        cout << "MAINBOT: functionOutput = " << functionOutput << endl;
        functionConvert(functionOutput);
        
        sim.receive();
        if (receiv->getQueueLength() >= 1) {
          message = receiv->getData();
          currentScore = *(double *)message;
          rewards.push_back(currentScore);
          receiv->nextPacket();
        }
        
        if (robot->getTime() >= currentTerm) {
          training = false;
          previousTerm = currentTerm;
          currentTerm = currentTerm + 61;
        }
      }
      
      vector<double> advantages;
    for (int t = 0; t < rewards.size(); ++t) {
      double td_target = rewards[t] + (t < rewards.size() - 1 ? gamma * values[t + 1] : 0.0);
      advantages.push_back(td_target - values[t]);
    }
      
    double actorLoss = computeLoss(logProbs, advantages);
      
    double criticLoss = 0.0;
    for (size_t i = 0; i < rewards.size(); ++i) {
      double td_target = rewards[i] + (i < rewards.size() - 1 ? gamma * values[i + 1] : 0.0);
      criticLoss += pow(td_target - values[i], 2);
    }
    criticLoss = rewards.size();
      
    actor.backward({{actorLoss}}, GRADIENT_CLASH_THRESHOLD);
    actor.update_weights();
      
    critic.backward({{criticLoss}}, GRADIENT_CLASH_THRESHOLD);
    critic.update_weights();
    }
      
  }
  
  double computeLoss(const vector<double>& logProbs, const vector<double>& advantages) {
    double loss = 0.0;
    for (int i = 0; i < logProbs.size(); ++ i) {
      loss -= logProbs[i] * advantages[i];
    }
    return loss;
  }
  
  void functionConvert(int functionID) {
    if (functionID == 0) {
      sim.moveBot(0);
    } else if (functionID == 1) {
      sim.moveBot(1);
    } else if (functionID == -1) {
      sim.moveBot(2);
    } else if (functionID == 2) {
      sim.moveBot(3);
    } else if (functionID == -2) {
      sim.moveBot(4);
    }
  }
};

Upvotes: 0

Views: 50

Answers (0)

Related Questions