#ifndef THTS_H
#define THTS_H

#include <queue>

#include "search_engine.h"

#include "utils/stopwatch.h"

class ActionSelection;
class OutcomeSelection;
class BackupFunction;
class Initializer;
class RecommendationFunction;

// THTS, Trial-based Heuristic Tree Search, is the implementation of the
// abstract framework described in the ICAPS 2013 paper "Trial-based Heuristic
// Tree Search for Finite Horizon MDPs" (Keller & Helmert). The described
// ingredients (plus the recommendation function that is added in T. Keller's
// PhD dissertation) are implemented in the following five classes (1-5) or as a
// function in this class (6):

// 1. ActionSelection

// 2. OutcomeSelection

// 3. BackupFunction

// 4. Initializer

// 5. RecommendationFunction

// 6. continueTrial()

// Add ingredients by deriving from the corresponding class.

// A node of the explicated search tree. It stores the reward estimates and
// bookkeeping data of the node plus raw pointers to its children.
struct SearchNode {
    // Creates an unvisited, uninitialized and unsolved node with the given
    // probability and steps-to-go. The future reward estimate starts at the
    // lowest finite double value and the immediate reward at 0.
    SearchNode(double const& _prob, int const& _stepsToGo)
        : children(),
          immediateReward(0.0),
          prob(_prob),
          stepsToGo(_stepsToGo),
          futureReward(-std::numeric_limits<double>::max()),
          numberOfVisits(0),
          initialized(false),
          solved(false),
          isActionNode(false) {}

    // Deletes all children (and, through their destructors, the whole
    // subtree below this node). Deleting a null child is a no-op.
    ~SearchNode() {
        for (SearchNode* child : children) {
            delete child;
        }
    }

    // Puts the node back into the state of a freshly constructed node with
    // the given probability and steps-to-go, so that it can be reused.
    // The child pointers are only dropped, the children are NOT deleted here.
    // NOTE(review): presumably the children are reclaimed elsewhere (e.g., via
    // a node pool) -- confirm against the code that calls reset().
    void reset(double const& _prob, int const& _stepsToGo) {
        children.clear();
        immediateReward = 0.0;
        prob = _prob;
        stepsToGo = _stepsToGo;
        futureReward = -std::numeric_limits<double>::max();
        numberOfVisits = 0;
        initialized = false;
        solved = false;
        isActionNode = false;
        trialRewards.clear();
        meanWalk.clear();
        // Also reset the depth bookkeeping; otherwise a reused node keeps the
        // depth of the position it occupied before (a new node starts at 0).
        searchDepthi = 0;
    }

    // Estimate of the total expected reward: immediate plus future reward
    double getExpectedRewardEstimate() const {
        return immediateReward + futureReward;
    }

    // Estimate of the expected future reward only
    double getExpectedFutureRewardEstimate() const {
        return futureReward;
    }

    // Writes a single line with the expected reward estimate and the number
    // of visits to out, prefixed with indent. Solved nodes are marked as such.
    void print(std::ostream& out, std::string indent = "") const {
        if (solved) {
            out << indent << "SOLVED with: " << getExpectedRewardEstimate()
                << " (in " << numberOfVisits << " real visits)" << std::endl;
        } else {
            out << indent << getExpectedRewardEstimate() << " (in "
                << numberOfVisits << " real visits)" << std::endl;
        }
    }

    // Child nodes; entries may be null. Deleted in the destructor.
    std::vector<SearchNode*> children;

    double immediateReward;
    double prob;
    int stepsToGo;

    double futureReward;
    int numberOfVisits;

    // This is used in two ways: in decision nodes, it is true if all children
    // are initialized; and in chance nodes that represent an action (i.e., in
    // children of decision nodes), it is true if an initial value has been
    // assigned to the node.
    bool initialized;

    // Stores the first greatM reward values of each child of the current root
    // node (used for change detection)
    std::deque<double> trialRewards;

    // Stores the mean walks of all children
    std::vector<double> meanWalk;

    // Level of this node in the tree (maximal at the root), i.e., the depth of
    // the node relative to the maximal depth reached in the current tree
    int searchDepthi = 0;

    // A node is solved if futureReward is equal to the true future reward
    bool solved;

    // NOTE(review): presumably true for chance nodes that represent an action
    // (children of decision nodes) -- it is only ever set to false in this
    // struct, confirm against the code that sets it.
    bool isActionNode;
};

// Search engine that implements the THTS framework. The ingredients (action
// selection, outcome selection, backup function, initializer and
// recommendation function) are plugged in via the corresponding setters. This
// variant additionally declares bookkeeping and helper functions for change
// detection; their definitions are not part of this header.
class THTS : public ProbabilisticSearchEngine {
public:
    enum TerminationMethod {
        TIME,                     // stop after timeout sec
        NUMBER_OF_TRIALS,         // stop after maxNumberOfTrials trials
        TIME_AND_NUMBER_OF_TRIALS // stop after timeout sec or maxNumberOfTrials
                                  // trials, whichever comes first
    };

    THTS(std::string _name);

    // Set parameters from command line
    bool setValueFromString(std::string& param, std::string& value) override;

    // This is called when caching is disabled because memory becomes sparse
    void disableCaching() override;

    // Learns parameter values from a random training set
    void learn() override;

    // Start the search engine as main search engine
    void estimateBestActions(State const& _rootState,
                             std::vector<int>& bestActions) override;

    // Start the search engine to estimate the Q-value of a single action.
    // Not supported by THTS: the implementation only asserts false.
    void estimateQValue(State const& /*state*/, int /*actionIndex*/,
                        double& /*qValue*/) override {
        assert(false);
    }

    // Start the search engine to estimate the Q-values of all applicable
    // actions. Not supported by THTS: the implementation only asserts false.
    void estimateQValues(State const& /*state*/,
                         std::vector<int> const& /*actionsToExpand*/,
                         std::vector<double>& /*qValues*/) override {
        assert(false);
    }

    // Parameter setter
    void setActionSelection(ActionSelection* _actionSelection);
    void setOutcomeSelection(OutcomeSelection* _outcomeSelection);
    void setBackupFunction(BackupFunction* _backupFunction);
    void setInitializer(Initializer* _initializer);
    void setRecommendationFunction(
        RecommendationFunction* _recommendationFunction);

    void setMaxSearchDepth(int _maxSearchDepth) override;
    void setTerminationMethod(THTS::TerminationMethod _terminationMethod) {
        terminationMethod = _terminationMethod;
    }

    void setMaxNumberOfTrials(int _maxNumberOfTrials) {
        maxNumberOfTrials = _maxNumberOfTrials;
    }

    void setNumberOfNewDecisionNodesPerTrial(
        int _numberOfNewDecisionNodesPerTrial) {
        numberOfNewDecisionNodesPerTrial = _numberOfNewDecisionNodesPerTrial;
    }

    void setMaxNumberOfNodes(int _maxNumberOfNodes) {
        maxNumberOfNodes = _maxNumberOfNodes;
        // Resize the node pool and give it a "safety net" of 20000 nodes (this
        // is because the termination criterion is checked only at the root and
        // not in the middle of a trial)
        nodePool.resize(maxNumberOfNodes + 20000, nullptr);
    }

    // NOTE(review): orphaned comment kept from the original ("integer
    // corresponding to which child"); the declaration it described is not
    // present anymore.

    // Methods to create search nodes
    SearchNode* createRootNode();
    SearchNode* createDecisionNode(double const& _prob);
    SearchNode* createChanceNode(double const& _prob);

    // Methods that return certain nodes of the explicated tree
    SearchNode const* getCurrentRootNode() const {
        return currentRootNode;
    }

    SearchNode const* getTipNodeOfTrial() const {
        return tipNodeOfTrial;
    }

    // Print
    void printStats(std::ostream& out, bool const& printRoundStats,
                    std::string indent = "") const override;

    // Change detection
    // Checks if a change occurred in the policy
    void performChangeDetection(SearchNode* decisionNode, double* oldExpected);
    // Calculates searchDepthi of the given node
    void calculatesearchdepthi(SearchNode* node);
    // Returns the boundary value h that is used for change detection
    double calculateBoundaryH(SearchNode* node);
    // Calculates the mean of the sampled values of a chance node
    double calculateMeanOfSamples(SearchNode* chanceNode);
    // Adds a value to the trialRewards of a chance node
    bool addTrialReward(SearchNode* chanceNode, double futReward);

    // Stores the given value in myNovelVar (see the member declaration below)
    void setMyNovelVarFromDecinode(double a) {
        myNovelVar = a;
    }

    // Returns calcT (flag that activates the calculation of the expected T)
    bool returnCalcTBool() const {
        return calcT;
    }

    // Returns deactivateChangedectector.
    // NOTE(review): the function name says "enabled" while the returned flag
    // is named "deactivate..." -- confirm that callers interpret the value
    // with the intended polarity.
    bool returnEnabledCusum() const {
        return deactivateChangedectector;
    }

    // Returns the expected number of visits that is stored for the given
    // number of steps-to-go, truncated to int. stepsToGo must be a valid
    // index of expectedNumberOfVisits (checked in debug builds only).
    int getExpectedNumberOfRemainingVisits(int stepsToGo) const {
        assert(stepsToGo >= 0);
        assert(static_cast<std::vector<double>::size_type>(stepsToGo) <
               expectedNumberOfVisits.size());
        return static_cast<int>(expectedNumberOfVisits[stepsToGo]);
    }

    void IgnoreNewValuesOnChangedetected(SearchNode* node,
                                         double* oldExpection);

    double getEps() const {
        return eps;
    }

    // Returns tuningAlpha (stored as float, widened to double)
    double getTuningAlpha() const {
        return tuningAlpha;
    }

    // NOTE(review): public counters, presumably the number of random picks
    // and of all picks in action selection -- confirm against the code that
    // updates them.
    int usedRandom;
    int allPicks;

    // Returns DoSomeThingIfNegativAlarm
    bool returnEnabeldCalculateEps() const {
        return DoSomeThingIfNegativAlarm;
    }

    double calculateBoundaryHConstant();

private:
    // Main search functions
    void visitDecisionNode(SearchNode* node);
    void visitChanceNode(SearchNode* node);
    void visitDummyChanceNode(SearchNode* node);

    // Initialization of different search phases
    void initRound();
    void initStep(State const& _rootState);
    void initTrial();
    void initTrialStep();

    // Trial length determinization: the trial continues as long as fewer
    // decision nodes than numberOfNewDecisionNodesPerTrial have been
    // initialized in the current trial
    bool continueTrial(SearchNode* /*node*/) {
        return initializedDecisionNodes < numberOfNewDecisionNodesPerTrial;
    }

    // Determines if the current state has been solved before or can be solved
    // now
    bool currentStateIsSolved(SearchNode* node);
    void resetValuesOnChangedetected(SearchNode* currentRootnode);
    // If the root state is a reward lock or has only one reasonable action,
    // noop or the only reasonable action is returned
    int getUniquePolicy();

    // Determine if another trial is performed
    bool moreTrials();

    // For change detection:
    int amountOfTrials;

    int maxSearchDepthi;
    // Last value used for change detection
    double myNovelVar;

    // Ingredients that are implemented externally
    ActionSelection* actionSelection;
    OutcomeSelection* outcomeSelection;
    BackupFunction* backupFunction;
    Initializer* initializer;
    RecommendationFunction* recommendationFunction;

    // Search nodes used in trials
    SearchNode* currentRootNode;

    // The tip node of a trial is the first node that is encountered that
    // requires initialization of a child
    SearchNode* tipNodeOfTrial;

    // The path of states that is traversed in a trial (such that states[x] is
    // the state that is visited with x steps-to-go)
    std::vector<PDState> states;

    // Indices that allow simple access to the current state, action etc.
    int stepsToGoInCurrentState;
    int stepsToGoInNextState;
    int appliedActionIndex;

    // The accumulated reward that has been achieved in the current trial (the
    // rewards are accumulated in reverse order during the backup phase, such
    // that it reflects the future reward in each node when it is backed up)
    double trialReward;

    // Counter for the number of trials
    int currentTrial;

    // Max search depth for the current step
    int maxSearchDepthForThisStep;

    // Variable used to navigate through chance node layers
    int chanceNodeVarIndex;

    // Index of the last variable with non-deterministic outcome in the current
    // transition
    int lastProbabilisticVarIndex;

    // Counter for the number of decision nodes that have been initialized in
    // the current trial
    int initializedDecisionNodes;

    // Memory management (nodePool)
    int lastUsedNodePoolIndex;
    std::vector<SearchNode*> nodePool;

    // The stopwatch used for timeout check
    Stopwatch stopwatch;

    // Parameter
    THTS::TerminationMethod terminationMethod;
    int maxNumberOfTrials;
    int numberOfNewDecisionNodesPerTrial;
    int maxNumberOfNodes;

    // Statistics
    int numberOfRuns;
    int cacheHits;
    int accumulatedNumberOfStepsToGoInFirstSolvedRootState;
    bool firstSolvedFound;
    int accumulatedNumberOfTrialsInRootState;
    int accumulatedNumberOfSearchNodesInRootState;
    // Maximal number of samples used for the mean in change detection
    int greatM;
    int changeDetected;
    int changeChecked;
    int changeIgnored;
    double eps;

    // Parameters of the change detection extension
    // Indexed by steps-to-go (see getExpectedNumberOfRemainingVisits)
    std::vector<double> expectedNumberOfVisits;
    // NOTE(review): the original comment here ("for detT, has all
    // probabilities for the expectedVisits calculation") does not seem to
    // describe boundaryHConstant; it presumably belonged to a removed member.
    double boundaryHConstant;

    int totalExpectedNumberOfTrials;
    int gammaT;
    int gammaTScaled;
    bool pht;

    // Activates the calculation of the expected T
    bool calcT = false;
    bool deactivateChangedectector = false;
    // Decides how T is split up
    bool detT;
    bool DoSomeThingIfNegativAlarm;
    // For action selection
    float tuningAlpha;
    // Determines if the same reset action is used on all change detections
    bool resetOnNegativ;

    // Tests which access private members
    friend class THTSTest;
    friend class BFSTestSearch;
    friend class MCUCTTestSearch;
    friend class UCTBaseTestSearch;
};

#endif
