#include "compression.h"

#include <stdlib.h>     /* srand, rand, abort */

#include <algorithm>    /* min */
#include <limits>       /* numeric_limits */

#include "../task_utils/causal_graph.h"
#include "../utils/logging.h"

using namespace std;

namespace pdbs {

    /*
      Log the average of compute_mean_finite_h() over all PDBs of the
      collection that coll points to.

      The sum of the per-PDB values is divided by the number of PDBs; for an
      empty collection this is a floating-point division of 0.0 by 0.
    */
    void print_avg_finite_mean_for_PDBCollection(shared_ptr<PDBCollection> &coll) {
        const size_t num_pdbs = coll->size();
        double sum_of_means = 0.0;
        size_t index = 0;
        while (index < num_pdbs) {
            shared_ptr<PatternDatabase> pdb = (*coll)[index];
            sum_of_means += pdb->compute_mean_finite_h();
            ++index;
        }
        const double mean_of_means = sum_of_means / num_pdbs;
        utils::g_log << "Average mean_finite_h_value: " << mean_of_means << endl;
    }

    /*
      Overload taking the collection by reference: log the average of
      compute_mean_finite_h() over all PDBs stored in coll.

      The accumulated sum is divided by coll.size(); for an empty collection
      this is a floating-point division of 0.0 by 0.
    */
    void print_avg_finite_mean_for_PDBCollection(PDBCollection &coll) {
        const size_t num_pdbs = coll.size();
        double sum_of_means = 0.0;
        for (size_t index = 0; index != num_pdbs; ++index) {
            shared_ptr<PatternDatabase> pdb = coll[index];
            sum_of_means += pdb->compute_mean_finite_h();
        }
        utils::g_log << "Average mean_finite_h_value: "
                     << sum_of_means / num_pdbs << endl;
    }

    shared_ptr <PatternDatabase> min_compress(
            const shared_ptr <PatternDatabase> &small_pdb,
            const shared_ptr <PatternDatabase> &larger_pdb,
            int variable_domain_size) {
        vector<int> distances_new;
        const vector<int> distances_old = larger_pdb->get_distances();
        for (size_t i = 0; i < distances_old.size(); i += variable_domain_size) {
            int min_distance = numeric_limits<int>::max();
            for (int j = 0; j < variable_domain_size; j++) {
                if (distances_old[i + j] < min_distance) {
                    min_distance = distances_old[i + j];
                }
            }
            distances_new.push_back(min_distance);
        }
        vector <size_t> hash_mult = small_pdb->get_hashmultipliers();
        return make_shared<PatternDatabase>(
                small_pdb->get_pattern(),
                small_pdb->get_size(),
                move(distances_new),
                move(hash_mult),
                small_pdb->get_operator_transitions());
    }

    /*
      Return the variables that are causal-graph successors of at least one
      pattern variable but are not themselves part of the pattern.

      Parameters:
        task_proxy - task whose causal graph is queried.
        pattern    - variables of the current pattern.

      Returns the candidate variables without duplicates, in ascending order
      (they are collected in a std::set).
    */
    vector<int> missing_variables(const TaskProxy &task_proxy, const Pattern &pattern) {
        const causal_graph::CausalGraph &causal_graph = task_proxy.get_causal_graph();

        set<int> causally_relevant_variables;
        for (int var : pattern) {
            const vector<int> &successors = causal_graph.get_successors(var);
            causally_relevant_variables.insert(successors.begin(), successors.end());
        }

        // Pattern variables can be successors of each other; drop them.
        // Erasing by key is a no-op for variables that are not in the set.
        for (int pattern_var : pattern) {
            causally_relevant_variables.erase(pattern_var);
        }

        return vector<int>(causally_relevant_variables.begin(), causally_relevant_variables.end());
    }

    /*
      Return a copy of pattern with variable placed in front of all existing
      entries. The new variable goes first (rather than in sorted position)
      so that compression does not have to re-sort.
    */
    Pattern enlarge_pattern(const Pattern &pattern, int variable) {
        Pattern extended;
        extended.push_back(variable);
        extended.insert(extended.end(), pattern.begin(), pattern.end());
        return extended;
    }

    /*
      Score how much cur improves on old.

      An entry counts as an improvement if cur's distance is strictly larger
      than old's distance at the same index and is not
      numeric_limits<int>::max().

      Returns the number of improved entries divided by the number of entries
      of cur, i.e. a value in [0, 1]. Returns 0.0 if cur has no entries
      (instead of dividing by zero). If old has fewer entries than cur, only
      the common prefix is compared so old is never read out of bounds.
    */
    double compare_pdbs(shared_ptr <PatternDatabase> cur, shared_ptr <PatternDatabase> old) {
        // References avoid copying both distance tables on every comparison.
        const vector<int> &cur_distances = cur->get_distances();
        const vector<int> &old_distances = old->get_distances();

        const size_t num_entries = cur_distances.size();
        if (num_entries == 0) {
            return 0.0;
        }

        const size_t num_comparable = min(num_entries, old_distances.size());
        const int unreachable = numeric_limits<int>::max();
        int num_improvements = 0;
        for (size_t i = 0; i < num_comparable; i++) {
            if (cur_distances[i] > old_distances[i] && cur_distances[i] < unreachable) {
                num_improvements++;
            }
        }
        return num_improvements / static_cast<double>(num_entries);
    }

    vector <Pattern> compute_candidates(const Pattern &pattern, const TaskProxy &task) {
        vector<int> missing_vars = missing_variables(task, pattern);
        vector <Pattern> candidates;
        for (int variable : missing_vars) {
            candidates.push_back(enlarge_pattern(pattern, variable));
        }
        return candidates;
    }

    /*
      Hill-climbing search for a compressed PDB that improves on current_pdb.

      In every round, each candidate pattern (the best pattern so far enlarged
      by one variable, see compute_candidates) is turned into a PDB with
      create_default_pdb, min-compressed back to the shape of current_pdb and
      scored with compare_pdbs against current_pdb. The best-scoring
      compressed PDB over all rounds is kept.

      The search stops when
        - max_iterations rounds have been run ("max_iterations" option),
        - a round raised the best score by less than "min_impr_compression",
          or did not raise it at all, or
        - the sum of the sizes of all PDBs built so far has reached
          remaining_states (checked before each PDB is built).

      The gain is measured per round (best score after the round minus best
      score before it). Measuring it between successive accepted candidates
      would make the threshold depend on candidate order, and a round without
      any improvement would re-evaluate exactly the same candidates.

      Parameters:
        original_pattern - pattern of current_pdb.
        current_pdb      - in: PDB to improve; out: best compressed PDB found
                           (unchanged pointer if nothing scored above 0).
        task             - task the PDBs are built for.
        opts             - provides "min_impr_compression" and "max_iterations".
        remaining_states - state budget for all PDBs built by this call.
        operator_costs   - forwarded to create_default_pdb.

      Returns the accumulated get_size() of all PDBs built by this call.
    */
    int find_pdb_hillclimbing(
            Pattern original_pattern,
            shared_ptr <PatternDatabase> &current_pdb,
            const TaskProxy &task,
            const Options &opts,
            const int remaining_states,
            const std::vector<int> &operator_costs = std::vector<int>()
    ) {
        const int state_budget = remaining_states;
        int used_budget = 0;
        const double min_improvement = opts.get<double>("min_impr_compression");
        const int max_iterations = opts.get<int>("max_iterations");

        // Determine initial candidates
        vector <Pattern> candidates = compute_candidates(original_pattern, task);
        shared_ptr <PatternDatabase> best_pdb = current_pdb;
        Pattern best_pattern = original_pattern;
        double best_improvement_score = 0.0;

        // Hillclimbing iterations
        for (int iteration = 0; iteration < max_iterations; ++iteration) {
            const double score_before_round = best_improvement_score;

            // Candidates are small int sequences; the per-candidate copy is
            // kept because create_default_pdb's parameter type is not visible
            // here.
            for (Pattern candidate : candidates) {
                // Quit if the budget is used up
                if (used_budget >= state_budget) {
                    current_pdb = best_pdb;
                    return used_budget;
                }
                // Create enlarged PDB
                shared_ptr <PatternDatabase> larger_pdb = create_default_pdb(
                        task,
                        candidate,
                        false,
                        operator_costs);
                used_budget += larger_pdb->get_size();

                // The variables added on top of the original pattern sit at
                // the front of the candidate (see enlarge_pattern). The
                // product of their domain sizes is the number of entries of
                // the larger PDB that fold into one entry of the original.
                const int length_difference = candidate.size() - original_pattern.size();
                int dom_size = 1;
                for (int i = 0; i < length_difference; i++) {
                    dom_size *= task.get_variables()[candidate[i]].get_domain_size();
                }
                shared_ptr <PatternDatabase> compressed_pdb = min_compress(current_pdb, larger_pdb, dom_size);

                // Evaluate and update if best
                const double improvement_score = compare_pdbs(compressed_pdb, current_pdb);
                if (improvement_score > best_improvement_score) {
                    best_improvement_score = improvement_score;
                    best_pdb = compressed_pdb;
                    best_pattern = candidate;
                    utils::g_log << "Compression - Improvement found: " << improvement_score << endl;
                }
            }

            // Stop on stagnation or when the round's gain is below the
            // threshold; otherwise continue from the best pattern found.
            const double round_gain = best_improvement_score - score_before_round;
            if (round_gain <= 0.0 || round_gain < min_improvement) {
                break;
            }
            candidates = compute_candidates(best_pattern, task);
        }
        current_pdb = best_pdb;
        return used_budget;
    }

    /*
      Random-walk search for a compressed PDB that improves on current_pdb.

      Variables returned by missing_variables(task, original_pattern) are
      drawn at random (rand()) without replacement. A drawn variable is
      prepended to the pattern if the resulting number of abstract states
      stays strictly below remaining_states. Drawing stops when the state
      count reaches the budget or no variables are left. One PDB is then
      built for the enlarged pattern, min-compressed back to the shape of
      current_pdb and compared with current_pdb via compare_pdbs; current_pdb
      is replaced if the score exceeds numeric_limits<double>::epsilon().

      Parameters:
        original_pattern - pattern of current_pdb.
        current_pdb      - in: PDB to improve; out: the compressed PDB if it
                           scored better, otherwise unchanged.
        task             - task the PDB is built for.
        opts             - not read by this function.
        remaining_states - upper bound (exclusive) on the size of the
                           enlarged PDB.
        operator_costs   - forwarded to create_default_pdb.

      Returns the number of abstract states of the enlarged pattern, or 0 if
      no PDB was built (no candidate variables, or none fits the budget).

      NOTE(review): rand() is used and this file never calls srand(), so the
      sequence depends on seeding done elsewhere, if any.
    */
    int find_pdb_randomwalk(
            Pattern original_pattern,
            shared_ptr <PatternDatabase> &current_pdb,
            const TaskProxy &task,
            const Options &opts,
            const int remaining_states,
            const std::vector<int> &operator_costs = std::vector<int>()
    ) {
        utils::g_log << "Starting Boosting Randomwalk" << endl;
        int num_states = current_pdb->get_size();
        const int states_budget = remaining_states;

        // All causally relevant variables currently not in the pattern
        std::vector<int> missing_vars = missing_variables(task, original_pattern);
        if (missing_vars.empty()) {
            utils::g_log << "There are no variables left to add, exiting random walk." << endl;
            return 0;
        }

        const VariablesProxy task_variables = task.get_variables();
        Pattern new_pattern = original_pattern;
        // Product of the domain sizes of all variables added so far.
        int dom_size = 1;
        while (num_states < states_budget && !missing_vars.empty()) {
            const int rnd_num = rand() % missing_vars.size();
            const int candidate = missing_vars[rnd_num];
            const int candidate_domain = task_variables[candidate].get_domain_size();
            // Widen before multiplying so the budget test cannot be fooled by
            // int overflow of the product.
            const long long enlarged_states =
                static_cast<long long>(num_states) * candidate_domain;
            if (enlarged_states < states_budget) {
                // Add variable to pattern, update values and proceed
                num_states = static_cast<int>(enlarged_states);
                dom_size *= candidate_domain;
                new_pattern = enlarge_pattern(new_pattern, candidate);
            }
            // Each variable is considered at most once.
            missing_vars.erase(missing_vars.begin() + rnd_num);
        }

        if (new_pattern.size() == original_pattern.size()) {
            // Nothing was added: rebuilding a PDB for the unchanged pattern
            // cannot improve on current_pdb and would only burn budget.
            utils::g_log << "No variable fits into the state budget, exiting random walk." << endl;
            return 0;
        }

        // Create PDB for the randomly enlarged pattern
        std::shared_ptr<PatternDatabase> larger_pdb = create_default_pdb(
                task,
                new_pattern,
                false,
                operator_costs);

        // The added variables sit at the front of new_pattern, so dom_size
        // consecutive entries of larger_pdb fold into one entry of the result.
        shared_ptr <PatternDatabase> compressed_pdb = min_compress(current_pdb, larger_pdb, dom_size);
        // Compare the two pdbs, if the compressed is better, return it
        if (compare_pdbs(compressed_pdb, current_pdb) > std::numeric_limits<double>::epsilon()) {
            utils::g_log << "Compressed PDB is better" << endl;
            current_pdb = compressed_pdb;
        } else {
            utils::g_log << "Original PDB is better" << endl;
        }
        return num_states;
    }


    /*
      Register all options that control PDB compression on the given parser:
      the on/off switch, the hill-climbing limits, the per-PDB and
      per-collection size limits, and the choice of compression algorithm.
    */
    void add_compression_parser_parameters(OptionParser &parser) {
        // On/off switch for compression.
        parser.add_option<bool>(
            "compress_pdbs", "using pattern database compression", "false");

        // Hill-climbing controls.
        parser.add_option<int>(
            "max_iterations", "limit of iterations for hillclimbing", "3",
            Bounds("1", "infinity"));
        parser.add_option<double>(
            "min_impr_compression",
            "improvement threshold for compression hillclimbing", "0.05",
            Bounds("0.0", "1.0"));

        // Size limits.
        parser.add_option<int>(
            "max_size_compr",
            "maximal number of states per pattern database ", "2000000",
            Bounds("1", "infinity"));
        parser.add_option<int>(
            "max_size_compr_col",
            "maximal number of states per pattern collection ", "20000000",
            Bounds("1", "infinity"));

        // Algorithm selection.
        // NOTE(review): the name order presumably has to match the
        // enumerator order of CompressionAlgorithm - verify against its
        // declaration before reordering.
        vector<string> compression_algorithms = {"RANDOMWALK", "HILLCLIMBING"};
        parser.add_enum_option<CompressionAlgorithm>(
            "compr_algo", compression_algorithms, "compression algorithm",
            "RANDOMWALK");
    }

    /*
      Entry point for PDB boosting: run the compression algorithm selected by
      the "compr_algo" option on current_pdb.

      The state budget handed to the algorithm is the smaller of
      remaining_states_collection and the "max_size_compr" option. The return
      value is whatever the selected algorithm returns. An unknown algorithm
      value is logged and aborts the program.
    */
    int compute_best_pdb(Pattern original_pattern,
                         shared_ptr <PatternDatabase> &current_pdb,
                         const TaskProxy &task,
                         const Options &opts,
                         const int remaining_states_collection,
                         const std::vector<int> &operator_costs
                         ) {
        // Budget for this single boosting call.
        const int per_pdb_limit = opts.get<int>("max_size_compr");
        const int max_states = min(remaining_states_collection, per_pdb_limit);

        const CompressionAlgorithm selected = opts.get<CompressionAlgorithm>("compr_algo");
        if (selected == CompressionAlgorithm::HILLCLIMBING) {
            return find_pdb_hillclimbing(original_pattern, current_pdb, task, opts, max_states, operator_costs);
        }
        if (selected == CompressionAlgorithm::RANDOMWALK) {
            return find_pdb_randomwalk(original_pattern, current_pdb, task, opts, max_states, operator_costs);
        }

        utils::g_log << "Invalid compression algorithm provided. Aborting" << endl;
        abort();
    }
}