"""
general doc
"""

import os.path
from analysis_wp_util import *

"""
Interesting code for search analysis: 
gp_by_clumne_name = df.groupby(By=['columne_name'])
gp_by_clumne_name.mean()
gp_by_clumne_name.count()

horizontal bars (above "Visualising Your Data") could be interesting for showing multiple attributes 
above each other for multiple problems
"""

""" Save structure notes
"do_evaluation",
 "opt"             "3h"
 "partial_opt"     "8h"
 "sat",            "3h"        properties-sat-3h-t_both
          "search"        properties-sat-3h-t_both-s_both
"""

# Root directory of the experiment data (machine-specific absolute path).
path = os.path.join(
    os.path.join("/home", "sev", "MT1", "Code"),
    os.path.join("experiments", "severin-invariant-comparison", "data"),
)
# Directory of one particular cluster evaluation run; only referenced by the
# commented-out alternative for `propeties_path` below.
trans_path = os.path.join(path, "eval_rin08_strips-20_8_28-cluster-eval")

# Properties file that is actually analysed.
# Alternative source: propeties_path = os.path.join(trans_path, "properties")
propeties_path = os.path.join(
    os.path.join(path, "do_evaluation", "opt", "3h"),
    "properties-opt-3h-t_both",
)

# Optional preprocessing step, currently disabled:
# json_mod = reindex_by_hand(propeties_path)

# Read the properties JSON into a DataFrame, requesting float as dtype.
df = pd.read_json(propeties_path, dtype=float)

# NOTE(review): `rename_columns` comes from analysis_wp_util; presumably it maps
# the "helm"/"rin" prefixes to "helmert"/"rin08" on a new column level - confirm.
renamed = rename_columns(
    df,
    {
        "helm": ["helmert", ""],
        "rin": ["rin08", ""],
    },
    introduce_new_lvl=True,
)

# Settings for the scatter plot call below (the call itself is disabled).
attribute = "translator_invariant_number_of_found_mutexes"
x_name, y_name = "helmert", "rin08"
min_wins_text = "lower"

# scatterplot
# `nice_df` is a second name for `renamed`, not a copy: columns assigned to
# `nice_df` later on are therefore visible through `renamed` as well.
nice_df = renamed
# do_scatter_plot(nice_df, x_name, y_name, attribute, min_wins_text)

# dataframe processing
# Cost estimate: (candidates considered / iterations) * tests of candidates.
total_test_cost_estimation = (
    nice_df["translator_invariant_number_of_candidates_considered"]
    .div(nice_df["translator_invariant_number_of_iterations"])
    .mul(nice_df["translator_invariant_number_of_test_of_candidates"])
)
nice_df["total_test_cost_estimation"] = total_test_cost_estimation

# Copy of the frame in which every 0 is replaced by 0.001. It is taken after the
# cost estimate was added and before the test_by_time columns are created, so it
# contains the former but not the latter.
# NOTE(review): presumably the zeros are replaced to allow log-scale plots - confirm.
logarithmicised = nice_df.replace(0, 0.001)

# trying to find perfect dependency for time_finding_invariants
# logarithmicised = logarithmicised["rin08"]
# Candidate tests per unit of invariant-finding time, computed on the
# zero-replaced values.
nice_df["test_by_time"] = logarithmicised[
    "translator_invariant_number_of_test_of_candidates"
].div(logarithmicised["translator_time_finding_invariants"])

# Deviation of every row from the overall mean rate.
mean = nice_df["test_by_time"].mean()
nice_df["test_by_time_dif"] = nice_df["test_by_time"].sub(mean)

# Rows whose deviation exceeds 3000 keep their values; `where` fills all other
# rows with NaN instead of dropping them.
# Possible extra condition: nice_df["translator_time_finding_invariants"] > 0.07
a = nice_df[
    [
        "test_by_time_dif",
        "translator_invariant_number_of_test_of_candidates",
        "translator_time_finding_invariants",
    ]
].where(nice_df["test_by_time_dif"] > 3000)

# some plots with x,y different attributes
# important: 0
# One dict per regression plot; each dict is handed unchanged to
# `do_regplot_from_dict` together with the zero-replaced frame.
# NOTE(review): "tex" is set to the string "True", not the boolean - confirm
# that this is what `do_regplot_from_dict` expects.
reg_plots = [
    dict(
        x_name="translator_time_finding_invariants",
        y_name="translator_invariant_number_of_test_of_candidates",
        outfile_name="plot-time-vs-tests",
        tex="True",
    ),
    dict(
        x_name="translator_time_finding_invariants",
        y_name="total_test_cost_estimation",
        outfile_name="plot-time-complexity-estimation",
    ),
]
for reg_plot in reg_plots:
    do_regplot_from_dict(reg_plot, logarithmicised)

# pairplots
def _to_numeric_if_possible(column):
    """Return *column* converted with ``pd.to_numeric``, or unchanged on failure.

    Replaces ``pd.to_numeric(..., errors='ignore')``, which is deprecated since
    pandas 2.2; catching the conversion errors explicitly keeps the same
    "convert what can be converted" behaviour without the FutureWarning.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Convert every column that can be parsed as numbers; leave the others as-is.
nice_df = nice_df.apply(_to_numeric_if_possible)

# Subset of attributes intended for the pair plot. The columns were already
# converted above, so a second conversion pass would be a no-op; an explicit
# copy keeps `smth` independent of `nice_df` before it is modified.
smth = nice_df[['translator_invariant_number_of_predicates', 'translator_invariant_number_of_found_invariants',
                'translator_invariant_number_of_grounded_actions', 'translator_invariant_number_of_objects', 'algorithm'
                ]].copy()

# Encode the algorithm name as a number: 0.0 if it contains 'helmert', else 1.0.
smth['algorithm'] = smth['algorithm'].apply(lambda x: 0.0 if 'helmert' in x else 1.0)
# sns.pairplot(data=smth, hue='algorithm', corner=True)  # can take ages with many columns selected
plt.show()
