# nnperm/table_real_data.py (repository randommm/nnperm, branch master)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#----------------------------------------------------------------------
# Copyright 2018 Marco Inacio <pythonpackages@marcoinacio.com>
#
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, version 3 of the License.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.    See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with this program.    If not, see <http://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

import collections
import collections.abc
import itertools
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.backends.backend_pdf import PdfPages

from db_structure_real_data import ResultRealData, db

# Candidate line styles and line widths (presumably for matplotlib plots;
# nothing in the visible part of this script reads them).  ``clws`` holds
# every (width, style) pair, with the width varying slowest.
cls = ["-", ":", "-.", "--"]
clw = [1.0, 2.0, 1.5, 3.0, 0.5, 4.0]
clws = [(width, style) for width in clw for style in cls]

# Display names for the raw method identifiers found in the ``method`` column.
_METHOD_LABELS = {
    'permutation': 'COINP',
    'shuffle_once': 'SCPI',
    'cpi': 'CPI',
}


def renamer(x):
    """Return the display label for a ``(method, retrain_permutations)`` pair.

    Known method identifiers are replaced by their display name; any other
    identifier is kept unchanged.  When ``retrain_permutations`` is falsy the
    label is prefixed with ``"Approx "``, except for ``"remove"`` and
    ``"CPI"``.
    """
    label, retrain_permutations = x
    label = _METHOD_LABELS.get(label, label)
    if not retrain_permutations and label not in ("remove", "CPI"):
        label = "Approx " + label
    return label


def ext(x):
    """Return the first element of ``x`` if it is a sequence, else ``x``."""
    # NOTE(review): ``str`` is a Sequence too, so a plain string would be cut
    # down to its first character -- confirm stored values are never strings.
    if isinstance(x, collections.abc.Sequence):
        return x[0]
    return x


# One table of formatted p-values per ``include_extra`` setting (0, 1, 2):
# rows are (estimator, method) pairs, columns are the tested features.
for include_extra in range(3):
    df = pd.DataFrame(list(
        ResultRealData
        .select()
        .where(ResultRealData.include_extra == include_extra)
        .dicts()
    ))

    # Plain single-column assignment.  ``df[['method']] = <Series>`` is read
    # by current pandas as "one value per listed column" and fails with
    # "Columns must be same length as key".
    df['method'] = [
        renamer(pair)
        for pair in zip(df['method'], df['retrain_permutations'])
    ]

    # NOTE(security): ``pickle.loads`` can execute arbitrary code on crafted
    # input; this is only acceptable while the database content is trusted.
    df['feature_tested'] = df['feature_tested'].apply(pickle.loads).apply(ext)

    # Sorted distinct (estimator, method) pairs, built from the two columns
    # directly so no integer lookup on a label-indexed row is needed.
    estimator_methods = sorted(set(zip(df['estimator'], df['method'])))
    features = np.unique(df['feature_tested'])

    datap = {}
    for estimator, method in estimator_methods:
        pair_rows = df[
            (df['estimator'] == estimator) & (df['method'] == method)
        ]
        valsp = []
        for feature_tested in features:
            try:
                valp = pair_rows[pair_rows['feature_tested'] == feature_tested]
                # First matching row wins; no match raises IndexError.
                valp = valp['pvalue'].iloc[[0]].item()
            except (ValueError, IndexError):
                # Missing combination: shown as "nan" in the table.
                valp = np.nan

            valsp.append("{0:.2f}".format(valp))

        # Tuple keys become a two-level index once the frame is transposed.
        datap[(estimator, method)] = valsp

    dfp = pd.DataFrame.from_dict(datap)

    # NOTE(review): the column labels are hard-coded and assume that the
    # sorted distinct ``feature_tested`` values line up with this order and
    # count -- verify against the code that stores the results.
    columns = ['carat', 'depth', 'table', 'x', 'y', 'z']
    columns.extend(['cut', 'color', 'clarity'])
    if include_extra == 1:
        columns.append('c+n')
    if include_extra == 2:
        columns.append('c+c+n')
    dfp = dfp.transpose()
    dfp.columns = columns

    print(dfp)
    print(dfp.to_latex(multirow=True))