penalty python实现

原创
2017/04/03 00:01
阅读数 215

用了 1 个星期,终于算是把 penalty 在 Python 里实现了,下面是代码:

 

# encoding: utf-8

import pandas as pd
import numpy as np
from pandas import DataFrame as df

# Load wdata.csv. header=None tells pandas not to take column names from the
# file, so every column name is supplied explicitly here, in file order.
data = pd.read_csv(
    'wdata.csv',
    header=None,
    names=[
        'ID', 'Leg', 'A0', 'A1', 'A1a', 'A1b', 'A2a', 'A2b', 'A3', 'A4',
        'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'A11', 'A12', 'A13', 'A14',
        'A15', 'A16', 'A17_1a', 'A17_1b', 'A17_2a', 'A17_2b', 'A17_4a', 'A17_4b',
        'A17_5a', 'A17_5b', 'A17_6', 'A20', 'A21', 'A22', 'A23', 'A24a',
        'A25', 'A26', 'A27', 'A28', 'A29', 'A30', 'A31', 'A32', 'A33', 'A34',
        'A35', 'A35b', 'A35c', 'A36', 'A37', 'A38', 'A39', 'A40', 'A41', 'A42',
        'A43', 'A44', 'A45', 'A46',
    ],
)

# Columns that get recoded into JAR groups further down.
de_data = ['A5', 'A7', 'A9', 'A11']


"""
def def_jar(de_data):
    data_list=data.ix[:,de_data]
    for var_i in de_data:
        for var_id in range(0,data_list.shape[0]+1):
                if   data_list.ix[var_id:var_id,var_i] is 5:
                     data_list.ix[var_id:var_id,'new_%s'%var_i]='lev1'
                elif data_list.ix[var_id:var_id,var_i] is 4:
                     data_list.ix[var_id:var_id,'new_%s'%var_i]='lev2'
                else :
                     data_list.ix[var_id:var_id,'new_%s'%var_i]='lev3'
    print(data_list.head())
"""

def def_var(de_data):
    """Add a labelled ``N_<name>`` column to the global ``data`` frame.

    For each column name in ``de_data``, every value of ``data[name]`` is
    mapped to a label: a value equal to 3 becomes 'JAR', a value greater
    than 3 becomes 'Too Much', and anything else becomes 'Not Enough'.
    The labels are written in place to ``data['N_<name>']``.

    Note that these labels differ from the 'much' / 'not' labels produced
    by ``def_var2``, which are the ones the module-level penalty
    computation indexes by.

    Args:
        de_data: iterable of column names present in ``data``.

    Returns:
        None. ``data`` is modified in place.
    """
    for column in de_data:
        # A fresh list is built for every column, so no manual reset is
        # needed afterwards (the previous version rebound the variable to
        # the ``list.clear`` method object instead of calling it).
        data['N_%s' % column] = [
            'JAR' if value == 3 else 'Too Much' if value > 3 else 'Not Enough'
            for value in data[column]
        ]
 

 
        
#def_var(de_data)

# Names of the derived N_<column> columns created by def_var2, in creation order.
def_varlist=[]
def def_var2(de_data):
    """Vectorised recode of columns into 'JAR' / 'much' / 'not' groups.

    For each column name in ``de_data`` a new column ``N_<name>`` is written
    to the global ``data`` frame: 'JAR' where the value equals 3, 'much'
    where it is greater than 3, and 'not' otherwise. The derived column name
    is recorded in the module-level ``def_varlist``.

    Args:
        de_data: iterable of column names present in ``data``.

    Returns:
        None. ``data`` and ``def_varlist`` are modified in place.
    """
    for column in de_data:
        derived = 'N_%s' % column
        values = data[column]
        data[derived] = np.where(values == 3, 'JAR',
                                 np.where(values > 3, 'much', 'not'))
        # Re-running the recode overwrites the column, so register its name
        # only once; a duplicate entry would produce duplicate rows in the
        # tables built from def_varlist.
        if derived not in def_varlist:
            def_varlist.append(derived)

# Recode the selected columns; def_var2 also appends the names of the new
# N_<column> columns to def_varlist.
def_var2(de_data)

# Show the first rows of the frame, now including the derived columns.
print(data.head())


#        table_mean2=pd.DataFrame(data.pivot_table('A10',columns='N_A5',aggfunc='mean'),columns='A10_A5')

# de_data2 refers to the same list object as def_varlist (no copy is made).
de_data2=def_varlist

# Mean of the target column within each group, one row per grouping column.
def creat_meantable(inde_var, de_data2):
    """Build a table of group means of ``inde_var``.

    For every grouping column in ``de_data2`` the mean of
    ``data[inde_var]`` is computed per distinct value of that column.
    Each grouping column contributes one row; the distinct group values
    become the table's columns. A group value that does not occur for a
    given grouping column yields NaN in that row.

    Args:
        inde_var: name of the numeric column in the global ``data`` frame
            whose mean is taken.
        de_data2: sequence of grouping column names in ``data``.

    Returns:
        pandas.DataFrame indexed by the entries of ``de_data2``.
    """
    # groupby(...).mean() returns a Series keyed by group value on every
    # pandas version, and the frame is built in one step because
    # DataFrame.append is no longer available in current pandas releases.
    group_means = [data.groupby(var)[inde_var].mean() for var in de_data2]
    table_mean = pd.DataFrame(group_means)
    table_mean.index = list(de_data2)
    return table_mean

def creat_penc(inde_var, de_data2):
    """Build a table of group proportions for each grouping column.

    For every column in ``de_data2`` the relative frequency of each distinct
    value in ``data[column]`` is computed with
    ``value_counts(normalize=True)``. Each grouping column contributes one
    row; the distinct values become the table's columns. A value that does
    not occur in a given column yields NaN in that row.

    Args:
        inde_var: unused; kept so the signature mirrors ``creat_meantable``.
        de_data2: sequence of column names in the global ``data`` frame.

    Returns:
        pandas.DataFrame indexed by the entries of ``de_data2``.
    """
    # Collect one Series per column and build the frame once;
    # DataFrame.append is no longer available in current pandas releases.
    shares = [data[var].value_counts(normalize=True) for var in de_data2]
    table_penc = pd.DataFrame(shares)
    table_penc.index = list(de_data2)
    return table_penc

# Group means of A10 and group proportions, one row per derived N_<column>.
table_meanall=creat_meantable('A10',de_data2)
table_penall=creat_penc('A10',de_data2)


# Difference between the 'much' group mean and the 'JAR' group mean.
table_penall['m_drop']=table_meanall['much']-table_meanall['JAR']

# Difference between the 'not' group mean and the 'JAR' group mean.
table_penall['n_drop']=table_meanall['not']-table_meanall['JAR']


# Penalty = mean difference multiplied by the proportion of rows in that group.
# NOTE(review): m_penalty negates its drop while n_penalty does not, so the
# two columns follow opposite sign conventions -- confirm this is intended.
table_penall['m_penalty']=-table_penall['m_drop']*table_penall['much']
table_penall['n_penalty']=table_penall['n_drop']*table_penall['not']

# Print only the two penalty columns, one row per derived column.
print(table_penall[['m_penalty','n_penalty']])

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部