企业如何通过vue小程序开发满足高效运营与合规性需求
671
2022-11-25
pandas基础(2)_多重索引
1:多重索引的构造
>>> #下面显示构造pd.MultiIndex
>>> df1=DataFrame(np.random.randint(0,150,size=(6,3)),columns=['java','html5','python'])
>>> import pandas as pd
>>> df1=DataFrame(np.random.randint(0,150,size=(6,3)),columns=['java','html5','python'],index=pd.MultiIndex.from_arrays([['张三','张三','侯少','侯少','a','a'],['M','E','M','E','M','E']]))
>>> df1#因为Python自身的原因,对汉字的识别不是太好,所以汉字被?代替了
java html5 python
-? M 2 13 76
E 141 67 84
M 116 83 8
E 70 118 125
a M 74 0 76
E 111 31 8
>>> #使用元组tuple创建
df2=DataFrame(np.random.randint(0,150,size=(6,3)),columns=['java','html','python'],index=pd.MultiIndex.from_tuples([('a','1'),('a','11'),('b','1'),('b','11'),('c','1'),('c','11')]))
>>> df2
java html python
a 1 32 144 99
11 104 101 16
b 1 93 98 41
11 59 30 45
c 1 91 17 149
11 9 28 59
>>> #使用product
df2=DataFrame(np.random.randint(0,150,size=(6,3)),columns=['java','html','python'],index=pd.MultiIndex.from_product([['zhangsan ','lisi','wangwu'],['mid','end']]))
>>> df2
java html python
zhangsan mid 50 128 54
end 3 4 91
lisi mid 4 93 110
end 116 123 122
wangwu mid 88 25 54
end 48 146 57
>>> #对dataFrame同样可以设置成多重索引
df2=DataFrame(np.random.randint(0,150,size=(3,6)),columns=pd.MultiIndex.from_product([['java','html','python'],['mid','end']]),index=['张三','李四','王五'])
>>> df2
java html python
mid end mid end mid end
-? 33 38 112 70 113 110
-? 29 46 132 91 117 128
-? 73 56 118 82 132 39
>>>
>>> df2['java','mid']#查询某一列
-? 33
-? 29
-? 73
Name: (java, mid), dtype: int32
>>> s['zhangsan':'lisi']#其实就是一个Series
Series([], dtype: int64)
>>> s.iloc[0:3]
a 0 1
1 2
b 0 3
dtype: int64
>>> #切片
>>> df2['张三':'王五']
java html python
mid end mid end mid end
-? 33 38 112 70 113 110
-? 29 46 132 91 117 128
-? 73 56 118 82 132 39
>>>df2.iloc[0:4]#推荐使用
Df2[‘张三’,‘期中’]和df2.loc[‘张三’].loc[‘期中’]
#如何一级索引有多个,对二级索引会遇到问题,也就是说,无法直接对二级进行索引
必须把二级索引变成一级索引才可以进行索引
>>> df2.stack()
html java python
-? end 70 38 110
mid 112 33 113
end 91 46 128
mid 132 29 117
end 82 56 39
mid 118 73 132
>>> #stack =堆----》行
end mid
-? html 70 112
java 38 33
python 110 113
html 91 132
java 46 29
python 128 117
html 82 118
java 56 73
python 39 132
>>> #默认为-1
2:多重索引的计算
>>> df2
java html python
mid end mid end mid end
-? 33 38 112 70 113 110
-? 29 46 132 91 117 128
-? 73 56 118 82 132 39
>>> df1.sum()
java 514
html5 312
python 377
dtype: int64
>>> df1.sum(axis=0)
java 514
html5 312
python 377
dtype: int64
>>> df1.sum(axis=1)#对列
-? M 91
E 292
M 207
E 313
a M 150
E 150
dtype: int64
>>> df1.sum(axis=1)#对列求和,得到每行的和
-? M 91
E 292
M 207
E 313
a M 150
E 150
dtype: int64
>>> df1.std
-? M 2 13 76 E 141 67 84 M 116 83 8 E 70 118 125 a M 74 0 76 E 111 31 8> >>> #求方差 >>> df1.std(axis=1) -? M 39.929104 E 38.759945 M 55.344376 E 29.938827 a M 43.312816 E 54.064776 dtype: float64 >>> df1.max() java 141 html5 118 python 125 dtype: int32 3多重索引的拼接 >>> nd = np.random.randint(0,10,size=(3,3)) >>> nd array([[9, 9, 4], [7, 2, 4], [1, 6, 1]]) >>> np.concatenate ((nd,nd),axis=0)#在列方向就行拼接 array([[9, 9, 4], [7, 2, 4], [1, 6, 1], [9, 9, 4], [7, 2, 4], [1, 6, 1]]) >>> np.concatenate ([nd,nd],axis=1)#在行方向进行拼接 array([[9, 9, 4, 9, 9, 4], [7, 2, 4, 7, 2, 4], [1, 6, 1, 1, 6, 1]]) >>> def make_df(cols,inds): data = {c:[c+str(i) for i in cols]for c in cols} return DataFrame(data,index=inds,columns=cols) >>> make_df(['A','B'],[1,2]) A B 1 AA BA 2 AB BB >>> df1=make_df(list('AB'),[0,1]) >>> df2=make_df(list('AB'),[2,3]) >>> pd.concat ([df1,df2])#默认在列方向进行拼接 A B 0 AA BA 1 AB BB 2 AA BA 3 AB BB >>> #优先增加行数 >>> pd.concat ((df1,df2),axis=1) A B A B 0 AA BA NaN NaN 1 AB BB NaN NaN 2 NaN NaN AA BA 3 NaN NaN AB BB >>> #注意index在级联时可以重复 3) >>> #列名可以相同但是不建议 >>> df3= make_df(list('AB'),[0,1]) >>> df4=make_df(list('VB'),[1,2]) >>> pd.concat((df3,df4))#只能传入一个参数 A B V 0 AA BA NaN 1 AB BB NaN 1 NaN BV VV 2 NaN BB VB >>> #3种连接方式 >>> #1:外连接:补NaN(默认模式) >>> df1= make_df(list('AB'),[1,3]) >>> df2= make_df(list('AB'),[2,4]) >>> df2= make_df(list('BC'),[2,4]) >>> pd.concat ([df1,df2],join='inner')#连接都有的部分 B 1 BA 3 BB 2 BB 4 BC >>> pd.concat ([df1,df2],join='outer') A B C 1 AA BA NaN 3 AB BB NaN 2 NaN BB CB 4 NaN BC CC >>> #内连接只连接匹配项 >>> #3:连接指定轴 join_axes所以CDF的F便不显示了 >>> df3= make_df(list('ACD'),[0,1,2]) >>> df4= make_df(list('CDF'),[3,4,5]) >>> pd.concat([df3,df4],join_axes=[df3.columns]) A C D 0 AA CA DA 1 AC CC DC 2 AD CD DD 3 NaN CC DC 4 NaN CD DD 5 NaN CF DF >>> #join_axes 某一个DataFrame列索引为新的列索引值 >>> #3使用append()函数添加 >>> #concat方法属于pandas >>> #append()在后面添加 >>> #concat([df1,df2]) >>> #df1.append(df2) >>> #merge与concat的区别是,merge需要依据某一共同的行或列来进行合并 >>> #使用pd.merge()合并时,会自动根据两者相同column名称的那一属性,作为key来进行合并,注意每一列的顺序不要求一致 >>> #一对一合并 >>> df1 = DataFrame({'employee':['po','sara','danis'],'group':['sail','counting','marcketing']}) >>> df2 = DataFrame({'employee':['po','sara','danis'],'work_time':[2,3,1]}) >>> df1 employee group 0 po sail 1 sara counting 2 danis marcketing >>> df2 employee work_time 0 po 2 1 sara 3 2 danis 1 >>> pd.merge (df1,df2) employee group work_time 0 po sail 2 1 sara counting 3 2 danis marcketing 1 >>> pd.concat([df1,df2]) employee group work_time 0 po sail NaN 1 sara counting NaN 2 danis marcketing NaN 0 po NaN 2.0 1 sara NaN 3.0 2 danis NaN 1.0 >>> df3 = DataFrame({'employee':['po','sara','liulei'],'work_time':[2,3,1]}) >>> pd.merge(df1,df3) employee group work_time 0 po sail 2 1 sara counting 3 >>> #merge只合并相同属性里面都有的项 >>> #下面是merge的多对一的合并 >>> df1 = DataFrame({'employee':['po','sara','danis'],'work_time':[2,3,1]}) >>> df2 = DataFrame({'employee':['po','po','danis'],'group':['sail','counting','marcketing']}) >>> pd.merge(df1,df2) employee work_time group 0 po 2 sail 1 po 2 counting 2 danis 1 marcketing >>> #出现了两个po >>> #下面是多对多的合并 >>> df1 = DataFrame({'employee':['po','sara','danis'],'group':['sail','counting','marcketing']}) >>> df1 = DataFrame({'employee':['po','po','danis'],'group':['sail','counting','marcketing']}) >>> df2 = DataFrame({'employee':['po','po','danis'],'work_time':[2,3,1]}) >>> pd.merge(df1,df2) employee group work_time 0 po sail 2 1 po sail 3 2 po counting 2 3 po counting 3 4 danis marcketing 1 >>> #1*2*2的模式 >>> #使用merge多对多可以来处理重名等数据的情况 >>> df3= DataFrame({'employee':['po','Summer','Flower'],'group':['sail','marking','serch'],'WorkTime':[1,2,3]}) >>> df4= DataFrame({'employee':['po','Summer','Flower'],'group':['sail','marking','serch'],'salary':[12000,20000,10002]}) >>> df3 WorkTime employee group 0 1 po sail 1 2 Summer marking 2 3 Flower serch >>> df4 employee group salary 0 po sail 12000 1 Summer marking 20000 2 Flower serch 10002 >>> pd.merge(df3,df4) WorkTime employee group salary 0 1 po sail 12000 1 2 Summer marking 20000 2 3 Flower serch 10002 >>> df3= DataFrame({'employee':['po','Winter','Flower'],'group':['marketing','marking','serch'],'WorkTime':[1,2,3]}) >>> pd.merge(df3,df4) WorkTime employee group salary 0 3 Flower serch 10002 >>> pd.merge(df3,df4,on='employee') WorkTime employee group_x group_y salary 0 1 po marketing sail 12000 1 3 Flower serch serch 10002 >>> #出现两行数据的原因是指定了employee相同就可以合并 >> pd.merge(df3,df4,on='group') WorkTime employee_x group employee_y salary 0 2 Winter marking Summer 20000 1 3 Flower serch Flower 10002 >>> pd.merge(df3,df4,on='group',suffixes=['_A','_B']) WorkTime employee_A group employee_B salary 0 2 Winter marking Summer 20000 1 3 Flower serch Flower 10002 >>> df3= DataFrame({'employee':['po','Winter','Flower'],'group':['marketing','marking','serch'],'WorkTime':[1,2,3]}) >>> df4= DataFrame({'employer':['po','Summer','Flower'],'group':['sail','marking','serch'],'salary':[12000,20000,10002]}) >>> pd.merge(df3,df4) WorkTime employee group employer salary 0 2 Winter marking Summer 20000 1 3 Flower serch Flower 10002 >>> pd.merge(df3,df4,left_on='employee',right_on='employer') WorkTime employee group_x employer group_y salary 0 1 po marketing po sail 12000 1 3 Flower serch Flower serch 10002 >>> #df3主键key为employee和df4主键为employer,两者不同但又想相互匹配时,可以指定前者的left_on为employee和后者的right_on为employer这时两者可以进行匹配 >>> #内合并与外合并 >>> #内合并只保留两者都有的数据 >>> df1=DataFrame({'age':[18,22,33],'height':[175,169,180]}) >>> df1=DataFrame({'age':[18,23,32],'height':[175,169,180]}) >>> df2=DataFrame({'age':[18,22,33],'weight':[175,169,180]}) >>> pd.merge(df1,df2) age height weight 0 18 175 175 >>> pd.merge(df1,df2,how='outer') age height weight 0 18 175.0 175.0 1 23 169.0 NaN 2 32 180.0 NaN 3 22 NaN 169.0 4 33 NaN 180.0 >>> #默认为内合并,通过how可以指定合并类型 >>> >>> pd.merge(df1,df2,how='left') age height weight 0 18 175 175.0 1 23 169 NaN 2 32 180 NaN >>> pd.merge(df1,df2,how='right') age height weight 0 18 175.0 175 1 22 NaN 169 2 33 NaN 180 >>> #left保留前者的数据,right保留后者数据 >>> #left保留前者df1的数据,right保留后者df2数据 >>> #下面是列冲突 >>> df3= DataFrame({'employee':['po','Winter','Flower'],'group':['marketing','marking','serch'],'WorkTime':[1,2,3]}) >>> df4= DataFrame({'employee':['po','Summer','Flower'],'group':['sail','marking','serch'],'salary':[12000,20000,10002]}) >>> pd.merge(df3,df4) WorkTime employee group salary 0 3 Flower serch 10002 >>> pd.merge(df3,df4,on='employee',suffixes=['_李','_王']) WorkTime employee group_?? group_?? salary 0 1 po marketing sail 12000 1 3 Flower serch serch 10002 >>> #因为两者的employee和group相同,当指定employee为主键时,suffixes修改的就是group 4:总结: 多重索引也是pandas里非常重要的知识点,要牢牢掌握 作者:你的雷哥
版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。
发表评论
暂时没有评论,来抢沙发吧~