ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

pandas入门:层次化索引

2021-02-15 08:32:12  阅读:215  来源: 互联网

标签:层次化 index frame two 索引 Green print Ohio pandas


from pandas import Series
import numpy as np

# Two parallel label lists describe a two-level (hierarchical) index:
# the first list supplies the outer level, the second the inner level.
_outer_labels = ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd']
_inner_labels = [1, 2, 3, 1, 2, 3, 1, 2, 2, 3]
data = Series(np.random.randn(10), index=[_outer_labels, _inner_labels])

# NOTE: the values are drawn at random, so the numbers in the quoted sample
# outputs below only illustrate the layout; they will differ on every run.
print(data)
'''
a  1    0.050239
   2    1.886958
   3   -1.366131
b  1    1.678755
   2    0.029100
   3   -1.121555
c  1    1.732161
   2    0.401984
d  2    1.368133
   3   -0.631580
dtype: float64
'''

# The index itself is a MultiIndex of (outer, inner) tuples.
print(data.index)
'''
MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 2),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )
'''

# Partial indexing: a single outer label selects that whole sub-series.
print(data['b'])
'''
1    1.678755
2    0.029100
3   -1.121555
dtype: float64
'''

# Slicing on the outer level (both end labels are included in the sample).
print(data['b':'c'])
'''
b  1    1.678755
   2    0.029100
   3   -1.121555
c  1    1.732161
   2    0.401984
dtype: float64
'''

# A list of outer labels picks several groups at once.
print(data.loc[['b','d']])
'''
b  1    1.678755
   2    0.029100
   3   -1.121555
d  2    1.368133
   3   -0.631580
dtype: float64
'''

# Select on the inner level: every outer label, inner label 2.
print(data[:,2])
'''
a    1.886958
b    0.029100
c    0.401984
d    1.368133
dtype: float64
'''

# unstack() pivots the inner level into columns; label pairs that do not
# exist in the index (c/3 and d/1 here) show up as NaN.
unstacked = data.unstack()
print(unstacked)
'''
          1         2         3
a  0.050239  1.886958 -1.366131
b  1.678755  0.029100 -1.121555
c  1.732161  0.401984       NaN
d       NaN  1.368133 -0.631580
'''

# stack() is the inverse operation and folds the columns back into the index.
print(unstacked.stack())
'''
a  1    0.050239
   2    1.886958
   3   -1.366131
b  1    1.678755
   2    0.029100
   3   -1.121555
c  1    1.732161
   2    0.401984
d  2    1.368133
   3   -0.631580
dtype: float64
'''
from pandas import Series, DataFrame, MultiIndex
import pandas as pd
import numpy as np

# A DataFrame can carry a hierarchical index on both axes: here the rows
# have two levels (letter, number) and the columns two levels (state, color).
frame = DataFrame(np.arange(12).reshape((4,3)),
                  index=[['a','a','b','b'],[1,2,1,2]],
                  columns=[['Ohio','Ohio','Colorado'],
                           ['Green','Red','Green']])
print(frame)
'''
     Ohio     Colorado
    Green Red    Green
a 1     0   1        2
  2     3   4        5
b 1     6   7        8
  2     9  10       11
'''
# Name the levels so they can be referred to by name later on.
frame.index.names = ['key1','key2']
frame.columns.names = ['state','color']
print(frame)
'''
state      Ohio     Colorado
color     Green Red    Green
key1 key2                   
a    1        0   1        2
     2        3   4        5
b    1        6   7        8
     2        9  10       11
'''
# Partial column indexing selects every column under the outer label.
print(frame['Ohio'])
'''
color      Green  Red
key1 key2            
a    1         0    1
     2         3    4
b    1         6    7
     2         9   10
'''

# A MultiIndex can also be built explicitly and reused as the column index.
mIndex = MultiIndex.from_arrays([['Ohio','Ohio','Colorado'],['Green','Red','Green']],
                       names=['state','color'])
frame2 = DataFrame(np.arange(12).reshape((4,3)),
                   columns=mIndex)
print(frame2)
'''
state  Ohio     Colorado
color Green Red    Green
0         0   1        2
1         3   4        5
2         6   7        8
3         9  10       11
'''
# Reordering levels: swaplevel exchanges two levels; the rows stay in place.
frame3 = frame.swaplevel('key1','key2')
print(frame3)
'''
state      Ohio     Colorado
color     Green Red    Green
key2 key1                   
1    a        0   1        2
2    a        3   4        5
1    b        6   7        8
2    b        9  10       11
'''
# Swap by level position, then sort the rows by the new index.
# `axis` is passed by keyword: pandas 2.0 made it keyword-only, so the
# positional form sort_index(0) raises a TypeError there.
frame4 = frame.swaplevel(0,1).sort_index(axis=0)
print(frame4)
'''
state      Ohio     Colorado
color     Green Red    Green
key2 key1                   
1    a        0   1        2
     b        6   7        8
2    a        3   4        5
     b        9  10       11
'''
# Summary statistics by level.
# The `level=` argument of DataFrame.sum was removed in pandas 2.0;
# grouping on the index level and summing gives the same result.
frame5 = frame.groupby(level='key2').sum()
print(frame5)
'''
state  Ohio     Colorado
color Green Red    Green
key2                    
1         6   8       10
2        12  14       16
'''
# Same aggregation along the columns: transpose so the column level becomes
# a row level, group and sum, then transpose back.
frame6 = frame.T.groupby(level='color').sum().T
print(frame6)
'''
color      Green  Red
key1 key2            
a    1         2    1
     2         8    4
b    1        14    7
     2        20   10
'''
from pandas import DataFrame

# Plain four-column table; columns 'c' and 'd' will serve as index keys below.
_columns_data = {
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
    'd': [0, 1, 2, 0, 1, 2, 3],
}
frame = DataFrame(_columns_data)
print(frame)
'''
   a  b    c  d
0  0  7  one  0
1  1  6  one  1
2  2  5  one  2
3  3  4  two  0
4  4  3  two  1
5  5  2  two  2
6  6  1  two  3
'''

# Columns that are promoted to (hierarchical) row-index levels.
_key_columns = ['c', 'd']

# set_index turns one or more columns into the row index and returns a new
# DataFrame; the original `frame` is left as it was.
frame2 = frame.set_index(_key_columns)
print(frame2)
'''
       a  b
c   d      
one 0  0  7
    1  1  6
    2  2  5
two 0  3  4
    1  4  3
    2  5  2
    3  6  1
'''

# By default those columns are removed from the DataFrame; drop=False keeps
# them as regular columns as well.
frame3 = frame.set_index(_key_columns, drop=False)
print(frame3)
'''
       a  b    c  d
c   d              
one 0  0  7  one  0
    1  1  6  one  1
    2  2  5  one  2
two 0  3  4  two  0
    1  4  3  two  1
    2  5  2  two  2
    3  6  1  two  3
'''

# reset_index is the opposite of set_index: the hierarchical index levels
# are moved back into ordinary columns.
frame4 = frame2.reset_index()
print(frame4)
'''
     c  d  a  b
0  one  0  0  7
1  one  1  1  6
2  one  2  2  5
3  two  0  3  4
4  two  1  4  3
5  two  2  5  2
6  two  3  6  1
'''

标签:层次化,index,frame,two,索引,Green,print,Ohio,pandas
来源: https://www.cnblogs.com/nicole-zhang/p/12955106.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有