Pandas 기초

import pandas as pd

multiple index 생성

data = pd.Series([10, 20, 30, 40, 15, 25, 35, 25],
                index = [['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b'], ['obj1', 'obj2', 'obj3', 'obj4', 'obj1', 'obj2', 'obj3', 'obj4']])
data
a  obj1    10
   obj2    20
   obj3    30
   obj4    40
b  obj1    15
   obj2    25
   obj3    35
   obj4    25
dtype: int64
data.index
MultiIndex([('a', 'obj1'),
            ('a', 'obj2'),
            ('a', 'obj3'),
            ('a', 'obj4'),
            ('b', 'obj1'),
            ('b', 'obj2'),
            ('b', 'obj3'),
            ('b', 'obj4')],
           )

Partial indexing

data['b']
obj1    15
obj2    25
obj3    35
obj4    25
dtype: int64
data[:, 'obj2']
a    20
b    25
dtype: int64

데이터 unstack

data.unstack(0)
a b
obj1 10 15
obj2 20 25
obj3 30 35
obj4 40 25
data.unstack(1)
obj1 obj2 obj3 obj4
a 10 20 30 40
b 15 25 35 25
d = data.unstack()
d
obj1 obj2 obj3 obj4
a 10 20 30 40
b 15 25 35 25
d.stack()
a  obj1    10
   obj2    20
   obj3    30
   obj4    40
b  obj1    15
   obj2    25
   obj3    35
   obj4    25
dtype: int64

column indexing

import numpy as np

df = pd.DataFrame(np.arange(12).reshape(4, 3),
                 index = [['a', 'a', 'b', 'b'], ['one', 'two', 'three', 'four']],
                 columns = [['num1', 'num2', 'num3'], ['red', 'green', 'red']]
                 )
df
num1 num2 num3
red green red
a one 0 1 2
two 3 4 5
b three 6 7 8
four 9 10 11
df.index
MultiIndex([('a',   'one'),
            ('a',   'two'),
            ('b', 'three'),
            ('b',  'four')],
           )
df.columns
MultiIndex([('num1',   'red'),
            ('num2', 'green'),
            ('num3',   'red')],
           )
df.index.names=['key1', 'key2']
df.columns.names=['n', 'color']
df
n num1 num2 num3
color red green red
key1 key2
a one 0 1 2
two 3 4 5
b three 6 7 8
four 9 10 11
df['num1']
color red
key1 key2
a one 0
two 3
b three 6
four 9
df.ix['a']
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-48-470835279a79> in <module>
----> 1 df.ix['a']


~/opt/anaconda3/envs/playdata/lib/python3.9/site-packages/pandas/core/generic.py in __getattr__(self, name)
   5463             if self._info_axis._can_hold_identifiers_and_holds_name(name):
   5464                 return self[name]
-> 5465             return object.__getattribute__(self, name)
   5466 
   5467     def __setattr__(self, name: str, value) -> None:


AttributeError: 'DataFrame' object has no attribute 'ix'

swaplevel

df.swaplevel('key1', 'key2')
n num1 num2 num3
color red green red
key2 key1
one a 0 1 2
two a 3 4 5
three b 6 7 8
four b 9 10 11
df.sort_index(level='key2')
n num1 num2 num3
color red green red
key1 key2
b four 9 10 11
a one 0 1 2
b three 6 7 8
a two 3 4 5

level을 사용하여 상태 요약

df.sum(level = 'key1')
n num1 num2 num3
color red green red
key1
a 3 5 7
b 15 17 19
df.sum(level='color', axis=1)
color red green
key1 key2
a one 2 1
two 8 4
b three 14 7
four 20 10

File operations

파일 읽기

import pandas as pd
df = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/ex1.csv',
                           index_col=None, encoding='utf-8')
df
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
df = pd.read_table('./pythondsp-pandasguide-b936c3b43406/data/readEx/ex1.csv',
                           sep=',', index_col=None, encoding='utf-8')
df
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
  • header
df = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/ex2.csv',
                           index_col=None, encoding='utf-8', header=None)
df
0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
df = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/ex2.csv',
                           index_col=None, encoding='utf-8', names=['a', 'b', 'c', 'd', 'message'])
df
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo
df = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/ex2.csv',
                encoding='utf-8',  names=['a', 'b', 'c', 'd', 'message'],
                index_col='message')
df
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12
pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/csv_mindex.csv',
                           index_col=['key1', 'key2'], encoding='utf-8')
value1 value2
key1 key2
one a 1 2
b 3 4
c 5 6
d 7 8
two a 9 10
b 11 12
c 13 14
d 15 16
  • skiprows
d = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/ex4.csv',
                           index_col=None, encoding='utf-8')
d
# hey!
a b c d message
# just wanted to make things more difficult for you NaN NaN NaN NaN
# who reads CSV files with computers anyway? NaN NaN NaN
1 2 3 4 hello
5 6 7 8 world
9 10 11 12 foo
d = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/ex4.csv',
                           index_col=None, encoding='utf-8', skiprows=[0, 2, 3])
d
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo

파일에 데이터 쓰기

d.to_csv('d_out.csv')
d.to_csv('d_out2.csv', header=False, index=False)
d1 = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/d_out.csv',
                           index_col=None, encoding='utf-8')
d1
Unnamed: 0 a b c d message
0 0 1 2 3 4 hello
1 1 5 6 7 8 world
2 2 9 10 11 12 foo
d2 = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/readEx/d_out2.csv',
                           index_col=None, encoding='utf-8')
d2
0 1 2 3 4 hello
0 1 5 6 7 8 world
1 2 9 10 11 12 foo

Merge

df1 = pd.DataFrame({'key' : ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
                   'data1' : range(7)})
df1
key data1
0 b 0
1 b 1
2 a 2
3 c 3
4 a 4
5 a 5
6 b 6
df2 = pd.DataFrame({'key' : ['a', 'b', 'd', 'b'],
                   'data2' : range(4)})
df2
key data2
0 a 0
1 b 1
2 d 2
3 b 3
pd.merge(df1, df2)
key data1 data2
0 b 0 1
1 b 0 3
2 b 1 1
3 b 1 3
4 b 6 1
5 b 6 3
6 a 2 0
7 a 4 0
8 a 5 0
df1 = pd.DataFrame({'key1' : ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
                   'data1' : range(7)})
df2 = pd.DataFrame({'key2' : ['a', 'b', 'd', 'b'],
                   'data2' : range(4)})
pd.merge(df1, df2, left_on='key1', right_on='key2')
key1 data1 key2 data2
0 b 0 b 1
1 b 0 b 3
2 b 1 b 1
3 b 1 b 3
4 b 6 b 1
5 b 6 b 3
6 a 2 a 0
7 a 4 a 0
8 a 5 a 0

inner / outer join

pd.merge(df1, df2, left_on='key1', right_on='key2', how='left')
key1 data1 key2 data2
0 b 0 b 1.0
1 b 0 b 3.0
2 b 1 b 1.0
3 b 1 b 3.0
4 a 2 a 0.0
5 c 3 NaN NaN
6 a 4 a 0.0
7 a 5 a 0.0
8 b 6 b 1.0
9 b 6 b 3.0
pd.merge(df1, df2, left_on='key1', right_on='key2', how='right')
key1 data1 key2 data2
0 a 2.0 a 0
1 a 4.0 a 0
2 a 5.0 a 0
3 b 0.0 b 1
4 b 1.0 b 1
5 b 6.0 b 1
6 NaN NaN d 2
7 b 0.0 b 3
8 b 1.0 b 3
9 b 6.0 b 3
pd.merge(df1, df2, left_on='key1', right_on='key2', how='outer')
key1 data1 key2 data2
0 b 0.0 b 1.0
1 b 0.0 b 3.0
2 b 1.0 b 1.0
3 b 1.0 b 3.0
4 b 6.0 b 1.0
5 b 6.0 b 3.0
6 a 2.0 a 0.0
7 a 4.0 a 0.0
8 a 5.0 a 0.0
9 c 3.0 NaN NaN
10 NaN NaN d 2.0

Concatenating data

s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 1, 3], index=['c', 'd', 'e'])
s3 = pd.Series([4, 7], index=['a', 'e'])
s1
a    0
b    1
dtype: int64
s2
c    2
d    1
e    3
dtype: int64
s3
a    4
e    7
dtype: int64
pd.concat([s1, s2])
a    0
b    1
c    2
d    1
e    3
dtype: int64
pd.concat([s1, s2], axis=1)
0 1
a 0.0 NaN
b 1.0 NaN
c NaN 2.0
d NaN 1.0
e NaN 3.0
pd.concat([s1, s2, s3], keys=['one', 'two', 'three'])
one    a    0
       b    1
two    c    2
       d    1
       e    3
three  a    4
       e    7
dtype: int64
pd.concat([df1, df2], join='inner', axis=1, keys=['one', 'two'])
one two
key1 data1 key2 data2
0 b 0 a 0
1 b 1 b 1
2 a 2 d 2
3 c 3 b 3
pd.concat({'level1':df1, 'level2':df2}, axis=1, join='inner')
level1 level2
key1 data1 key2 data2
0 b 0 a 0
1 b 1 b 1
2 a 2 d 2
3 c 3 b 3

데이터 변환

중복항목 제거

df = pd.DataFrame({'k1':['one']*3+['two']*4,
                  'k2':[1,1,2,3,3,4,4]})
df
k1 k2
0 one 1
1 one 1
2 one 2
3 two 3
4 two 3
5 two 4
6 two 4
df.duplicated()
0    False
1     True
2    False
3    False
4     True
5    False
6     True
dtype: bool
df.drop_duplicates()
k1 k2
0 one 1
2 one 2
3 two 3
5 two 4
df.drop_duplicates(keep='last')
k1 k2
1 one 1
2 one 2
4 two 3
6 two 4
df.drop_duplicates(['k1'])
k1 k2
0 one 1
3 two 3
df.drop_duplicates(['k1', 'k2'])
k1 k2
0 one 1
2 one 2
3 two 3
5 two 4

value 대체

df.replace('one', 'One')
k1 k2
0 One 1
1 One 1
2 One 2
3 two 3
4 two 3
5 two 4
6 two 4
df.replace(['one', 3], ['One', 30])
k1 k2
0 One 1
1 One 1
2 One 2
3 two 30
4 two 30
5 two 4
6 two 4

groupby 와 데이터 집합

기초

df = pd.DataFrame({'k1':['a', 'a', 'b', 'b', 'a'],
                  'k2':['one', 'two', 'one', 'two', 'one'],
                  'data1':[2, 3, 3, 2, 4],
                  'data2':[5, 5, 5, 5, 10]})
df
k1 k2 data1 data2
0 a one 2 5
1 a two 3 5
2 b one 3 5
3 b two 2 5
4 a one 4 10
gp1 = df['data1'].groupby(df['k1'])
gp1
<pandas.core.groupby.generic.SeriesGroupBy object at 0x7ff3d800a910>
gp1.mean()
k1
a    3.0
b    2.5
Name: data1, dtype: float64
gp2 = df['data1'].groupby([df['k1'], df['k2']])
mean = gp2.mean()
mean
k1  k2 
a   one    3
    two    3
b   one    3
    two    2
Name: data1, dtype: int64

그룹 반복

for name, group in gp1:
    print(name)
    print(group)
a
0    2
1    3
4    4
Name: data1, dtype: int64
b
2    3
3    2
Name: data1, dtype: int64
for name, group in gp2:
    print(name)
    print(group)
('a', 'one')
0    2
4    4
Name: data1, dtype: int64
('a', 'two')
1    3
Name: data1, dtype: int64
('b', 'one')
2    3
Name: data1, dtype: int64
('b', 'two')
3    2
Name: data1, dtype: int64
for (k1, k2), group in gp2:
    print(k1, k2)
    print(group)
a one
0    2
4    4
Name: data1, dtype: int64
a two
1    3
Name: data1, dtype: int64
b one
2    3
Name: data1, dtype: int64
b two
3    2
Name: data1, dtype: int64

Data 집합

gp1.max()
k1
a    4
b    3
Name: data1, dtype: int64
gp2.min()
k1  k2 
a   one    2
    two    3
b   one    3
    two    2
Name: data1, dtype: int64

dates 와 times

time 생성

import pandas as pd
import numpy as np

rng = pd.date_range('2011-03-01 10:15', periods = 10, freq = 'M')
rng
DatetimeIndex(['2011-03-31 10:15:00', '2011-04-30 10:15:00',
               '2011-05-31 10:15:00', '2011-06-30 10:15:00',
               '2011-07-31 10:15:00', '2011-08-31 10:15:00',
               '2011-09-30 10:15:00', '2011-10-31 10:15:00',
               '2011-11-30 10:15:00', '2011-12-31 10:15:00'],
              dtype='datetime64[ns]', freq='M')
rng = pd.date_range('2015 Jul 2 10:15', periods = 10, freq = 'M')
rng
DatetimeIndex(['2015-07-31 10:15:00', '2015-08-31 10:15:00',
               '2015-09-30 10:15:00', '2015-10-31 10:15:00',
               '2015-11-30 10:15:00', '2015-12-31 10:15:00',
               '2016-01-31 10:15:00', '2016-02-29 10:15:00',
               '2016-03-31 10:15:00', '2016-04-30 10:15:00'],
              dtype='datetime64[ns]', freq='M')
  • start 와 end 를 통해 생성
rng = pd.date_range(start='2015 Jul 2 10:15', end='2015 July 12', freq='12H')
rng
DatetimeIndex(['2015-07-02 10:15:00', '2015-07-02 22:15:00',
               '2015-07-03 10:15:00', '2015-07-03 22:15:00',
               '2015-07-04 10:15:00', '2015-07-04 22:15:00',
               '2015-07-05 10:15:00', '2015-07-05 22:15:00',
               '2015-07-06 10:15:00', '2015-07-06 22:15:00',
               '2015-07-07 10:15:00', '2015-07-07 22:15:00',
               '2015-07-08 10:15:00', '2015-07-08 22:15:00',
               '2015-07-09 10:15:00', '2015-07-09 22:15:00',
               '2015-07-10 10:15:00', '2015-07-10 22:15:00',
               '2015-07-11 10:15:00', '2015-07-11 22:15:00'],
              dtype='datetime64[ns]', freq='12H')
len(rng)
20
rng = pd.date_range(start='2015 Jul 2 10:15', end='2015 July 12', freq='12H', tz='Asia/Kolkata')
rng
DatetimeIndex(['2015-07-02 10:15:00+05:30', '2015-07-02 22:15:00+05:30',
               '2015-07-03 10:15:00+05:30', '2015-07-03 22:15:00+05:30',
               '2015-07-04 10:15:00+05:30', '2015-07-04 22:15:00+05:30',
               '2015-07-05 10:15:00+05:30', '2015-07-05 22:15:00+05:30',
               '2015-07-06 10:15:00+05:30', '2015-07-06 22:15:00+05:30',
               '2015-07-07 10:15:00+05:30', '2015-07-07 22:15:00+05:30',
               '2015-07-08 10:15:00+05:30', '2015-07-08 22:15:00+05:30',
               '2015-07-09 10:15:00+05:30', '2015-07-09 22:15:00+05:30',
               '2015-07-10 10:15:00+05:30', '2015-07-10 22:15:00+05:30',
               '2015-07-11 10:15:00+05:30', '2015-07-11 22:15:00+05:30'],
              dtype='datetime64[ns, Asia/Kolkata]', freq='12H')
rng.tz_convert('Australia/sydney')
DatetimeIndex(['2015-07-02 14:45:00+10:00', '2015-07-03 02:45:00+10:00',
               '2015-07-03 14:45:00+10:00', '2015-07-04 02:45:00+10:00',
               '2015-07-04 14:45:00+10:00', '2015-07-05 02:45:00+10:00',
               '2015-07-05 14:45:00+10:00', '2015-07-06 02:45:00+10:00',
               '2015-07-06 14:45:00+10:00', '2015-07-07 02:45:00+10:00',
               '2015-07-07 14:45:00+10:00', '2015-07-08 02:45:00+10:00',
               '2015-07-08 14:45:00+10:00', '2015-07-09 02:45:00+10:00',
               '2015-07-09 14:45:00+10:00', '2015-07-10 02:45:00+10:00',
               '2015-07-10 14:45:00+10:00', '2015-07-11 02:45:00+10:00',
               '2015-07-11 14:45:00+10:00', '2015-07-12 02:45:00+10:00'],
              dtype='datetime64[ns, Australia/Sydney]', freq='12H')
type(rng[0])
pandas._libs.tslibs.timestamps.Timestamp

문자열을 date 로 변환

dd=['07/07/2015', '08/12/2015', '12/04/2015']
dd
['07/07/2015', '08/12/2015', '12/04/2015']
type(dd[0])
str
list(pd.to_datetime(dd))
[Timestamp('2015-07-07 00:00:00'),
 Timestamp('2015-08-12 00:00:00'),
 Timestamp('2015-12-04 00:00:00')]
d = list(pd.to_datetime(dd, dayfirst=True))
d
[Timestamp('2015-07-07 00:00:00'),
 Timestamp('2015-12-08 00:00:00'),
 Timestamp('2015-04-12 00:00:00')]
type(d[0])
pandas._libs.tslibs.timestamps.Timestamp

periods

pr = pd.Period('2012', freq='M')
pr.asfreq('D', 'start')
Period('2012-01-01', 'D')
pr.asfreq('D', 'end')
Period('2012-01-31', 'D')
  • period 연산
pr = pd.Period('2012', freq='A')
pr
Period('2012', 'A-DEC')
pr+1
Period('2013', 'A-DEC')
prMonth = pr.asfreq('M')
prMonth
Period('2012-12', 'M')
prMonth -1
Period('2012-11', 'M')

range 를 통한 period 생성

prg = pd.period_range('2010', '2015', freq='A')
prg
PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'], dtype='period[A-DEC]', freq='A-DEC')
data = pd.Series(np.random.rand(len(prg)), index=prg)
data
2010    0.773062
2011    0.246164
2012    0.298947
2013    0.096472
2014    0.279433
2015    0.894872
Freq: A-DEC, dtype: float64

문자열로 된 날짜를 period로 변환

dates = ['2013-02-02', '2012-02-02', '2013-02-02']
d = pd.to_datetime(dates)
d
DatetimeIndex(['2013-02-02', '2012-02-02', '2013-02-02'], dtype='datetime64[ns]', freq=None)
prd = d.to_period(freq='M')
prd
PeriodIndex(['2013-02', '2012-02', '2013-02'], dtype='period[M]', freq='M')
prd.asfreq('D')
PeriodIndex(['2013-02-28', '2012-02-29', '2013-02-28'], dtype='period[D]', freq='D')
prd.asfreq('Y')
PeriodIndex(['2013', '2012', '2013'], dtype='period[A-DEC]', freq='A-DEC')

periods 를 timestamps로 변환

prd
PeriodIndex(['2013-02', '2012-02', '2013-02'], dtype='period[M]', freq='M')
prd.to_timestamp()
DatetimeIndex(['2013-02-01', '2012-02-01', '2013-02-01'], dtype='datetime64[ns]', freq=None)
prd.to_timestamp(how='end')
DatetimeIndex(['2013-02-28 23:59:59.999999999',
               '2012-02-29 23:59:59.999999999',
               '2013-02-28 23:59:59.999999999'],
              dtype='datetime64[ns]', freq=None)

Time offsets

pd.Timedelta('3 days')
Timedelta('3 days 00:00:00')
pd.Timedelta('3M')
/Users/jinho/opt/anaconda3/envs/playdata/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3441: FutureWarning: Units 'M', 'Y' and 'y' do not represent unambiguous timedelta values and will be removed in a future version
  exec(code_obj, self.user_global_ns, self.user_ns)





Timedelta('0 days 00:03:00')
pd.Timedelta('4 days 3M')
Timedelta('4 days 00:03:00')
pd.Timestamp('9 July 2016 12:00') + pd.Timedelta('1 day 3 min')
Timestamp('2016-07-10 12:03:00')
rng + pd.Timedelta('1 day')
DatetimeIndex(['2015-07-03 10:15:00+05:30', '2015-07-03 22:15:00+05:30',
               '2015-07-04 10:15:00+05:30', '2015-07-04 22:15:00+05:30',
               '2015-07-05 10:15:00+05:30', '2015-07-05 22:15:00+05:30',
               '2015-07-06 10:15:00+05:30', '2015-07-06 22:15:00+05:30',
               '2015-07-07 10:15:00+05:30', '2015-07-07 22:15:00+05:30',
               '2015-07-08 10:15:00+05:30', '2015-07-08 22:15:00+05:30',
               '2015-07-09 10:15:00+05:30', '2015-07-09 22:15:00+05:30',
               '2015-07-10 10:15:00+05:30', '2015-07-10 22:15:00+05:30',
               '2015-07-11 10:15:00+05:30', '2015-07-11 22:15:00+05:30',
               '2015-07-12 10:15:00+05:30', '2015-07-12 22:15:00+05:30'],
              dtype='datetime64[ns, Asia/Kolkata]', freq='12H')

time 과 index data

dates = pd.date_range('2015-01-12', '2015-06-14', freq = 'M')
dates
DatetimeIndex(['2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
               '2015-05-31'],
              dtype='datetime64[ns]', freq='M')
len(dates)
5
atemp = pd.Series([100.2, 98, 93, 98, 100], index=dates)
atemp
2015-01-31    100.2
2015-02-28     98.0
2015-03-31     93.0
2015-04-30     98.0
2015-05-31    100.0
Freq: M, dtype: float64
idx = atemp.index[3]
idx
Timestamp('2015-04-30 00:00:00', freq='M')
atemp[idx]
98.0
stemp = pd.Series([89, 98, 100, 88, 89], index=dates)
stemp
2015-01-31     89
2015-02-28     98
2015-03-31    100
2015-04-30     88
2015-05-31     89
Freq: M, dtype: int64
temps = pd.DataFrame({'Auckland':atemp, 'Delhi':stemp})
temps
Auckland Delhi
2015-01-31 100.2 89
2015-02-28 98.0 98
2015-03-31 93.0 100
2015-04-30 98.0 88
2015-05-31 100.0 89
temps['Auckland']
2015-01-31    100.2
2015-02-28     98.0
2015-03-31     93.0
2015-04-30     98.0
2015-05-31    100.0
Freq: M, Name: Auckland, dtype: float64
temps['Diff'] = temps['Auckland'] - temps['Delhi']
temps
Auckland Delhi Diff
2015-01-31 100.2 89 11.2
2015-02-28 98.0 98 0.0
2015-03-31 93.0 100 -7.0
2015-04-30 98.0 88 10.0
2015-05-31 100.0 89 11.0
del temps['Diff']
temps
Auckland Delhi
2015-01-31 100.2 89
2015-02-28 98.0 98
2015-03-31 93.0 100
2015-04-30 98.0 88
2015-05-31 100.0 89

응용

import pandas as pd
df = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/time/stocks.csv')
df.head()
Unnamed: 0 date AA GE IBM MSFT
0 0 1990-02-01 00:00:00 4.98 2.87 16.79 0.51
1 1 1990-02-02 00:00:00 5.04 2.87 16.89 0.51
2 2 1990-02-05 00:00:00 5.07 2.87 17.32 0.51
3 3 1990-02-06 00:00:00 5.01 2.88 17.56 0.51
4 4 1990-02-07 00:00:00 5.04 2.91 17.93 0.51
d = df.date[0]
d
'1990-02-01 00:00:00'
type(d)
str
df = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/time/stocks.csv',
                          parse_dates=['date'], index_col='date')
df.head()
Unnamed: 0 AA GE IBM MSFT
date
1990-02-01 0 4.98 2.87 16.79 0.51
1990-02-02 1 5.04 2.87 16.89 0.51
1990-02-05 2 5.07 2.87 17.32 0.51
1990-02-06 3 5.01 2.88 17.56 0.51
1990-02-07 4 5.04 2.91 17.93 0.51
del df['Unnamed: 0']
df.head()
AA GE IBM MSFT
date
1990-02-01 4.98 2.87 16.79 0.51
1990-02-02 5.04 2.87 16.89 0.51
1990-02-05 5.07 2.87 17.32 0.51
1990-02-06 5.01 2.88 17.56 0.51
1990-02-07 5.04 2.91 17.93 0.51
df.index.name
'date'

resampling

stocks = pd.read_csv('./pythondsp-pandasguide-b936c3b43406/data/time/stocks.csv',
                          parse_dates=['date'])
stocks.head()
Unnamed: 0 date AA GE IBM MSFT
0 0 1990-02-01 4.98 2.87 16.79 0.51
1 1 1990-02-02 5.04 2.87 16.89 0.51
2 2 1990-02-05 5.07 2.87 17.32 0.51
3 3 1990-02-06 5.01 2.88 17.56 0.51
4 4 1990-02-07 5.04 2.91 17.93 0.51