mardi 18 décembre 2018

Python Pandas for dummies

First steps with the Python pandas library

Loading pandas

In [3]:
# this is a comment
import pandas as pd 
In [8]:
# Build a Series: a one-dimensional array whose values carry an index
s = pd.Series(data=[3, 5, 5, 9, 6, 8])
s
Out[8]:
0    3
1    5
2    5
3    9
4    6
5    8
dtype: int64
In [9]:
# return the first 5 elements of the Series
s.head()
Out[9]:
0    3
1    5
2    5
3    9
4    6
dtype: int64

Reading data from a URL

In [12]:
# Load the UFO sightings dataset directly from a URL.
# The file is comma-separated, so read_csv (whose default delimiter is ',')
# is the idiomatic reader; read_table defaults to tabs and needed sep=','.
ufo = pd.read_csv('http://bit.ly/uforeports')
In [13]:
# preview the first rows of the table
ufo.head()
Out[13]:
City Colors Reported Shape Reported State Time
0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
1 Willingboro NaN OTHER NJ 6/30/1930 20:00
2 Holyoke NaN OVAL CO 2/15/1931 14:00
3 Abilene NaN DISK KS 6/1/1931 13:00
4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00
In [16]:
# a column can be accessed with bracket notation or as an attribute:
# ufo['State'] is equivalent to ufo.State
# (attribute access needs a name that is a valid Python identifier, so a
#  column such as 'Colors Reported' has to be accessed with brackets)
 
ufo.State
Out[16]:
0        NY
1        NJ
2        CO
3        KS
4        NY
5        ND
6        CA
7        MI
8        AK
9        OR
10       CA
11       AL
12       SC
13       IA
14       MI
15       CA
16       CA
17       GA
18       TN
19       AK
20       NE
21       LA
22       LA
23       KY
24       WV
25       CA
26       WV
27       NM
28       NM
29       UT
         ..
18211    MA
18212    CA
18213    CA
18214    TX
18215    TX
18216    CA
18217    CO
18218    TX
18219    CA
18220    CA
18221    NH
18222    PA
18223    SC
18224    OK
18225    CA
18226    CA
18227    CA
18228    TX
18229    IL
18230    CA
18231    CA
18232    WI
18233    AK
18234    CA
18235    AZ
18236    IL
18237    IA
18238    WI
18239    WI
18240    FL
Name: State, Length: 18241, dtype: object
In [29]:
# add a new column to the table: 'Address' is the city and the state
# joined by a single space
ufo['Address'] = ufo['City'] + ' ' + ufo['State']
ufo.head()
Out[29]:
City Colors Reported Shape Reported State Time Address
0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00 Ithaca NY
1 Willingboro NaN OTHER NJ 6/30/1930 20:00 Willingboro NJ
2 Holyoke NaN OVAL CO 2/15/1931 14:00 Holyoke CO
3 Abilene NaN DISK KS 6/1/1931 13:00 Abilene KS
4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00 New York Worlds Fair NY
In [30]:
# see the shape of the data: (number of rows, number of columns)
ufo.shape
Out[30]:
(18241, 6)
In [31]:
# see the data type of each column
ufo.dtypes
Out[31]:
City               object
Colors Reported    object
Shape Reported     object
State              object
Time               object
Address            object
dtype: object
In [26]:
# view the column labels of the table
ufo.columns
Out[26]:
Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time',
       'Address'],
      dtype='object')
In [32]:
# drop one or more columns by name: drop(columns=[col1, col2, ..., coln])
# The result is rebound to `ufo` instead of using inplace=True, and
# errors='ignore' makes the cell safe to re-run once 'Address' is already gone
# (the in-place version raised KeyError on a second execution).
ufo = ufo.drop(columns=['Address'], errors='ignore')
ufo.head()
Out[32]:
City Colors Reported Shape Reported State Time
0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
1 Willingboro NaN OTHER NJ 6/30/1930 20:00
2 Holyoke NaN OVAL CO 2/15/1931 14:00
3 Abilene NaN DISK KS 6/1/1931 13:00
4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00
In [36]:
# sort the values of a single column (ascending order is the default)
ufo['State'].sort_values().head()
Out[36]:
10454    AK
14421    AK
3511     AK
103      AK
2929     AK
Name: State, dtype: object
In [37]:
# sort the whole table based on the values of one column
ufo.sort_values(by='City').head()
Out[37]:
City Colors Reported Shape Reported State Time
1761 Abbeville NaN DISK SC 12/10/1968 0:30
4553 Aberdeen NaN CYLINDER WA 6/15/1981 22:00
16167 Aberdeen NaN VARIOUS OH 3/29/2000 3:00
14703 Aberdeen NaN TRIANGLE WA 9/30/1999 21:00
389 Aberdeen ORANGE CIRCLE SD 11/15/1956 18:30