Numpy
个人笔记,仅供参考,谢谢
导入
import numpy
import numpy as np
from numpy import *
Numpy数组对象
引入
a = [1, 2, 3, 4]; b = [4, 5, 6, 7]
[ x+ 1 for x in a]
a + b => [1, 2, 3, 4, 4, 5, 6, 7]
[ x + y for ( x, y) in zip ( a, b) ]
- - - - - - - - - - -
a = np. array( [ 1 , 2 , 3 , 4 ] )
a+ 1 = > array( [ 2 , 3 , 4 , 5 ] )
b = np. array( [ 2 , 3 , 4 , 5 ] )
a + b = > array( [ 3 , 5 , 7 , 9 ] )
产生数组
lst = [ 1 , 2 , 3 ]
a = np. array( lst)
a = np. array( [ 1 , 2 , 3 ] )
np.zeros(5)
=> array([0., 0., 0., 0., 0.])
np. ones( 3 )
= > array( [ 1. , 1. , 1. ] )
np. ones( 3 , dtype= 'int' )
a= np. array( [ 1 , 2 , 3 ] )
a. fill( 6 )
a.fill(2.5)  # a 是整型数组,2.5 会被截断为 2;如需保留小数,先用 astype 转为浮点型
a = a. astype( 'float' )
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
生成整数序列
a= np. arange( 1 , 10 )
a= np. arange( 1 , 10 , 2 )
生成等差序列
a = np. linspace( 1 , 10 , 4 )
生成随机数
np. random. rand( 10 )
np. random. randn( 10 )
np. random. randint( 1 , 10 , 6 )
数组属性
type ( a)
a. dtype
a. shape
a. size
a. ndim
索引与切片
a= np. array( [ 0 , 1 , 2 , 3 ] )
a[ 0 ]
a[ 0 ] = 1
a[1:3]
<=> a[1:-1] <=> a[-3:3]
a[ - 2 : ]
a[::2] <=> a[0:a.size:2]
a = np.array([1, 2, 3, 4])
rst = a[ 1 : ] - a[ : - 1 ]
多维数组及其属性
定义
a = np. array( [ [ 1 , 2 ] , [ 3 , 4 ] ] )
np. array( [ [ [ 1 , 2 ] , [ 2 , 0 ] ] , [ [ 3 , 5 ] , [ 6 , 7 ] ] ] )
a.shape => (2, 2) ; 2 行 2 列
a. size = > 4
a. ndim = > 2
索引
a[ 1 , 1 ] = > 4
a[ 0 , 0 ] = > 1
a[ 0 , 1 ] = - 1
a[1] => array([3, 4])
a[:, 1] => array([-1, 4])
多维数组切片
a = np.array([[0, 1, 2, 3, 4, 5], [10, 11, 12, 13, 14, 15], [20, 21, 22, 23, 24, 25], [30, 31, 32, 33, 34, 35], [40, 41, 42, 43, 44, 45]])
a[ 0 , 3 : 5 ]
a[ 4 : , 4 : ]
[ lower: upper: step]
a[ : : 2 , : ]
切片是引用
a= np. array( [ 0 , 1 , 2 , 3 , 4 ] )
b= a[ 2 : 4 ]
print ( b)
b[ 0 ] = 10
a
a= np. array( [ 0 , 1 , 2 , 3 , 4 ] )
b= a[ 2 : 4 ] . copy( ) ;
花式索引
a= np. arange( 0 , 100 , 10 )
index = [ 1 , 2 , - 3 ]
y = a[ index] ;
mask = np. array( [ 0 , 1 , 0 , 1 , 0 , 1 , 0 , 1 , 1 , 0 ] , dtype= bool )
a[ mask] ;
a = np.array([[0, 1, 2, 3, 4, 5], [10, 11, 12, 13, 14, 15], [20, 21, 22, 23, 24, 25], [30, 31, 32, 33, 34, 35], [40, 41, 42, 43, 44, 45], [50, 51, 52, 53, 54, 55]])
取出次对角线的5 个值:
a[ ( 0 , 1 , 2 , 3 , 4 ) , ( 1 , 2 , 3 , 4 , 5 ) ]
取出最后三行中索引为 1、2、5 的列
a[ 3 :,[ 1 , 2 , 5 ] ] ;
a[ : 3 ] , 取出前三行
a[ : , : 3 ] , 取出前三列
a[ : 2 , : 3 ] 取出前2 行三列
mask = np.array([0, 1, 1, 0, 1, 0], dtype=bool)
a[ mask] ;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
where语句
a= np. array( [ 0 , 12 , 5 , 20 ] )
a > 10
np. where( a > 10 ) ;
a[ a> 10 ] ;
a[ np. where( a> 10 ) ]
数组类型
a = np. array( [ 1 , 5 , - 3 ] , dtype= float )
a = np. array( [ 1 , 2 , 3 ] )
b = np. asarray( a, dtype= float )
b = a. astype( float )
数组操作
a = np.array([1, 3, 2])
b = np. sort( a)
order = np. argsort( a)
np. sum ( a)
np. max ( a)
np. min ( a)
np.mean(a)
np.std(a)
np. cov( a, b)
a. T
concatenate( ( a0, a1, . . . , an) , axis= 0 )
x = np. array( [ [ 0 , 1 , 2 ] , [ 10 , 11 , 12 ] ] )
y = np. array( [ [ 50 , 51 , 52 ] , [ 60 , 61 , 62 ] ] )
print ( x. shape)
print ( y. shape)
z = np.concatenate((x, y))
z = np. concatenate( ( x, y) , axis = 1 )
np. vstack( ( x, y) )
np. hstack( ( x, y) )
np. dstack( ( x, y) )
内置函数
np. abs ( a)
np. exp( a)
np. median( a)
np. cumsum( a)
. . .
练习
import numpy as np
a = np. array( [ [ 0 , 1 , 2 , 3 , 4 , 5 ] , [ 10 , 11 , 12 , 13 , 14 , 15 ] , [ 20 , 21 , 22 , 23 , 24 , 25 ] , [ 30 , 31 , 32 , 33 , 34 , 35 ] , [ 40 , 41 , 42 , 43 , 44 , 45 ] , [ 50 , 51 , 52 , 53 , 54 , 55 ] ] )
a
array([[ 0, 1, 2, 3, 4, 5],
[10, 11, 12, 13, 14, 15],
[20, 21, 22, 23, 24, 25],
[30, 31, 32, 33, 34, 35],
[40, 41, 42, 43, 44, 45],
[50, 51, 52, 53, 54, 55]])
a[ : 2 , : 3 ]
array([[ 0, 1, 2],
[10, 11, 12]])
b= np. array( [ 0 , 2 , 66 , 2 , 77 ] )
b[ np. where( b< 10 ) ]
np. where( b< 10 )
(array([0, 1, 3], dtype=int64),)
a= np. array( [ 1 , 2 , 3 ] )
np. asarray( a, float )
array([1., 2., 3.])
a = np. array( [ 1 , 2 , 3 ] )
b = np. array( [ 2 , 4 , 5 ] )
np. cov( a, b)
array([[1. , 1.5 ],
[1.5 , 2.33333333]])
import pandas as pd
a = pd. Series( [ 1 , 2 , 3 ] , index = [ 'a' , 2 , 3 ] )
a[ 'a' ]
a. values
a. astype( float )
a. name
a
a 1
2 2
3 3
dtype: int64
import numpy as np
a = np. array( [ 3 ] * 4 )
a
array([3, 3, 3, 3])
df2 = pd. DataFrame( { 'A' : 1. , 'B' : pd. Timestamp( '20181001' ) , 'C' : pd. Series( 1 , index= list ( range ( 4 ) ) , dtype = float ) , 'D' : np. array( [ 3 ] * 4 , dtype= float ) , 'E' :
pd. Categorical( [ 'test' , 'train' , 'test' , 'train' ] ) , 'F' : 'anc' } )
df2
     A          B    C    D      E    F
0  1.0 2018-10-01  1.0  3.0   test  anc
1  1.0 2018-10-01  1.0  3.0  train  anc
2  1.0 2018-10-01  1.0  3.0   test  anc
3  1.0 2018-10-01  1.0  3.0  train  anc
df2. drop( [ 'A' , 'B' ] , axis = 1 )
     C    D      E    F
0  1.0  3.0   test  anc
1  1.0  3.0  train  anc
2  1.0  3.0   test  anc
3  1.0  3.0  train  anc
df2. index = list ( 'abcd' )
df2
df2. loc[ 'a' , 'B' ]
Timestamp('2018-10-01 00:00:00')
df2
df2. loc[ 'a' , 'E' ] = np. nan
df2. dropna( inplace= True )
df2
     A          B    C    D      E    F
b  1.0 2018-10-01  1.0  3.0  train  anc
c  1.0 2018-10-01  1.0  3.0   test  anc
d  1.0 2018-10-01  1.0  3.0  train  anc