1. 程式人生 > >numpy基礎屬性方法隨機整理(七)--- np.take() / np.where / np.argmax()/np.argmin()(對照pandas相應功能的屬性和方法對比學習)

numpy基礎屬性方法隨機整理(七)--- np.take() / np.where / np.argmax()/np.argmin()(對照pandas相應功能的屬性和方法對比學習)

Note1:
np.take(close_prices, np.where(weekdays==weekday))
pandas中參考 《pandas基礎屬性方法隨機整理(四)—例項梳理(多知識點)》有三種方法實現該條件查詢功能:

  1. 方法1:輔助列 data['Mon'], data[data['Mon']==1].mean() # 掩碼

    即 陣列[關係表示式]:
    關係表示式的結果是一個布林型陣列,其中為True的元素對應於原陣列中滿足關係表示式的元素;以該布林陣列作為下標進行運算,得到的就是從原陣列中挑選出的、與布林陣列中為True的位置相對應的元素

  2. 方法2:輔助列data['Mon'], # 查詢.query, 功能與索引一樣,有時更方便
  3. 方法3: 不新增輔助列,利用groupby()方法實現降取樣 data.groupby ( lambda x: x.month ). mean(). T[1]

numpy方法:
for迴圈中的條件查詢np.take(.., np.where(..))
np.where(關係表示式):陣列中滿足關係表示式的元素的下標陣列
np.take(陣列,下標陣列):陣列中由下標陣列所表示的元素集合

where(condition, [x, y]):
    -- x, y不為空: condition為True, 返回x; False, 返回y
    -- x, y為空: 返回condition為True的陣列下標
# Fill a 5-slot array with the mean of close_prices for each weekday value 0..4.
average_prices = np.zeros(5)
for weekday in range(average_prices.size):
    average_prices[weekday] = np.take(close_prices,
    np.where(weekdays==weekday)).mean()
    # np.where(weekdays==weekday) yields the array of matching positions,
    # which np.take then uses as the indices of the elements to pick.
import numpy as np

# Sort names by age (ties broken by score) using a complex-number key.
# NOTE: `ages`, `scores` and `names` are not defined in this snippet; they are
# presumably NumPy arrays of equal length defined by the reader.
c = ages + scores * 1j          # the imaginary unit must be written as 1j
d = np.sort_complex(c).real     # float: [20. 22. 23. 27. 29.] <class 'numpy.ndarray'>
d = d.astype('int64')           # int: [20 22 23 27 29] <class 'numpy.ndarray'>

index_names = []
for v in d:
    # Walk d in sorted order and look up where each value sits in `ages`.
    print(np.where(ages == v)[0][0], np.take(ages, np.where(ages == v))[0][0])
    index_names.append(np.where(ages == v)[0][0])

namesComplexSorted = names[index_names]
print(namesComplexSorted)       # ['Jue' 'Kity' 'Mike' 'scote' 'Tom']
# np.where(...) here is (array([1], dtype=int64),), so [0][0] prints 1.
print(np.where(namesComplexSorted == 'Kity')[0][0])

Note2:
最大值 / 最小值對應的索引:

| 屬性描述 | numpy | pandas |
| --- | --- | --- |
| 最大值的索引 | argmax() | idxmax() |
| 最小值的索引 | argmin() | idxmin() |

Note3:
print()內部的if…else…語句:

# The second print argument is picked by chained conditional expressions:
# '(max)' if weekday equals max_index, else '(min)' if it equals min_index,
# else an empty string.
print(average_prices,
        '(max)' if(weekday == max_index) else
        '(min)' if(weekday == min_index) else '')

Code:

import os 
import sys
import datetime as dt 
import numpy as np 


# Display labels for weekday numbers; position 0 is Monday, matching the
# numbering returned by date.weekday().
g_weekdays = ('MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN')

def dmy2weekday(dmy):
    """Convert a ``DD-MM-YYYY`` date field into its weekday number.

    Args:
        dmy: The date text, either as UTF-8 encoded ``bytes`` or as ``str``.

    Returns:
        int: The ``date.weekday()`` value, 0 (Monday) through 6 (Sunday).

    Raises:
        ValueError: If the text does not match the ``%d-%m-%Y`` format.
    """
    # np.loadtxt may hand a converter either bytes or str depending on its
    # ``encoding`` setting; str(obj, encoding=...) raises TypeError for str,
    # so decode only when the value really is bytes.
    if isinstance(dmy, (bytes, bytearray)):
        dmy = str(dmy, encoding='utf-8')
    # Tolerate padding around the field (e.g. "a, 28-01-2011 ,b" style rows).
    return dt.datetime.strptime(dmy.strip(), '%d-%m-%Y').date().weekday()


def read_data(filename):
    """Load the weekday and closing-price columns from a comma-separated file.

    Column 1 is passed through ``dmy2weekday`` and column 6 is read as-is.

    Args:
        filename: Path of the CSV file handed to ``np.loadtxt``.

    Returns:
        tuple: ``(weekdays, close_prices)`` as unpacked by ``np.loadtxt``.
    """
    columns = np.loadtxt(
        filename,
        delimiter=',',
        usecols=(1, 6),
        unpack=True,
        converters={1: dmy2weekday},
    )
    weekdays, close_prices = columns
    return weekdays, close_prices

def cal_average_prices(weekdays, close_prices):
    """Return the mean closing price for each weekday value 0 through 4.

    Args:
        weekdays: Array of weekday numbers, aligned element-wise with
            ``close_prices``.
        close_prices: Array of closing prices.

    Returns:
        numpy.ndarray: Five floats; entry ``i`` is the mean of the prices
        whose weekday equals ``i``.
    """
    # np.nonzero(mask) yields the positions where the mask is True (the same
    # indices single-argument np.where gives); np.take gathers those prices.
    # The pandas analogue is boolean-mask selection followed by .mean().
    means = [
        np.take(close_prices, np.nonzero(weekdays == day)).mean()
        for day in range(5)
    ]
    return np.array(means, dtype=float)

def main(argc, argv, envp):
    """Print the average closing price per weekday, tagging the extremes.

    Reads 'aapl.csv' through ``read_data``, averages the prices with
    ``cal_average_prices`` and prints one line per weekday; the highest
    average is followed by '(max)' and the lowest by '(min)'.

    Args:
        argc: Argument count (not used by this function).
        argv: Argument list (not used by this function).
        envp: Environment mapping (not used by this function).

    Returns:
        int: Always 0.
    """
    weekdays, close_prices = read_data('aapl.csv')
    averages = cal_average_prices(weekdays, close_prices)
    # np.argmax / np.argmin return positions; pandas offers idxmax / idxmin.
    highest = np.argmax(averages)
    lowest = np.argmin(averages)
    for day, price in enumerate(averages):
        # `day` doubles as the index into g_weekdays.
        if day == highest:
            tag = '(max)'
        elif day == lowest:
            tag = '(min)'
        else:
            tag = ''
        print(g_weekdays[day], ':', price, tag)
    return 0

# Run main() only when this file is executed as a script, and use its return
# value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(len(sys.argv), sys.argv, os.environ))