1. 程式人生 > Python資料分析與機器學習-Python時間序列分析

Python資料分析與機器學習-Python時間序列分析

原始碼下載地址:

http://download.csdn.net/download/adam_zs/10224873


from __future__ import absolute_import, division, print_function
# http://www.lfd.uci.edu/~gohlke/pythonlibs/#xgboost
import sys
import os

import pandas as pd
import numpy as np

import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt

import matplotlib.pylab as plt
import seaborn as sns

# Console display settings for pandas and numpy.
# NOTE: the legacy 'display.height' option is deliberately not set here. It is
# no longer a registered pandas option, so pd.set_option('display.height', ...)
# raises OptionError on current pandas releases; 'display.max_rows' covers it.
pd.set_option('display.width', 9999)
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # pandas

np.set_printoptions(precision=5, suppress=True)  # numpy

# Cap printed frames at 100 rows and 100 columns.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

# Apply seaborn's 'ticks' style with the 'poster' context to every figure.
sns.set(context='poster', style='ticks')

# Load the consumer sentiment data; column 0 is parsed as dates and becomes
# the index. Sample of sentiment.head() as recorded by the original author:
#   DATE       UMCSENT
#   2000-01-01 112.00000
#   2000-02-01 111.30000
#   2000-03-01 107.10000
#   2000-04-01 109.20000
#   2000-05-01 110.70000
sentiment = pd.read_csv("data/sentiment.csv", parse_dates=[0], index_col=0)

# Restrict the analysis to the rows dated 2005 through 2016 (label slice).
sentiment_short = sentiment.loc['2005':'2016']


# sentiment_short.plot(figsize=(12, 8))
# plt.title("Consumer Sentiment")
# plt.show()

# sentiment_short = sentiment['2005':'2016']
# sentiment_short['diff_1'] = sentiment_short['UMCSENT'].diff(1)  # 1階差分
# sentiment_short['diff_2'] = sentiment_short['diff_1'].diff(1)  # 2階差分
# sentiment_short.plot(figsize=(18, 12))
# plt.title('diff_1 and diff_2')
# plt.show()

# acf結果,pacf結果
# fig = plt.figure(figsize=(12, 8))
#
# ax1 = fig.add_subplot(211)
# fig = sm.graphics.tsa.plot_acf(sentiment_short, lags=20, ax=ax1)
# ax1.xaxis.set_ticks_position('bottom')
# fig.tight_layout()
#
# ax2 = fig.add_subplot(212)
# fig = sm.graphics.tsa.plot_pacf(sentiment_short, lags=20, ax=ax2)
# ax2.xaxis.set_ticks_position('bottom')
# fig.tight_layout()
# plt.show()

# 散點圖也可以表示

# lags = 9
# ncols = 3
# nrows = int(np.ceil(lags / ncols))
#
# fig, axes = plt.subplots(ncols=ncols, nrows=nrows, figsize=(4 * ncols, 4 * nrows))
#
# for ax, lag in zip(axes.flat, np.arange(1, lags + 1, 1)):
#     lag_str = 't-{}'.format(lag)
#     X = (pd.concat([sentiment_short, sentiment_short.shift(-lag)], axis=1,
#                    keys=['y'] + [lag_str]).dropna())
#
#     X.plot(ax=ax, kind='scatter', y='y', x=lag_str)
#     corr = X.corr().as_matrix()[0][1]
#     ax.set_ylabel('Original')
#     ax.set_title('Lag: {} (corr={:.2f})'.format(lag_str, corr))
#     ax.set_aspect('equal')
#     sns.despine()
#
# fig.tight_layout()
# plt.show()

# 更直觀一些

def tsplot(y, lags=None, title='', figsize=(14, 8)):
    """Draw a 2x2 diagnostic figure for a time series.

    Panels: the series itself (top-left), a 25-bin histogram (top-right),
    the autocorrelation plot (bottom-left) and the partial autocorrelation
    plot (bottom-right).

    Parameters
    ----------
    y : object with a pandas-style ``.plot`` method (e.g. Series/DataFrame)
        The data to plot; also handed to ``plot_acf`` / ``plot_pacf``.
    lags : optional
        Forwarded unchanged to ``plot_acf`` and ``plot_pacf``.
    title : str
        Title of the top-left (series) panel.
    figsize : tuple
        Figure size passed to ``plt.figure``.

    Returns
    -------
    tuple
        ``(ts_ax, acf_ax, pacf_ax)``; the histogram axis is not returned.
    """
    fig = plt.figure(figsize=figsize)
    layout = (2, 2)
    ts_ax = plt.subplot2grid(layout, (0, 0))
    hist_ax = plt.subplot2grid(layout, (0, 1))
    acf_ax = plt.subplot2grid(layout, (1, 0))
    pacf_ax = plt.subplot2grid(layout, (1, 1))

    y.plot(ax=ts_ax)
    ts_ax.set_title(title)
    y.plot(ax=hist_ax, kind='hist', bins=25)
    hist_ax.set_title('Histogram')
    smt.graphics.plot_acf(y, lags=lags, ax=acf_ax)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_ax)
    # Start both correlation plots at lag 0. A plain loop is used because the
    # calls are made for their side effect only (no list needs building).
    for ax in (acf_ax, pacf_ax):
        ax.set_xlim(0)
    sns.despine()
    # Lay out the figure created above explicitly instead of relying on
    # pyplot's notion of the "current" figure.
    fig.tight_layout()
    return ts_ax, acf_ax, pacf_ax


# Render the four-panel diagnostic figure for the 2005-2016 sentiment window,
# showing the first 36 lags in the ACF/PACF panels.
tsplot(sentiment_short, lags=36, title='Consumer Sentiment')
plt.show()
from __future__ import absolute_import, division, print_function

import sys
import os

import pandas as pd
import numpy as np

# TSA from Statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt

# Display and Plotting
import matplotlib.pylab as plt
import seaborn as sns

# Console display settings for pandas and numpy.
# NOTE: the legacy 'display.height' option is deliberately not set here. It is
# no longer a registered pandas option, so pd.set_option('display.height', ...)
# raises OptionError on current pandas releases; 'display.max_rows' covers it.
pd.set_option('display.width', 9999)
pd.set_option('display.float_format', lambda x: '%.5f' % x)  # pandas
np.set_printoptions(precision=5, suppress=True)  # numpy

# Cap printed frames at 100 rows and 100 columns.
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

# Apply seaborn's 'ticks' style with the 'poster' context to every figure.
sns.set(context='poster', style='ticks')

# Load the series; column 0 is parsed as dates and becomes the index.
# The original author's notes record shape (120, 1) and this head():
#               value
# 2006-06-01  0.21507
# 2006-07-01  1.14225
# 2006-08-01  0.08077
# 2006-09-01 -0.73952
# 2006-10-01  0.53552
ts_df = pd.read_csv('data/series1.csv', parse_dates=[0], index_col=0)

# Chronological split: the first 95% of rows (plus one) form the training
# series, the remainder is held out. With the 120 rows noted above that is
# 115 training observations and 5 held-out ones.
train_count = int(len(ts_df) * 0.95) + 1
X_train = ts_df['value'].iloc[:train_count]
y_test = ts_df['value'].iloc[train_count:]


# print(X_train.shape)
# print(X_train.tail())
# print(y_test.shape)
# print(y_test.head())

def tsplot(y, lags=None, title='', figsize=(14, 8)):
    """Plot a series with its histogram, ACF and PACF on a 2x2 grid.

    ``y`` is drawn through its own ``.plot`` method (line plot and a 25-bin
    histogram) and handed to statsmodels' ``plot_acf`` / ``plot_pacf``
    together with ``lags``. ``title`` labels the series panel and ``figsize``
    sizes the figure.

    Returns the tuple ``(ts_ax, acf_ax, pacf_ax)``.
    """
    figure = plt.figure(figsize=figsize)
    grid_shape = (2, 2)

    # Create the four axes row by row: series, histogram, ACF, PACF.
    cells = ((0, 0), (0, 1), (1, 0), (1, 1))
    series_axis, histogram_axis, acf_axis, pacf_axis = [
        plt.subplot2grid(grid_shape, cell) for cell in cells
    ]

    # Top row: raw series and its distribution.
    y.plot(ax=series_axis)
    series_axis.set_title(title)
    y.plot(ax=histogram_axis, kind='hist', bins=25)
    histogram_axis.set_title('Histogram')

    # Bottom row: autocorrelation and partial autocorrelation.
    smt.graphics.plot_acf(y, lags=lags, ax=acf_axis)
    smt.graphics.plot_pacf(y, lags=lags, ax=pacf_axis)
    for correlation_axis in (acf_axis, pacf_axis):
        correlation_axis.set_xlim(0)  # x axis starts at lag 0

    sns.despine()
    figure.tight_layout()
    return series_axis, acf_axis, pacf_axis


# tsplot(X_train, title='A Given Training Series', lags=20)
# plt.show()

# Baseline model: SARIMAX on the training series with order (p, d, q) =
# (2, 0, 0), i.e. two autoregressive terms, no differencing, no MA terms.
arima200 = sm.tsa.SARIMAX(endog=X_train, order=(2, 0, 0))
model_results = arima200.fit()

import itertools

# Inclusive bounds of the (p, d, q) orders explored by the grid search.
p_min = 0
d_min = 0
q_min = 0
p_max = 4
d_max = 0
q_max = 4

# BIC table: one row per AR order p, one column per MA order q.
# Cells stay NaN for orders that are skipped or fail to fit.
results_bic = pd.DataFrame(index=['AR{}'.format(i) for i in range(p_min, p_max + 1)],
                           columns=['MA{}'.format(i) for i in range(q_min, q_max + 1)])

for p, d, q in itertools.product(range(p_min, p_max + 1),
                                 range(d_min, d_max + 1),
                                 range(q_min, q_max + 1)):
    if p == 0 and d == 0 and q == 0:
        # Order (0, 0, 0) has no terms to compare; keep its cell empty.
        results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = np.nan
        continue

    try:
        model = sm.tsa.SARIMAX(X_train, order=(p, d, q),
                               # enforce_stationarity=False,
                               # enforce_invertibility=False,
                               )
        results = model.fit()
    except Exception as err:
        # Best-effort search: a failed fit leaves the cell NaN, but the
        # failure is reported instead of being swallowed. Catching Exception
        # (not a bare except) keeps Ctrl-C / SystemExit working.
        print('SARIMAX order=({}, {}, {}) skipped: {}'.format(p, d, q, err))
        continue
    results_bic.loc['AR{}'.format(p), 'MA{}'.format(q)] = results.bic

# The frame was created with object dtype; the heatmap needs numeric values.
results_bic = results_bic.astype(float)
fig, ax = plt.subplots(figsize=(10, 8))
ax = sns.heatmap(results_bic,
                 mask=results_bic.isnull(),  # hide cells without a BIC
                 ax=ax,
                 annot=True,
                 fmt='.2f',
                 )
ax.set_title('BIC')
plt.show()
import matplotlib.pylab
import numpy as np
import pandas as pd

# Sliding (rolling) window demo.
# Build 600 daily observations of random integers in [1, 100), starting on
# 2016-01-07.
df = pd.Series(
    data=np.random.randint(1, 100, 600),
    index=pd.date_range('2016-01-07', periods=600, freq='D'),
)

# Prints the repr of the 10-observation Rolling object itself; no aggregation
# is computed at this point.
print(df.rolling(window=10))
import matplotlib.pyplot as plt

# Overlay the raw series (red dashed line) with its 10-observation rolling
# mean (blue line) on one wide figure.
plt.figure(figsize=(15, 5))
df.plot(style='r--')
rolling_mean = df.rolling(window=10).mean()
rolling_mean.plot(style='b')
plt.show()
import pandas as pd
import numpy as np

# 90 consecutive days of random integers in [1, 20), starting on 2017-01-05.
rng = pd.date_range(start='2017-01-05', periods=90, freq='D')
ts = pd.Series(np.random.randint(1, 20, 90), index=rng)

# Downsample: sum the values inside every 3-day bin.
day3Ts = ts.resample('3D').sum()

# Upsampling the 3-day totals back to daily frequency creates empty days.
# Ways to fill them:
#   ffill       - take the value that precedes the gap
#   bfill       - take the value that follows the gap
#   interpolate - linear interpolation
# limit=1 fills at most one missing value after each known value.
print(day3Ts.resample('D').ffill(limit=1))
import matplotlib.pylab as plt
import seaborn as sns
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Fetch the robot execution failures example data, then load it as a long
# format frame `df` plus the per-id target `y`.
download_robot_execution_failures()
df, y = load_robot_execution_failures()
# df[df.id == 3][['time', 'F_x', 'F_y', 'F_z', 'T_x', 'T_y', 'T_z']].plot(x='time', title='Success example (id 3)',
#                                                                         figsize=(12, 6))
# df[df.id == 20][['time', 'F_x', 'F_y', 'F_z', 'T_x', 'T_y', 'T_z']].plot(x='time', title='Failure example (id 20)',
#                                                                          figsize=(12, 6))
# plt.show()

# Feature extraction: one row of features per `id`, ordered by `time`.
extraction_settings = ComprehensiveFCParameters()
X = extract_features(df,
                     column_id='id', column_sort='time',
                     default_fc_parameters=extraction_settings,
                     impute_function=impute)
# Feature filtering: extract again, keeping only features relevant to `y`.
X_filtered = extract_relevant_features(df, y,
                                       column_id='id', column_sort='time',
                                       default_fc_parameters=extraction_settings)

# Split both feature sets and the target together so that rows stay aligned;
# 40% of the samples are held out for evaluation.
X_train, X_test, X_filtered_train, X_filtered_test, y_train, y_test = train_test_split(X, X_filtered, y, test_size=.4)

# Classifier on the full feature set. Its report is printed as well, so it can
# be compared with the filtered-feature classifier below; previously this
# model was fitted and then never evaluated.
cl = DecisionTreeClassifier()
cl.fit(X_train, y_train)
print(classification_report(y_test, cl.predict(X_test)))

# Classifier on the filtered (relevant) feature set.
cl2 = DecisionTreeClassifier()
cl2.fit(X_filtered_train, y_train)
print(classification_report(y_test, cl2.predict(X_filtered_test)))

'''維基百科點選量資料'''

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re

# Widen pandas console output so large frames print without truncation.
# NOTE: the legacy 'display.height' option is deliberately not set here. It is
# no longer a registered pandas option, so pd.set_option('display.height', ...)
# raises OptionError on current pandas releases; 'display.max_rows' covers it.
pd.set_option('display.max_rows', 9999)
pd.set_option('display.max_columns', 9999)
pd.set_option('display.width', 9999)

# Read the page-view table and replace missing values with 0.
train = pd.read_csv('train_1.csv')
train = train.fillna(0)
# print(train.info())

# Every column after the first is converted to the smallest integer dtype
# that can hold its values; the first column is left untouched.
value_columns = train.columns[1:]
for col in value_columns:
    train[col] = pd.to_numeric(train[col], downcast='integer')
# print(train.head())
# print(train.info())
'''
                                                Page  2015-07-01  2015-07-02  2015-07-03  2015-07-04  2015-07-05  2015-07-06  2015-07-07  2015-07-08  2015-07-09  2015-07-10  2015-07-11  2015-07-12  2015-07-13  2015-07-14  2015-07-15  2015-07-16  2015-07-17  2015-07-18  2015-07-19  2015-07-20  2015-07-21  2015-07-22  2015-07-23  2015-07-24  2015-07-25  2015-07-26  2015-07-27  2015-07-28  2015-07-29  2015-07-30  2015-07-31  2015-08-01  2015-08-02  2015-08-03  2015-08-04  2015-08-05  2015-08-06  2015-08-07  2015-08-08  2015-08-09  2015-08-10  2015-08-11  2015-08-12  2015-08-13  2015-08-14  2015-08-15  2015-08-16  2015-08-17  2015-08-18  2015-08-19  2015-08-20  2015-08-21  2015-08-22  2015-08-23  2015-08-24  2015-08-25  2015-08-26  2015-08-27  2015-08-28  2015-08-29  2015-08-30  2015-08-31  2015-09-01  2015-09-02  2015-09-03  2015-09-04  2015-09-05  2015-09-06  2015-09-07  2015-09-08  2015-09-09  2015-09-10  2015-09-11  2015-09-12  2015-09-13  2015-09-14  2015-09-15  2015-09-16  2015-09-17  2015-09-18  2015-09-19  2015-09-20  2015-09-21  2015-09-22  2015-09-23  2015-09-24  2015-09-25  2015-09-26  2015-09-27  2015-09-28  2015-09-29  2015-09-30  2015-10-01  2015-10-02  2015-10-03  2015-10-04  2015-10-05  2015-10-06  2015-10-07  2015-10-08  2015-10-09  2015-10-10  2015-10-11  2015-10-12  2015-10-13  2015-10-14  2015-10-15  2015-10-16  2015-10-17  2015-10-18  2015-10-19  2015-10-20  2015-10-21  2015-10-22  2015-10-23  2015-10-24  2015-10-25  2015-10-26  2015-10-27  2015-10-28  2015-10-29  2015-10-30  2015-10-31  2015-11-01  2015-11-02  2015-11-03  2015-11-04  2015-11-05  2015-11-06  2015-11-07  2015-11-08  2015-11-09  2015-11-10  2015-11-11  2015-11-12  2015-11-13  2015-11-14  2015-11-15  2015-11-16  2015-11-17  2015-11-18  2015-11-19  2015-11-20  2015-11-21  2015-11-22  2015-11-23  2015-11-24  2015-11-25  2015-11-26  2015-11-27  2015-11-28  2015-11-29  2015-11-30  2015-12-01  2015-12-02  2015-12-03  2015-12-04  2015-12-05  2015-12-06  2015-12-07  2015-12-08  2015-12-09  
2015-12-10  2015-12-11  2015-12-12  2015-12-13  2015-12-14  2015-12-15  2015-12-16  2015-12-17  2015-12-18  2015-12-19  2015-12-20  2015-12-21  2015-12-22  2015-12-23  2015-12-24  2015-12-25  2015-12-26  2015-12-27  2015-12-28  2015-12-29  2015-12-30  2015-12-31  2016-01-01  2016-01-02  2016-01-03  2016-01-04  2016-01-05  2016-01-06  2016-01-07  2016-01-08  2016-01-09  2016-01-10  2016-01-11  2016-01-12  2016-01-13  2016-01-14  2016-01-15  2016-01-16  2016-01-17  2016-01-18  2016-01-19  2016-01-20  2016-01-21  2016-01-22  2016-01-23  2016-01-24  2016-01-25  2016-01-26  2016-01-27  2016-01-28  2016-01-29  2016-01-30  2016-01-31  2016-02-01  2016-02-02  2016-02-03  2016-02-04  2016-02-05  2016-02-06  2016-02-07  2016-02-08  2016-02-09  2016-02-10  2016-02-11  2016-02-12  2016-02-13  2016-02-14  2016-02-15  2016-02-16  2016-02-17  2016-02-18  2016-02-19  2016-02-20  2016-02-21  2016-02-22  2016-02-23  2016-02-24  2016-02-25  2016-02-26  2016-02-27  2016-02-28  2016-02-29  2016-03-01  2016-03-02  2016-03-03  2016-03-04  2016-03-05  2016-03-06  2016-03-07  2016-03-08  2016-03-09  2016-03-10  2016-03-11  2016-03-12  2016-03-13  2016-03-14  2016-03-15  2016-03-16  2016-03-17  2016-03-18  2016-03-19  2016-03-20  2016-03-21  2016-03-22  2016-03-23  2016-03-24  2016-03-25  2016-03-26  2016-03-27  2016-03-28  2016-03-29  2016-03-30  2016-03-31  2016-04-01  2016-04-02  2016-04-03  2016-04-04  2016-04-05  2016-04-06  2016-04-07  2016-04-08  2016-04-09  2016-04-10  2016-04-11  2016-04-12  2016-04-13  2016-04-14  2016-04-15  2016-04-16  2016-04-17  2016-04-18  2016-04-19  2016-04-20  2016-04-21  2016-04-22  2016-04-23  2016-04-24  2016-04-25  2016-04-26  2016-04-27  2016-04-28  2016-04-29  2016-04-30  2016-05-01  2016-05-02  2016-05-03  2016-05-04  2016-05-05  2016-05-06  2016-05-07  2016-05-08  2016-05-09  2016-05-10  2016-05-11  2016-05-12  2016-05-13  2016-05-14  2016-05-15  2016-05-16  2016-05-17  2016-05-18  2016-05-19  2016-05-20  2016-05-21  2016-05-22  2016-05-23  
2016-05-24  2016-05-25  2016-05-26  2016-05-27  2016-05-28  2016-05-29  2016-05-30  2016-05-31  2016-06-01  2016-06-02  2016-06-03  2016-06-04  2016-06-05  2016-06-06  2016-06-07  2016-06-08  2016-06-09  2016-06-10  2016-06-11  2016-06-12  2016-06-13  2016-06-14  2016-06-15  2016-06-16  2016-06-17  2016-06-18  2016-06-19  2016-06-20  2016-06-21  2016-06-22  2016-06-23  2016-06-24  2016-06-25  2016-06-26  2016-06-27  2016-06-28  2016-06-29  2016-06-30  2016-07-01  2016-07-02  2016-07-03  2016-07-04  2016-07-05  2016-07-06  2016-07-07  2016-07-08  2016-07-09  2016-07-10  2016-07-11  2016-07-12  2016-07-13  2016-07-14  2016-07-15  2016-07-16  2016-07-17  2016-07-18  2016-07-19  2016-07-20  2016-07-21  2016-07-22  2016-07-23  2016-07-24  2016-07-25  2016-07-26  2016-07-27  2016-07-28  2016-07-29  2016-07-30  2016-07-31  2016-08-01  2016-08-02  2016-08-03  2016-08-04  2016-08-05  2016-08-06  2016-08-07  2016-08-08  2016-08-09  2016-08-10  2016-08-11  2016-08-12  2016-08-13  2016-08-14  2016-08-15  2016-08-16  2016-08-17  2016-08-18  2016-08-19  2016-08-20  2016-08-21  2016-08-22  2016-08-23  2016-08-24  2016-08-25  2016-08-26  2016-08-27  2016-08-28  2016-08-29  2016-08-30  2016-08-31  2016-09-01  2016-09-02  2016-09-03  2016-09-04  2016-09-05  2016-09-06  2016-09-07  2016-09-08  2016-09-09  2016-09-10  2016-09-11  2016-09-12  2016-09-13  2016-09-14  2016-09-15  2016-09-16  2016-09-17  2016-09-18  2016-09-19  2016-09-20  2016-09-21  2016-09-22  2016-09-23  2016-09-24  2016-09-25  2016-09-26  2016-09-27  2016-09-28  2016-09-29  2016-09-30  2016-10-01  2016-10-02  2016-10-03  2016-10-04  2016-10-05  2016-10-06  2016-10-07  2016-10-08  2016-10-09  2016-10-10  2016-10-11  2016-10-12  2016-10-13  2016-10-14  2016-10-15  2016-10-16  2016-10-17  2016-10-18  2016-10-19  2016-10-20  2016-10-21  2016-10-22  2016-10-23  2016-10-24  2016-10-25  2016-10-26  2016-10-27  2016-10-28  2016-10-29  2016-10-30  2016-10-31  2016-11-01  2016-11-02  2016-11-03  2016-11-04  2016-11-05  
2016-11-06  2016-11-07  2016-11-08  2016-11-09  2016-11-10  2016-11-11  2016-11-12  2016-11-13  2016-11-14  2016-11-15  2016-11-16  2016-11-17  2016-11-18  2016-11-19  2016-11-20  2016-11-21  2016-11-22  2016-11-23  2016-11-24  2016-11-25  2016-11-26  2016-11-27  2016-11-28  2016-11-29  2016-11-30  2016-12-01  2016-12-02  2016-12-03  2016-12-04  2016-12-05  2016-12-06  2016-12-07  2016-12-08  2016-12-09  2016-12-10  2016-12-11  2016-12-12  2016-12-13  2016-12-14  2016-12-15  2016-12-16  2016-12-17  2016-12-18  2016-12-19  2016-12-20  2016-12-21  2016-12-22  2016-12-23  2016-12-24  2016-12-25  2016-12-26  2016-12-27  2016-12-28  2016-12-29  2016-12-30  2016-12-31
0            2NE1_zh.wikipedia.org_all-access_spider          18          11           5          13          14           9           9          22          26          24          19          10          14          15           8          16           8           8          16           7          11          10          20          18          15          14          49          10          16          18           8           5           9           7          13           9           7           4          11          10           5           9           9           9           9          13           4          15          25           9           5           6          20           3          14          46           5           5          13           4           9          10           9          11          11          11           9          15           5          10           7           4           8           9          10           6          13          16           6          24           9          11          12           8          14           6           6          11          14           6          10          20           7          15           8          15           5           8           8           5          11         165          34           6          13           8           9          11          26          18           3           5          12           6          16          19           9          10          11          11           7           9          10          24           6           6           8          16          13          10          10           6           5          20           6          47           9           9          12          11          17          15          14          11          97          11          12          11          14          15          12         104           5          22          45          75          29          34          20          12          25           9    
      62          20          19           8          23          13          16          34          36          11          18          12          24          30          27          44          35          53          11          26          13          18           9          16           6          19          20          19          22          30          14          16          22          15          15          26          16          13          27          18          13          32          31          16          38          18           9          14          10          24           8          15          18          10          23          17          11          26          14           8          12           9          11          34          17          29          11           9          14          21          12          11          13          11          13          16          13          19          21          14          11          35          18          42          15           5          21          56           9          20          17          18           8           9          17           9          10          14          17           6          18          13          11          12          11           8          15          11          20          59          11          18          17          12          14          13           9         490         189         102          38         126          71          21          57          79          17          17          23          16          23          18          22          44           6          31          17          25          40          19          15          15          29          18          16          13          20          22          19          11          50          22          39          23          21          23          22          16          19          35          16          12          15          13          14          10          
21          20          19          14          12          15          17          16          21          27          13          11          15          14          18          18          10          11          14          18          14          13          17          15          14         234           8          62          26          22           8          22          15          69          11          18          23          12          20          17          15          16          18          21          15          30         115          56          45          17          18          15          18          14          15          15          24          22          18          30          12          13          18          17          31          26          29          12          19          19          57          17          20          49          10          19          26          41          23          30          55          17          24          14          12          49          42          37          13          30          20          33          20          14          40          15          18          26           8          25          21          20          25          19          23          18          19          18          55          16          65          11          11          13          20          21          13          24          20          13          32          16          10          13          44          17          13          72          40          19          14          13          12          14          10          26          13          22          14          23          12           8          50          13          10          16          14          10          24          10          20          10          26          25          16          19          20          12          19          50          16          30          18          25          14          20           8      
    67          13          41          10          21          13           8          15          14          12           6          11          10          42          21          24          14          11         204          14          45          33          28          18          14          47          15          14          18          20          14          16          14          20          60          22          15          17          19          18          21          21          47          65          17          32          63          15          26          14          20          22          19          18          20
1             2PM_zh.wikipedia.org_all-access_spider          11          14          15          18          11          13          22          11          10           4          41          65          57          38          20          62          44          15          10          47          24          17          22           9          39          13          11          12          21          19           9          15          33           8           8           7          13           2          23          12          27          27          36          23          58          80          60          69          42         161          94          77          78          20          24          13          14          26           8          82          22          11          81          37           9          40          47          18          23           6           2           7          16          10          34          14          31          20          23          14          16          34          15          30          13          30          15          25          17           8          12          17          10          21          18          30          13           7          15          23          20          15           9          47          14          11          16          12           7          15          14          12          18          29          39          11          14          28          17          20          17          36          13          11          14          14          14          33          14          13          18          13          11           8          10          11          81          14          20           6          16          18           9          12          10           8          11          14          47          13          13           6          10           8           8           8          18          31          16          15          10          13    
       9          32         161           6          20           8          11          13           8          19           7           9          16          11           6          38          11          17          13          12          12           9           7          15          14          14          11          13          12          12          24          15          38          18          26          15          12          14          40          19          13          39          19          16          19          11          76          14          19          26          19          17          30          17          17          17          19          11         175          10           5          12           7          12          14          19          11          19          17          15          19          15           9          20           6          11           6          15          20          35          34          21          17          22          26          16          16          28          19          17          15          11           7          15          11          36          16          22          18          46          17          15          17          12          17          14          15          14          15          28          36          23          12          25          18          18          16          20          17          16          13          15          19          14          20          37          16          15          11          42          10          14          61          39          17          17          41          35          16           9          64          22          22          66          33          30          16          18          45          17          88          23          18          12          12          13          13           5          11          13          11          22          10          13          17          10          
14          18           9          16          17           6          15          18          10          11          16          10          12          12          13           9          16          19          19          11          15          10          20          25           9          14          10          14          18          25          13          24          14          13          14          24          16          15          13          11          12          28          28          17          27          48         184          64          24          92          31          34          49          21          36          32          16          16          19          22          22          19          18          18          17          35          49          19          25          24          39          19          29          30          16          54          15          39          19          17          60          12          77          63          12           9          34          30          13          20          29          10          14          23          15          12          25          22         144          31          31          17          66          78          19          44          43          35          13          13          25          15          37          38          22          28          19          46          24          22          43          58          26          20          27          35          20          31          24          24          94          18          20          18          16          38          54          29          49          25          72         144          36          97         179          29          12          21          42          53          41          19          25          19          15          21          21          27          33          15          24          13          11          14          26          11          21      
    14          14          54           5          10          12          11          14          28          23          20           9          12          11          14          14          15          15          11          20          13          19         621          57          17          23          19          21          47          28          22          22          65          27          17          17          13           9          18          22          17          15          22          23          19          17          42          28          15           9          30          52          45          26          20
2              3C_zh.wikipedia.org_all-access_spider           1           0           1           1           0           4           0           3           4           4           1           1           1           6           8           6           4           5           1           2           3           8           8           6           6           2           2           3           2           4           3           3           5           3           5           4           2           5           1           4           5           0           0           7           3           5           1           6           2           5           0           3           1           0           1           1           2           4           2           1           1           3           4           3           6           6           4           3           3           2           9           7           2           3           1           3           1           6           7           1           2           5           2           3           8           5           0           4           1           5           3           0           1           8           2           1           3           0           0           5           3           3           0           2           5           2           5          10           5           6           1           4           4           1           3          13           2           1           3           2           1          10           5           6           2           5           2           2           3           2           6           3           2           1           2           3           1           1           2           2           3           2           2           5           7           2           3           4           6           1           3           6           3           3           4           2           2           4           3           1           5           5    
       4           2           4           5           4           2           1           6           1           1           3           1           3           5           3           3           0           5           3           2           2           2           2           0           3           3           3           4           4           8           3           5           8           1           4           0           3           6           3           1           3           3           3           1           3           8           4           3           2           5           6           3           6           5           6           7           3           1           5           1           2           0           1           4           3           3           9           4           7           5          10           2           3           3           4           2           3           5           3           6           4           5           5           2           1           4           7           2           2           5           1           0           3           3           1           2           4           2           2           3           4           7           1           1          10           9           5           1           6           7           4           6           2           4         155         155          83          48          31          16           6          13           8           8           5           7           3           4           6           7          10           9           7           8           4           6           5           2           7           3           7           6           3           1           6           2           1           3           8           3           5           4           7           5           2           5           0           3          12           4           2           4           6           4           5           9           
4           5           7           1           5           1           5           4           5           7           7           5           3           4           1           9           3           4           6           2           2           1          16           6           3           3           6           1           6           1           4           3           5           1           6           5           1           4           5           4           2           4           3           4           2           0           1           3          12           4           7           5           6           6           6           3           3           3           5           5           2          11           6           2           2           3           7           5           4           5           3           3           9           7           2           1           5           6           7          13           3           5           6           2           4           1           2           7           2           2           4           4           2           5           3           2           3           5           4           2           5           7           5           2           7           6          11          10           5          19           7          11           4          10           3           4           6           3           4           8          10           3           3           1          10           5           4           4           3           4           1           3           6           6           6           3           5          11           6           3           7           6           0           2           4           4           3           6           4           3           4           1           6           5           5           2           3           3           2           2           6           1           3           3           3           2          10       
    2           2           2           7           3           6           4           2           4           6           5           4           4           3           3           9           3           5           4           0           1           4           5           8           8           1           1           2           5           3           3           3           7           3           9           8           3         210           5           4           6           2           2           4           3           3           1           1           7           4           4           6           3           4          17
3         4minute_zh.wikipedia.org_all-access_spider          35          13          10          94           4          26          14           9          11          16          16          11          23         145          14          17          85           4          30          22           9          10          11           7           7          11           9          11          44           8          14          19          10          17          17          10           7          10           1           8          27          19          16           2          84          22          14          47          25          14          11          12          27           8          17          43           3          19          14          20          43           4           5          37          23          14          12          13          22          12          12           6          27           5           7          24           8           9          10          12          19           7           7          18          15           7           9          10           9          14           8          17           6           8           7           5           3           9           5           6           8           8          11           6           7          28          15           8           7           7          12           5          11           3           7          23           6           3           8           8          39           4          10           6           8           9          16           9           8           8           7           5           5          12           8          15           9          12           5           7           6          12           7           6          33           5          11           6           4          32           9          17           2          10          10           5           7          11           8          10           6          17    
      11          20          11          15          18          10          15          12          12          12           8          13           9          11           4          12           9           6          12           9           9           6           7           7          11           7          14           9          21           9          10          13          10          13          16           8          10           7          13          18           8          50           8          33           6          22           9          84          28          11           7          14          16          49          71          29          22           6          34          16          14           9          12          24          18           8          26           8           8          13          21           9          10          14          12           9          10          20          15          26          24          19          10          12           8          16          13           8          17          12          34          10           9           9          15          10          12           8          11           9          28          17          11          13          10          10          10          16          12          12          13          25          25          18          18          23          27          39          11          16           9          26          14          15          10          23          17          74         114           8          15          15          15          12          14          14          23          21          11          19           9          10          11          14           9           5          10          20          22          16           9          10          42          22           7           7          54           7           9          13           5          10          12          18          23          23          
17           6          14          13          13           9          11          35           8          12          15          10          25           9           8           8          10          14           9          11         303          29         121          69          39          25          27          54          39          24          22          20          14          12           8          17          11          15          19          20          11          36          19          35          22          14          17          15          12          34          20          25          15          18          19          13          17          16          11          22          43           8          13          16           8          19          14           9          13          13          16          10          10          11          17          32          21          16          23          15          55          17          17          15           7          13          11          11           8          22           5           7          18           9          13          27          15          19           7           9          14          14           9          16          11           7          14          13          11           9           9           9          11          15          28          10          24           8          20          19          12          31          14           9          40          15          83          60          19          15          15          12          23          17          20          26          11          13           9          44           7          18           4          36          34          10           8          21           7           6          12          15           9          13          21          13          10          21          15         103          22          15          12          11          15           7          12      
    13           9           8          21          16          38          13          14          17          26          14          10           9          23          15           7          10           7          10          14          17          11           9          11           5          10           8          17          13          23          40          16          17          41          17           8           9          18          12          12          18          13          18          23          10          32          10          26          27          16          11          17          19          10          11
4  52_Hz_I_Love_You_zh.wikipedia.org_all-access_s...           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0    
       0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0           0          38         159           9           4           1          10           9           2           0           5           0           3          55         234          57           5           4           4           0           9           9           6           6           6          10           7           5           4           6           4           2           6           5           3           3           2           5           
5           8           8           6           3           7           7           6           6           2           8           3           7           8           3           4           5           2           1           1           1           2           8           6           1           0           4           2           6           2           2           2           1           5           2           2           2           3          10           1           3           4           2           3           4           1           1           9           0           1           6           2           5           2           2           3           2          11           1           4           4           2          10           5           3          10           2           5           7           2           5           8           2           5           1           1           2           6           6           2           1           3           2           3           4           3           2           0          13           4           2           4           3           3           1           3           5           2           3           2           4           3          39           4           3           1           5           5           5           5           8          15          13          63           2           2           3           6          10           2           8           4           3           3           6           4           1           5           9           1           6           4           0           4           9           6           8          13           4           7           6           9           3          21           6          13          10           2           3           6           7          10           6           6           4         173           5          10          10          18          20          11           5           6          33          13          10       
   22          11           8           4          10          13          11           8           6          10          14           6           9           6          16          14          13          15          14          16           9         178          64          12          10          11           6           8           7           9           8           5          11           8           4          15           5           8           8           6           7          15           4          11           7          48           9          25          13           3          11          27          13          36          10
'''


def get_language(page):
    """Return the two-letter language code embedded in a Wikipedia page name.

    The page name is searched for ``<xx>.wikipedia.org`` where ``<xx>`` is
    two lowercase ASCII letters, e.g. ``'2NE1_zh.wikipedia.org_all-access_spider'``
    yields ``'zh'``.

    :param page: page name string to inspect.
    :return: the two letters immediately preceding ``.wikipedia.org``, or
        ``'na'`` when the name contains no such host.
    """
    # The dots are escaped so that only a literal '.' separates the parts;
    # an unescaped '.' would match any character (e.g. 'abxwikipediaxorg').
    res = re.search(r'[a-z][a-z]\.wikipedia\.org', page)
    if res is None:
        return 'na'
    return res.group()[:2]


# Tag every page with the language code parsed from its name.
train['lang'] = train['Page'].map(get_language)
# print(train.head())
from collections import Counter

# print(Counter(train['lang']))
# Counter({'zh': 19, 'en': 13, 'ja': 11, 'ru': 10, 'de': 8, 'fr': 7, 'es': 6})

# One sub-frame per language code; the last column (the 'lang' tag assigned
# above) is sliced off again so each sub-frame has the original columns.
lang_sets = {
    code: train[train.lang == code].iloc[:, :-1]
    for code in ('en', 'ja', 'de', 'fr', 'zh', 'ru', 'es')
}

# For every language: column-wise totals (first column skipped) divided by
# the number of rows of that language's sub-frame.
sums = {
    key: frame.iloc[:, 1:].sum(axis=0) / frame.shape[0]
    for key, frame in lang_sets.items()
}
# print(sums)

# Positional x-axis values, one per entry of the English series.
days = list(range(sums['en'].shape[0]))

# 不同國家詞頻的點選情況
# fig = plt.figure(1, figsize=[10, 10])
# plt.ylabel('Views per Page')
# plt.xlabel('Day')
# plt.title('Pages in Different Languages')
# labels = {'en': 'English', 'ja': 'Japanese', 'de': 'German', 'fr': 'French',
#           'zh': 'Chinese', 'ru': 'Russian', 'es': 'Spanish'
#           }
# for key in sums:
#     plt.plot(days, sums[key], label=labels[key])
# plt.legend()
# plt.show()

# 不同詞條的點選量
# def plot_entry(key, idx):
#     data = lang_sets[key].iloc[idx, 1:]
#     fig = plt.figure(1, figsize=(10, 5))
#     plt.plot(days, data)
#     plt.xlabel('day')
#     plt.ylabel('views')
#     plt.title(train.iloc[lang_sets[key].index[idx], 0])
#     plt.show()
#
#
# for idx in range(5, 10):
#     plot_entry('en', idx)

# 檢視每個國家關注的熱點

top_pages = {}  # index label of the most-viewed page, keyed by language code


def national_hot(key):
    """Print the five most-viewed pages of one language and record the top one.

    Reads ``lang_sets[key]``, sums the numeric columns of every row into a
    ``total`` column, sorts descending by that total and prints the first
    five rows. The index label of the highest-ranked row is stored in the
    module-level ``top_pages`` dict under ``key``.

    :param key: language code used as a key of ``lang_sets``.
    :return: None. Results are printed and written to ``top_pages``.
    """
    frame = lang_sets[key]
    # Work on an explicit copy so adding a column never touches lang_sets.
    sum_set = frame[['Page']].copy()
    # numeric_only=True keeps the text 'Page' column out of the row sums;
    # recent pandas no longer drops non-numeric columns silently and would
    # raise TypeError when adding numbers to strings.
    sum_set['total'] = frame.sum(axis=1, numeric_only=True)
    sum_set = sum_set.sort_values(by='total', ascending=False)
    # A language without any page has no top entry; skip it instead of
    # failing on index[0].
    if not sum_set.empty:
        top_pages[key] = sum_set.index[0]
    print('-----', key, '-----')
    print(sum_set.head(5))


# Rank the pages of every language; national_hot records each top page
# in top_pages as it goes.
for key in lang_sets:
    national_hot(key)

# Plot the series of the top page of each language, one figure per language.
# The slice drops the first and the last column of train
# (presumably 'Page' and 'lang' -- verify against how train is loaded).
cols = train.columns[1:-1]
for key, page_idx in top_pages.items():
    fig = plt.figure(1, figsize=(10, 5))
    data = train.loc[page_idx, cols]
    plt.plot(days, data)
    plt.title(train.loc[page_idx, 'Page'])
    plt.xlabel('Days')
    plt.ylabel('Views')
    plt.show()
import numpy as np
import pandas as pd

# rng = pd.date_range(start='2017-01-01', periods=10, freq='3D')
# print(rng)

# print(np.random.randint(low=1, high=20, size=10))

# Ten random integers in [1, 20) indexed by timestamps three days apart,
# starting on 2017-01-02.
time = pd.Series(np.random.randint(low=1, high=20, size=10),
                 index=pd.date_range(start='2017-01-02', periods=10, freq='3D'))
# Drop every entry dated before 2017-01-05.
time = time.truncate(before='2017-01-05')
# print(time)
# print(time['2017-01-05':'2017-01-20'])

# Ten consecutive 10-hour periods. The lowercase 'h' alias is used because
# the uppercase 'H' spelling is deprecated in recent pandas releases.
p1 = pd.period_range('2016-01-01 10:10', freq='10h', periods=10)
# print(p1)








相關推薦

Python資料分析機器學習-Python時間序列分析

原始碼下載地址: http://download.csdn.net/download/adam_zs/10224873 from __future__ import absolute_import, division, print_function # http://w

Python資料分析機器學習-Python分析科比生涯資料

原始碼下載:http://download.csdn.net/download/adam_zs/10222492 import matplotlib.pyplot as plt import pandas as pd import numpy as np '''科比生涯

Python資料分析機器學習實戰 Numpy/Pandas/Matplotlib等常用庫精講

課程簡介:     課程風格通俗易懂,真實案例實戰。精心挑選真實的資料集為案例,通過Python資料科學庫numpy,pandas,matplotlib結合機器學習庫scikit-learn完成一系列的機器學習案例。課程以實戰為基礎,所有課時都結合程式碼演示如何使用這些python

python資料分析(預測性分析機器學習

本文涉及到的主題如下所示: 預處理基於邏輯迴歸的分類基於支援向量機的分類基於ElasticNetCV的迴歸分析支援向量迴歸基於相似性傳播均值漂移演算法遺傳演算法神經網路決策樹演算法1、預處理 在上一章,我們已經做過一次預處理,即過濾掉停用詞。一些機器學習演算法對某些資料比

Python資料分析機器學習-使用sklearn構造決策樹模型

# datasets包括內建的資料集 california_housing房價的資料集 from sklearn.datasets.california_housing import fetch_california_housing import pandas as pd

【A】python資料分析機器學習實戰 專案開始前,怎麼根據實際專案選擇合適的機器學習模型

選擇合適的機器學習演算法: 最好的演算法的唯一路徑可能就是去嚐遍所有的演算法。但是這種方法非常 “蠢”。 機器學習型別 這部分我們會介紹一些最流行的機器學習模型型別。如果你對這些類別比較熟悉,那麼對你以後去選擇機器學習模型是非常有利的。 監督學習

Python資料分析機器學習-SVM調參例項

import numpy as np import matplotlib.pyplot as plt from scipy import stats from sklearn.svm import SVC from sklearn.datasets.samples_gene

【A-003】python資料分析機器學習實戰 Python科學計算庫 Pandas資料分析處理庫(四)DataFrame資料結構

pandas資料結構:DataFrame 引入: 在上一節中已經介紹過了Series物件,Series物件可以理解為由一列索引和一列值,共兩列資料組成的結構。而DataFrame就是由一列索引和多列值組成的結構,其中,在DataFrame中的每一列都是一個S

Python資料分析機器學習-使用者流失預警

import pandas as pd import numpy as np pd.set_option('display.height', 9999) pd.set_option('display.max_rows', 9999) pd.set_option('disp

Python資料分析機器學習-Numpy

import numpy world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype=str) print(type(world_alcohol)) print(world_alco

高端實戰 Python數據分析機器學習實戰 Numpy/Pandas/Matplotlib等常用庫

反向傳播 數據讀取 初識 微信 試圖 ada 安裝python 改進 貝葉斯 課程簡介: 課程風格通俗易懂,真實案例實戰。精心挑選真實的數據集為案例,通過Python數據科學庫numpy,pandas,matplotlib結合機器學習庫scikit-learn完成一系列的

Python大數據機器學習之NumPy初體驗

clas 自己 入門 left ray spark AC DC 精通 本文是Python大數據與機器學習系列文章中的第6篇,將介紹學習Python大數據與機器學習所必須的NumPy庫。 通過本文系列文章您將能夠學到的知識如下: 應用Python進行大數據與機器學習

2017年資料分析機器學習實戰到經典案例全套高清視訊教

課程特點:1. 通俗易懂,快速入門對機器學習經典演算法結合數學推導進行形象解釋,例項演示。2. Python主導,實用高效使用資料領域最主流語言Python及其分析與建模庫作為課程核心工具。3. 案例為師,實戰護航基於真實資料集,從零開始結合Python工具與機器學習演算法完

資料機器學習 基礎篇 關聯分析

關聯規則是人類在認識客觀事物中形成的一種認知模式。這種關聯規則在人的認知裡與反射類似。如在小時候不小心被針扎到,會有痛感,這樣針刺和痛感就在大腦裡有了這種關聯。這就是人在認識事物的過程中在認知中所建立的關聯規則,即通過與客觀事件互動發現事物之間存在的依賴或因果關

Cloudera成立基金會,運用資料分析機器學習改善人們的生活

2017年10月9日,北京 –為雲端計算優化的機器學習和分析平臺供應商Cloudera近期宣佈旗下慈善組織——Cloudera基金會(the Cloudera Foundation)正式成立,進一步強調其在運用大資料為社會公益服務方面所做出的承諾。Clouder

資料結構演算法學習筆記之如何分析一個排序演算法?

前言 現在IT這塊找工作,不會幾個演算法都不好意思出門,排序演算法恰巧是其中最簡單的,我接觸的第一個演算法就是它,但是你知道怎麼分析一個排序演算法麼?有很多時間複雜度相同的排序演算法,在實際編碼中,那又如何選擇呢?下面我們帶著問題一起學習一下。  正文 一、常見經典的排序方法 (圖片來自於一畫素)

機器學習:sklearn資料機器學習組成

二、模型的選擇演算法是核心,資料和計算是基礎。這句話很好地說明了機器學習中演算法的重要性。那麼我們來看下機器學習的幾種分類:監督學習分類 k-近鄰演算法、決策樹、貝葉斯、邏輯迴歸(LR)、支援

機器學習演算法 - 時間序列系1 -時序模式概念

時序模式 1 時間序列演算法 2 時間序列的預處理 2.1 平穩性檢驗 2.2 純隨機性檢驗 3 平穩時間序列分析 3.1 AR模型 3.2 MA模型 3.3 ARMA模型 3.4 平穩

python轉型資料分析機器學習、人工智慧學習路線

最近1年的主要學習時間,都投資到了 python 資料分析和資料探勘上面來了,雖然經驗並不是十分豐富,但希望也能把自己的經驗分享下,幫助到更多想轉行python資料分析和人工智慧的朋友,給廣大同學朋友規劃個適合學習規劃。 我大學學習的應用化學,後來畢業做了2年全棧設計師(PS:設計和前端

Python資料分析挖掘學習筆記(2)使用pandas進行資料匯入

一、匯入pandas模組:   import pandas as pda 二、匯入CSV格式資料: #資料匯入 i=pda.read_csv("E:/hexun.csv")     可對匯入的資料進行統計以及按列排序: #統計 i.describe() #排序 i