import matplotlib as mpl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
import warnings
warnings.filterwarnings('ignore')
# Load the NFLX daily price / indicator table and preview the first five rows.
df = pd.read_csv('nflx_2014_2023.csv')
df.head()

# Structural overview: dimensions, dtypes with non-null counts, and
# per-column summary statistics (transposed so each column is a row).
df.shape
df.info()
df.describe().transpose()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2516 entries, 0 to 2515
Data columns (total 20 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   date            2516 non-null   object 
 1   open            2516 non-null   float64
 2   high            2516 non-null   float64
 3   low             2516 non-null   float64
 4   close           2516 non-null   float64
 5   volume          2516 non-null   int64  
 6   rsi_7           2516 non-null   float64
 7   rsi_14          2516 non-null   float64
 8   cci_7           2516 non-null   float64
 9   cci_14          2516 non-null   float64
 10  sma_50          2516 non-null   float64
 11  ema_50          2516 non-null   float64
 12  sma_100         2516 non-null   float64
 13  ema_100         2516 non-null   float64
 14  macd            2516 non-null   float64
 15  bollinger       2516 non-null   float64
 16  TrueRange       2516 non-null   float64
 17  atr_7           2516 non-null   float64
 18  atr_14          2516 non-null   float64
 19  next_day_close  2516 non-null   float64
dtypes: float64(18), int64(1), object(1)
memory usage: 393.2+ KB

# The "typical" price of a trading day is the mean of its high, low and
# closing prices.
df['typical'] = df[['high', 'low', 'close']].mean(axis=1)
# Take an explicit copy: df_timeseries gets columns assigned to it later, and
# doing that on a slice of df is the chained-assignment pattern pandas warns
# about (SettingWithCopyWarning), where it is ambiguous whether df is touched.
df_timeseries = df[['date', 'typical']].copy()
df_timeseries

# Parse the date strings into timestamps and make them the index, so the
# frame can be resampled and sliced by date.
df_timeseries['date'] = pd.to_datetime(df_timeseries['date'])
df_timeseries = df_timeseries.set_index('date')

# Line chart of the typical price over the whole sample.
fig, ax = plt.subplots()
ax.plot(df_timeseries)
ax.set_ylabel('Typical Price')
ax.set_title('Typical Stock Price Over Time')
plt.show()

# Resample to monthly frequency, averaging the typical price within each month.
monthly_data = df_timeseries.resample('M').mean()

# Line chart of the monthly averages.
fig, ax = plt.subplots()
ax.plot(monthly_data)
ax.set_ylabel('Typical Price')
ax.set_title('Typical Stock Price Over Time by Month')
plt.show()

# Pull the monthly typical prices out as a plain array.
ts_values = monthly_data['typical'].to_numpy()

# Augmented Dickey-Fuller test on the monthly series.
result = adfuller(ts_values)

# Report the test statistic, the p-value and the critical values
# (index 0, 1 and 4 of the returned tuple).
adf_statistic = result[0]
p_value = result[1]
critical_values = result[4]
print('ADF Statistic:', adf_statistic)
print('p-value:', p_value)
print('Critical Values:')
for level in critical_values:
    print(f'\t{level}: {critical_values[level]}')

ADF Statistic: -1.538395370579076
p-value: 0.5144394812928306
Critical Values:
	1%: -3.4870216863700767
	5%: -2.8863625166643136
	10%: -2.580009026141913

# Analysis window: rows strictly after start_date, up to and including
# end_date.
start_date = "2018-01-01"
end_date = "2023-12-31"
# Both bounds are enforced so that end_date actually limits the window
# instead of being an unused setting.
mask = (df_timeseries.index > start_date) & (df_timeseries.index <= end_date)
df_timeseries_filtered = df_timeseries[mask]
df_timeseries_filtered

# Resample the date-filtered series (not the full one) to monthly means.
# The result is stored under the names the later ACF, decomposition and
# SARIMA cells read: df_timeseries_filtered_monthly and ts_values_2.
df_timeseries_filtered_monthly = df_timeseries_filtered.resample('M').mean()
plt.plot(df_timeseries_filtered_monthly)
plt.ylabel('Typical Price')
plt.title('Typical Stock Price Over Time by Month')
plt.show()

# Extract the filtered monthly values as a plain array.
ts_values_2 = df_timeseries_filtered_monthly['typical'].values
# Run the Augmented Dickey-Fuller test on the filtered window, so this result
# is not just a repeat of the full-sample test.
result = adfuller(ts_values_2)
# Print the test statistic, the p-value and the critical values.
print('ADF Statistic:', result[0])
print('p-value:', result[1])
print('Critical Values:')
for key, value in result[4].items():
    print(f'\t{key}: {value}')

ADF Statistic: -1.538395370579076
p-value: 0.5144394812928306
Critical Values:
	1%: -3.4870216863700767
	5%: -2.8863625166643136
	10%: -2.580009026141913

# Autocorrelation plot of the values in ts_values_2, up to lag 15.
plot_acf(ts_values_2, lags=15, title='Autocorrelation Function (ACF)')
plt.show()

# Additive seasonal decomposition of the filtered monthly series.
# The period is passed explicitly (12 observations per year for monthly data)
# so statsmodels does not have to infer it from the index frequency; that
# inference raises "freq ME not understood" when the installed statsmodels
# does not recognise the month-end alias reported by pandas.
decomposition = seasonal_decompose(
    df_timeseries_filtered_monthly, model='additive', period=12
)

# One stacked panel each for the original series, trend, seasonal component
# and residual.
fig, axes = plt.subplots(4, 1, figsize=(10, 8))
panels = [
    (df_timeseries_filtered_monthly, 'Original'),
    (decomposition.trend, 'Trend'),
    (decomposition.seasonal, 'Seasonal'),
    (decomposition.resid, 'Residual'),
]
for ax, (series, label) in zip(axes, panels):
    ax.plot(series, label=label)
    ax.legend()
fig.tight_layout()
plt.show()

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[27], line 3
      1 # 进行季节性分解
----> 3 decomposition = seasonal_decompose(df_timeseries_filtered_monthly, model='additive')
      5 plt.figure(figsize=(10, 8))
      7 # 原始时间序列

File D:\Python310\lib\site-packages\statsmodels\tsa\seasonal.py:163, in seasonal_decompose(x, model, filt, period, two_sided, extrapolate_trend)
    161 if period is None:
    162     if pfreq is not None:
--> 163         pfreq = freq_to_period(pfreq)
    164         period = pfreq
    165     else:

File D:\Python310\lib\site-packages\statsmodels\tsa\tsatools.py:822, in freq_to_period(freq)
    820     return 24
    821 else:  # pragma : no cover
--> 822     raise ValueError(
    823         "freq {} not understood. Please report if you "
    824         "think this is in error.".format(freq)
    825     )

ValueError: freq ME not understood. Please report if you think this is in error.

# Monthly typical price of the filtered window, as a Series.
ts = df_timeseries_filtered_monthly['typical']

# Define and fit a SARIMA model: order (1, 1, 1) with seasonal order
# (1, 1, 1) at a seasonal period of 12.
model = SARIMAX(ts, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
results = model.fit()

# Forecast the next 12 steps (one year of monthly values).
forecast = results.forecast(steps=12)

# Plot the observed series and the forecast on the same axes.
fig, ax = plt.subplots()
ax.plot(ts.index, ts, label='Original Data')
ax.plot(forecast.index, forecast, label='Forecast')
ax.set_xlabel('Date')
ax.set_ylabel('Typical Stock Price')
ax.set_title('Time Series Forecasting with SARIMA')
ax.legend()
plt.show()

	date	open	high	low	close	volume	rsi_7	rsi_14	cci_7	cci_14	sma_50	ema_50	sma_100	ema_100	macd	bollinger	TrueRange	atr_7	atr_14	next_day_close
0	2014-01-02	52.401428	52.511429	51.542858	51.831429	12325600	34.729664	49.183584	-89.573201	-131.288579	50.112828	50.235157	46.385428	46.650698	0.751929	52.607357	1.052857	1.161182	1.247748	51.871429
1	2014-01-03	52.000000	52.495712	51.842857	51.871429	10817100	35.587886	49.457208	-65.820581	-103.026189	50.228771	50.299327	46.537571	46.754726	0.624259	52.656143	0.664283	1.090197	1.206072	51.367142
2	2014-01-06	51.889999	52.044285	50.475716	51.367142	15501500	29.820674	46.087900	-121.472559	-139.640566	50.312571	50.341203	46.680971	46.846621	0.476890	52.666928	1.568569	1.158535	1.231965	48.500000
3	2014-01-07	49.684284	49.698570	48.152859	48.500000	36167600	14.371863	32.522091	-206.762171	-238.029120	50.336228	50.268997	46.791957	46.879558	0.127277	52.560214	3.214283	1.452214	1.373559	48.712856
4	2014-01-08	48.104286	49.425713	48.074287	48.712856	20001100	18.049045	34.073549	-117.836707	-180.766801	50.373257	50.207969	46.917071	46.916075	-0.131106	52.455357	1.351426	1.437815	1.371978	48.150002

	count	mean	std	min	25%	50%	75%	max
open	2516.0	2.744558e+02	1.660051e+02	4.460571e+01	1.099825e+02	2.880000e+02	3.845425e+02	6.923500e+02
high	2516.0	2.786389e+02	1.682052e+02	4.584286e+01	1.118875e+02	2.926900e+02	3.913175e+02	7.009900e+02
low	2516.0	2.701246e+02	1.636123e+02	4.278571e+01	1.071175e+02	2.826600e+02	3.777950e+02	6.860900e+02
close	2516.0	2.744870e+02	1.659030e+02	4.488714e+01	1.100650e+02	2.882300e+02	3.845600e+02	6.916900e+02
volume	2516.0	1.049230e+07	9.173072e+06	1.144000e+06	5.017050e+06	7.795950e+06	1.299060e+07	1.333875e+08
rsi_7	2516.0	5.339958e+01	1.776386e+01	4.374756e+00	4.001592e+01	5.395392e+01	6.715112e+01	9.630571e+01
rsi_14	2516.0	5.342408e+01	1.308827e+01	9.152344e+00	4.373117e+01	5.366803e+01	6.337870e+01	9.154787e+01
cci_7	2516.0	1.192076e+01	1.008320e+02	-2.333333e+02	-7.456580e+01	2.563277e+01	9.421276e+01	2.333333e+02
cci_14	2516.0	1.570504e+01	1.113966e+02	-4.240129e+02	-7.187032e+01	2.716595e+01	1.003219e+02	3.567957e+02
sma_50	2516.0	2.704167e+02	1.644050e+02	4.967994e+01	1.076279e+02	2.860346e+02	3.765922e+02	6.485930e+02
ema_50	2516.0	2.705223e+02	1.634548e+02	4.940977e+01	1.059231e+02	2.873826e+02	3.812841e+02	6.427655e+02
sma_100	2516.0	2.667604e+02	1.631895e+02	4.638543e+01	1.056630e+02	2.876342e+02	3.657409e+02	6.167718e+02
ema_100	2516.0	2.668134e+02	1.610500e+02	4.665070e+01	1.024905e+02	2.913791e+02	3.693439e+02	6.107501e+02
macd	2516.0	1.151103e+00	9.655979e+00	-5.923381e+01	-1.929731e+00	1.535425e+00	5.693881e+00	2.434734e+01
bollinger	2516.0	2.728651e+02	1.652599e+02	4.764357e+01	1.066550e+02	2.844448e+02	3.807084e+02	6.721270e+02
TrueRange	2516.0	9.249735e+00	8.414224e+00	5.999980e-01	3.102141e+00	7.830006e+00	1.271501e+01	1.361000e+02
atr_7	2516.0	9.231142e+00	6.141865e+00	1.090197e+00	3.349633e+00	9.445428e+00	1.322733e+01	3.522181e+01
atr_14	2516.0	9.206930e+00	5.898383e+00	1.206072e+00	3.351988e+00	9.852787e+00	1.321154e+01	2.780988e+01
next_day_close	2516.0	2.746526e+02	1.658886e+02	4.488714e+01	1.101225e+02	2.882850e+02	3.848575e+02	6.916900e+02

	date	typical
0	2014-01-02	51.961905
1	2014-01-03	52.069999
2	2014-01-06	51.295714
3	2014-01-07	48.783810
4	2014-01-08	48.737619
...	...	...
2511	2023-12-22	489.410004
2512	2023-12-26	489.683339
2513	2023-12-27	491.686666
2514	2023-12-28	490.823344
2515	2023-12-29	487.016673

	typical
date
2018-01-02	199.380000
2018-01-03	204.253337
2018-01-04	205.560003
2018-01-05	208.533335
2018-01-08	210.996668
...	...
2023-12-22	489.410004
2023-12-26	489.683339
2023-12-27	491.686666
2023-12-28	490.823344
2023-12-29	487.016673

📘 基于SARIMA时间序列模型预测Netflix股票未来趋势/Netflix股票未来趋势.ipynb