三个策略都判断股票上涨(做多股票),则股:债=0.5:0.5
三个中有两个策略发出做多股票信号,则股:债=0.4:0.6
三个中有一个策略发出做多股票信号,则股:债=0.3:0.7
否则,股:债=0.1:0.9
""" 2020.09.25 15:27
@zp
数据端,既用到了153数据库,也用到了Tushare Pro数据库
显然,加入的条件过多,必定导致过拟合问题,(由于未留出测试集,因此,可以肯定的是过拟合问题存在,但却无法量化验证)
"""
# coding=utf-8
import math
import tushare as ts
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import talib
import pandas as pd
from datetime import datetime, date
import pymysql
import threading
from queue import Queue
import warnings
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set(style="darkgrid", palette="muted", color_codes=True)
from scipy import stats,integrate
%matplotlib inline
# Re-apply the seaborn theme with single-letter colour codes enabled.
sns.set(color_codes=True)
# Draw the minus sign as a plain hyphen (presumably because the CJK font set
# below lacks the Unicode minus glyph -- confirm).
matplotlib.rcParams['axes.unicode_minus']=False
# Use the SimHei font so the Chinese titles and legend labels in this file render.
plt.rcParams['font.sans-serif']=['SimHei']
# NOTE(review): the token here is a blank placeholder; a real Tushare Pro token
# has to be supplied before any `pro.*` call can succeed.
ts.set_token(' ')
# Shared Tushare Pro client used by every data-loading function below.
pro = ts.pro_api()
# Moving-average (momentum) strategy inputs
def mean_fun():
    """Build the moving averages used by the momentum signal.

    Downloads the daily close of the CSI 300 index (000300.SH) through the
    module-level Tushare client ``pro`` and computes five rolling means
    (50/70/90/110/130 trading days) on the date-sorted close series.  All
    outputs are trimmed so that they start on the first date on which the
    longest moving average is defined.

    Returns:
        tuple: ``(ma50, ma70, ma90, ma110, ma130, close)`` -- six pandas
        Series indexed by trading date in ascending order.

    Raises:
        IndexError: if fewer rows than the longest window are available.
    """
    code = "000300.SH"
    test_start, test_end = "20050101", "20550101"
    windows = (50, 70, 90, 110, 130)

    # Only the stock index is needed.  The previous version also downloaded
    # the bond index and computed daily returns that were never returned.
    df_stock = pro.index_daily(ts_code=code, start_date=test_start,
                               end_date=test_end, fields='close,trade_date')
    df_stock.index = pd.to_datetime(df_stock.trade_date)
    close = df_stock.close.sort_index()

    means = [close.rolling(window=w).mean() for w in windows]
    # First date on which the longest average has a value; computed once
    # instead of on every trimming call.
    cutoff = means[windows.index(max(windows))].dropna().index[0]

    trimmed = [m[m.index >= cutoff] for m in means]
    return tuple(trimmed) + (close[close.index >= cutoff],)


mean1,mean2,mean3,mean4,mean5,DFF=mean_fun()
# Valuation (pe_ttm / pb) strategy inputs
def fun():
    """Build the rolling valuation statistics used by the valuation signal.

    Downloads daily ``pe_ttm`` and ``pb`` of the CSI 300 index (000300.SH)
    from 2008-09-01 onwards through the module-level Tushare client ``pro``
    and computes, on the date-sorted data:

    * a long-window (1400 rows) rolling mean and standard deviation, and
    * a short-window (10 rows) rolling mean,

    each restricted to dates between 2014-01-01 and 2055-01-01 inclusive.

    Returns:
        tuple: ``(pe_mean, pb_mean, pe_std, pb_std, pe_short_mean,
        pb_short_mean)`` -- six pandas Series indexed by trading date.
    """
    start, end = "20140101", "20550101"
    code = "000300.SH"
    t, T = 10, 1400
    start1, end1 = "20080901", "20550101"

    # The previous version also fetched stock and bond prices and defined a
    # return helper, none of which contributed to the result.
    df = pro.index_dailybasic(ts_code=code, start_date=start1, end_date=end1,
                              fields='trade_date,pe_ttm,pb')
    df.index = pd.to_datetime(df.trade_date)
    # Keep only the numeric columns: rolling over the string trade_date
    # column is rejected by recent pandas versions.
    valuation = df[['pe_ttm', 'pb']].sort_index()
    in_window = (valuation.index >= start) & (valuation.index <= end)

    long_roll = valuation.rolling(window=T)
    df_mean = long_roll.mean()[in_window]
    df_std = long_roll.std()[in_window]
    df_roll = valuation.rolling(window=t).mean()[in_window]

    return (df_mean.pe_ttm, df_mean.pb,
            df_std.pe_ttm, df_std.pb,
            df_roll.pe_ttm, df_roll.pb)


mean_pe,mean_pb,std1,std2,mean_roll1,mean_roll2=fun()
# Merrill Lynch investment clock: macro data access
class read_data:
    """Run SQL queries against the internal MySQL server, one thread per query."""

    # NOTE(review): host and credentials are hard-coded as defaults; consider
    # moving them to configuration or environment variables.
    def fetchData(self, query, q, name, db, host='192.168.0.153', user='jcyj',password='jcyjQwer'):
        """Run ``query`` on database ``db`` and put ``(name, DataFrame)`` on ``q``.

        Args:
            query: SQL text to execute.
            q: queue that receives the ``(name, DataFrame)`` result.
            name: key identifying this query in the caller's result dict.
            db: database (schema) name.
            host, user, password: connection parameters.
        """
        # Keyword arguments: PyMySQL >= 1.0 no longer accepts these positionally.
        conn = pymysql.connect(host=host, user=user, password=password,
                               database=db, charset='utf8',
                               cursorclass=pymysql.cursors.DictCursor)
        try:
            cursor = conn.cursor()
            try:
                cursor.execute(query)
                fetch = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even when the query fails.
            conn.close()
        q.put((name, pd.DataFrame(fetch)))

    def multithread(self, dataList, db):
        """Run every query in ``dataList`` concurrently.

        Args:
            dataList: mapping of result key -> SQL text.
            db: database (schema) name passed to :meth:`fetchData`.

        Returns:
            dict: result key -> DataFrame.

        Raises:
            RuntimeError: if any query fails; the original exception is
                attached as the cause.  Previously a failed worker made this
                method block forever waiting on the queue.
        """
        q = Queue()
        errors = []

        def _run(name, query):
            # Capture worker failures so the main thread can report them.
            try:
                self.fetchData(query, q, name, db)
            except Exception as exc:
                errors.append((name, exc))

        threads = []
        for key, value in dataList.items():
            t = threading.Thread(target=_run, args=(key, value))
            t.start()
            threads.append(t)
        for thread in threads:
            thread.join()

        if errors:
            failed_name, cause = errors[0]
            raise RuntimeError("query %r failed" % (failed_name,)) from cause

        # All workers have finished, so the queue can be drained without blocking.
        result = {}
        while not q.empty():
            name, data = q.get()
            result[name] = data
        return result

    def readhgyz(self, db='py_daziguan_2_1'):
        """Return ``{'base': DataFrame}`` with monthly ``report_date`` and ``shibor_1w``."""
        baseQuery = ("select report_date , shibor_1w "
                     " from py_hgyz_pbc_shanghai_shibor_statistics_month where DATE(report_date) > '2005-12-01' ")
        return self.multithread({'base': baseQuery}, db)

    def readetl1(self, db='py_etl'):
        """Return ``{'base2': DataFrame}`` with daily ``trading_date`` and ``bond_1y``."""
        baseQuery = ("select trading_date , bond_1y "
                     " from py_etl_risk_free_rate_daily_2_1 where DATE(trading_date) > '2006-04-30'")
        return self.multithread({'base2': baseQuery}, db)
# Load the daily 1-year bond yield table and index it by trading date.
BO1 = read_data().readetl1()["base2"].set_index('trading_date')
BO1.index = pd.to_datetime(BO1.index)
# Series of the bond_1y column, used below as the risk-free rate.
R_B = BO1.bond_1y
############################ Data-reading class ###################################
class readData:
    """Thin wrappers around the module-level Tushare Pro client ``pro``."""

    def read_index_daily(self, code, star, end):
        """Daily index rows (ts_code, trade_date, close, change) for ``code``."""
        return pro.index_daily(ts_code=code, start_date=star, end_date=end,
                               fields='ts_code,trade_date,close,change')

    def read_daily(self, code, star, end):
        """Daily stock rows (ts_code, trade_date, close, change) for ``code``."""
        return pro.daily(ts_code=code, start_date=star, end_date=end,
                         fields='ts_code,trade_date,close,change')

    def read_CPI(self, star, end):
        """Monthly ``month, ppi_yoy`` rows; months are given like ``'201609'``.

        Despite the method name, this queries the PPI endpoint (``pro.cn_ppi``).
        """
        return pro.cn_ppi(start_m=star, end_m=end, fields='month,ppi_yoy')

    def read_GDP(self, star, end):
        """Quarterly ``quarter, gdp_yoy`` rows; quarters are given like ``'2016Q4'``."""
        return pro.cn_gdp(start_q=star, end_q=end, fields='quarter,gdp_yoy')

    def read_bond(self, code, star, end):
        """Rows returned by ``pro.cb_daily`` for ``code`` between ``star`` and ``end``."""
        return pro.cb_daily(ts_code=code, start_date=star, end_date=end)

    def readshibor(self, star, end):
        """Rows returned by ``pro.shibor`` between ``star`` and ``end``."""
        return pro.shibor(start_date=star, end_date=end)
#####################################################################
# Query windows.  GDP is published with a delay, so the daily window is
# offset by one month (first-quarter GDP is only released in April).
start_time = '20000430'
end_time = "20550731"
# Quarterly window; shifted by one year because year-on-year differences are taken.
star_GDP, end_GDP = '2005Q1', '2055Q1'  # the end is deliberately not a Q3/Q4 quarter
# Monthly window for the price index.
star_CPI, end_CPI = '200501', '205503'

# Pull the raw tables (order of the remote calls is unchanged).
df1 = readData().read_GDP(star_GDP, end_GDP)
dc = readData().read_CPI(star_CPI, end_CPI)
dsc1 = readData().read_index_daily('000300.SH', start_time, end_time)
dsb = readData().read_index_daily('000012.SH', start_time, end_time)
dsp = readData().read_index_daily('NHCI.NH', start_time, end_time)
########################## GDP signal ################################
def GDP_fun(df1):
    """Turn quarterly GDP growth into a 0/1 signal per quarter.

    For every row the value four rows further down is subtracted (a
    four-quarter difference when rows are consecutive quarters).  The signal
    is 1 where that difference is positive, 0 where it is negative, and the
    signal of the preceding row where it is exactly zero (0 for the first row).
    The last four rows have no counterpart and are dropped.

    Args:
        df1: DataFrame with a ``quarter`` column and a ``gdp_yoy`` column.
            It is no longer modified in place, so the function can be called
            repeatedly on the same frame.

    Returns:
        pandas.Series: integer signal indexed by the ``quarter`` labels.
    """
    frame = df1.set_index('quarter')
    # Row k minus row k+4; the trailing four rows become NaN and are removed.
    df2 = (frame - frame.shift(-4)).dropna()

    G = pd.Series(0, index=df2.gdp_yoy.index)
    previous = 0
    for i, change in enumerate(df2.gdp_yoy.values):
        if change > 0:
            signal = 1
        elif change < 0:
            signal = 0
        else:
            signal = previous
        # Explicit positional write: integer keys on a string-labelled Series
        # are not treated as positions by current pandas.
        G.iloc[i] = signal
        previous = signal
    return G
G=GDP_fun(df1)
####################################################################
########################### Price (PPI) signal ##############################
def CPI_fun(dc):
    """Turn monthly PPI growth into a 0/1 signal per quarter slot of ``G``.

    The monthly ``ppi_yoy`` values are differenced (row k minus row k+11),
    grouped into consecutive blocks of three rows, and each block's sum is
    mapped to 1 (positive), 0 (negative) or the previous slot's signal (zero;
    0 for the first slot).  Block ``i`` is written to position ``i`` of a
    Series that shares ``G``'s index.

    NOTE(review): the difference spans 11 rows (``shift(12) - shift()``),
    whereas the GDP signal uses a full-year lag -- confirm this is intended.

    Args:
        dc: DataFrame with ``month`` and ``ppi_yoy`` columns.

    Returns:
        pandas.Series: integer signal indexed like the module-level ``G``.
    """
    dc = dc.sort_index().set_index('month')
    dc2 = (dc.shift(12) - dc.shift()).shift(-12).dropna()
    values = dc2.ppi_yoy.values

    C = pd.Series(0, index=G.index)
    # Never write past the end of C when more complete three-month blocks
    # than GDP quarters are available.
    n_blocks = min(len(values) // 3, len(C))
    previous = 0
    for i in range(n_blocks):
        j = 3 * i
        total = values[j] + values[j + 1] + values[j + 2]
        if total > 0:
            signal = 1
        elif total < 0:
            signal = 0
        else:
            signal = previous
        # Explicit positional write (C carries string quarter labels).
        C.iloc[i] = signal
        previous = signal
    return C


C=CPI_fun(dc)
########################################################################
############################## Interest-rate signal ##################################
DF=read_data()
DF2=DF.readhgyz()
DF=DF2['base']
def LV_fun(dc):
    """Turn the monthly 1-week SHIBOR into a 0/1 signal per quarter slot of ``G``.

    Same procedure as the price signal: difference ``shibor_1w`` (row k minus
    row k+11), sum consecutive blocks of three rows, and map each sum to
    1 (positive), 0 (negative) or the previous slot's signal (zero; 0 for the
    first slot).

    NOTE(review): the SQL query that feeds this function has no ORDER BY, and
    block ``i`` is written to position ``i`` of ``G``'s index -- confirm the
    row order of ``dc`` matches the order of ``G``.

    Args:
        dc: DataFrame with ``report_date`` and ``shibor_1w`` columns.

    Returns:
        pandas.Series: integer signal indexed like the module-level ``G``.
    """
    dc = dc.sort_index().set_index('report_date')
    dc2 = (dc.shift(12) - dc.shift()).shift(-12).dropna()
    values = dc2.shibor_1w.values

    C = pd.Series(0, index=G.index)
    # Never write past the end of C when more complete three-month blocks
    # than GDP quarters are available.
    n_blocks = min(len(values) // 3, len(C))
    previous = 0
    for i in range(n_blocks):
        j = 3 * i
        total = values[j] + values[j + 1] + values[j + 2]
        if total > 0:
            signal = 1
        elif total < 0:
            signal = 0
        else:
            signal = previous
        # Explicit positional write (C carries string quarter labels).
        C.iloc[i] = signal
        previous = signal
    return C


LV=LV_fun(DF)
#########################################################################
# Map a quarter label to the calendar date on which its signal is read.
def time_fun(i):
    """Return the signal date (``'YYYYMMDD'``) for quarter position ``i``.

    The quarter label ``C.index[i]`` (e.g. ``'2019Q4'``) is mapped to a date
    shortly after that quarter's data is assumed to be published:

    * Q1 -> early May of the same year,
    * Q2 -> early August of the same year,
    * Q3 -> early November of the same year,
    * Q4 -> February of the following year.

    Candidate days are tried in a fixed order and the first one present in
    ``R_B``'s date index is returned; if none is present the last candidate
    is returned unchecked.

    The Q4 year is now computed as ``year + 1``.  It used to be read from
    ``C.index[i-1]``, which wraps around to the last row when ``i == 0``.

    NOTE(review): for Q1 the fallback order is 0505, 0502, 0503, ... 0509,
    i.e. it steps back before 5 May -- kept as is, confirm it is intended.

    Raises:
        ValueError: if the label's quarter digit is not 1-4.
    """
    label = C.index[i]
    year, quarter = label[0:4], label[5:6]

    if quarter == '1':
        days = ["0505"] + ["050" + str(j) for j in range(2, 10)]
    elif quarter == '2':
        days = ["0801", "0802", "0803"]
    elif quarter == '3':
        days = ["1101", "1102", "1103"]
    elif quarter == '4':
        year = str(int(year) + 1)
        days = (["0201"]
                + ["020" + str(j) for j in range(2, 10)]
                + ["02" + str(j) for j in range(10, 29)])
    else:
        raise ValueError("unexpected quarter label: %r" % (label,))

    for day in days[:-1]:
        candidate = year + day
        if (R_B.index == candidate).any():
            return candidate
    return year + days[-1]
############################ Returns and date-indexing helpers ##########################
class ZC_fun:
    """Helpers that convert raw daily index tables into date-indexed data."""

    def ret_function(self, ds):
        """Return the daily return series of ``ds`` in ascending date order.

        Each row's ``change`` is divided by the ``close`` of the next row
        (the previous trading day when rows are ordered newest-first --
        presumably how the data source delivers them; confirm).  Rows without
        a next row are dropped.

        Args:
            ds: DataFrame with ``trade_date``, ``close`` and ``change`` columns.

        Returns:
            pandas.Series: returns indexed by trading date.
        """
        standard_base = ds.sort_index()
        # Let pandas infer the date format, as the other loaders in this file
        # do; the old explicit '%Y-%m-%d' does not match 'YYYYMMDD' strings.
        standard_base.index = pd.to_datetime(standard_base.trade_date)
        standard_ret = standard_base.change / standard_base.close.shift(-1)
        return standard_ret.sort_index(axis=0, ascending=True).dropna()

    def bsc_fun(self, ds):
        """Return ``ds`` indexed by trading date, without ``ts_code``/``trade_date``.

        Args:
            ds: DataFrame with ``ts_code`` and ``trade_date`` columns.
        """
        dsc = ds.drop(['ts_code'], axis=1).sort_index()
        dsc.index = pd.to_datetime(dsc.trade_date)
        return dsc.drop(['trade_date'], axis=1)
#########################################################################
gc=ZC_fun()
R_c=gc.ret_function(dsc1)  # stock index returns
gb=ZC_fun()
R_b=gb.ret_function(dsb)  # bond index returns
gp=ZC_fun()
R_p=gp.ret_function(dsp)  # commodity index returns
# Risk-free rate: bond_1y converted to float and divided by 100.
# The builtin float replaces np.float, which no longer exists in NumPy >= 1.24.
R_B=R_B.astype(float)/100
############################ Returns for one quarter's holding window ####################################
def RET_sig(sig_c,i):
    """Slice ``sig_c`` to the holding window of quarter position ``i``.

    The quarter label ``G.index[i]`` (e.g. ``'2019Q3'``) selects the window:

    * Q1 -> 5 May to 31 July of the same year,
    * Q2 -> 1 August to 31 October of the same year,
    * Q3 -> 1 November to 31 December, followed by January of the next year,
    * otherwise (Q4) -> 1 February to 30 April of the next year.

    The next year is computed as ``year + 1``.  It used to be read from
    ``G.index[i-1]`` / ``G.index[i-2]``, which wrap around to the end of the
    index for the first rows.

    Args:
        sig_c: pandas Series with a datetime index.
        i: position in ``G.index``.

    Returns:
        pandas.Series: the rows of ``sig_c`` inside the window(s); for Q3 the
        November-December rows come first, then the January rows.
    """
    label = G.index[i]
    year, quarter = label[0:4], label[5:6]
    next_year = str(int(year) + 1)

    if quarter == '1':
        windows = [(year + '0505', year + '0731')]
    elif quarter == '2':
        windows = [(year + '0801', year + '1031')]
    elif quarter == '3':
        windows = [(year + '1101', year + '1231'),
                   (next_year + '0101', next_year + '0131')]
    else:
        windows = [(next_year + '0201', next_year + '0430')]

    pieces = []
    for first_day, last_day in windows:
        # The bounds are 'YYYYMMDD' strings, so that is the format to parse with.
        lower = pd.to_datetime(first_day, format='%Y%m%d')
        upper = pd.to_datetime(last_day, format='%Y%m%d')
        pieces.append(sig_c[(sig_c.index >= lower) & (sig_c.index <= upper)])

    if len(pieces) == 1:
        return pieces[0]
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    return pd.concat(pieces)
###########################################################################
###########################################################################
# The three building-block signals of the combined strategy.
# Momentum signal: 1 means "overweight stocks".
def SIG1(i):
    """Return 1 if the close is above all five moving averages, else 0.

    The comparison is made on the signal date ``time_fun(i)`` using the
    module-level close series ``DFF`` and moving averages ``mean1``..``mean5``.
    If that date is missing from any of the series the signal is 0.

    Args:
        i: quarter position passed to ``time_fun``.
    """
    n = 1
    # Resolve the signal date once; it used to be recomputed ten times.
    signal_date = time_fun(i)
    price = DFF[DFF.index == signal_date].values
    if price.size == 0:
        return 0
    for average in (mean1, mean2, mean3, mean4, mean5):
        level = average[average.index == signal_date].values
        # An empty selection means "no signal" rather than relying on the
        # truth value of an empty array.
        if level.size == 0 or not (price > n * level).all():
            return 0
    return 1
# Valuation signal (pe_ttm / pb): 1 means "overweight stocks".
def SIG2(i):
    """Return 1 if PE or PB looks cheap on the signal date, else 0.

    "Cheap" means the short rolling mean is below the long rolling mean minus
    half the long rolling standard deviation.  The check is made for pe_ttm
    (``mean_roll1``, ``mean_pe``, ``std1``) and for pb (``mean_roll2``,
    ``mean_pb``, ``std2``); either one is enough.  A metric whose series lack
    the signal date counts as not cheap.

    Args:
        i: quarter position passed to ``time_fun``.
    """
    # Resolve the signal date once; it used to be recomputed six times.
    signal_date = time_fun(i)

    def _on_date(series):
        return series[series.index == signal_date].values

    def _is_cheap(short_mean, long_mean, long_std):
        short, mean, std = _on_date(short_mean), _on_date(long_mean), _on_date(long_std)
        if min(short.size, mean.size, std.size) == 0:
            return False
        return bool((short < mean - 0.5 * std).all())

    if _is_cheap(mean_roll1, mean_pe, std1) or _is_cheap(mean_roll2, mean_pb, std2):
        return 1
    return 0
# Investment-clock signal: 1 means "overweight stocks".
def SIG3(i):
    """Return 1 if both the price signal ``C`` and the rate signal ``LV`` are 0.

    Args:
        i: position in the quarter index shared by ``C`` and ``LV``.
    """
    # Explicit positional access: both Series carry string quarter labels.
    if C.iloc[i] == 0 and LV.iloc[i] == 0:
        return 1
    return 0
################################# Allocation rule ##################################
def quanzhong_fun(R_c,R_p,R_b,R_B):
    """Build the daily return series of the combined stock/bond strategy.

    For every quarter position in ``G`` the three signals are summed and the
    stock:bond weights are chosen from the total: 3 -> 0.5:0.5, 2 -> 0.4:0.6,
    1 -> 0.3:0.7, otherwise 0.1:0.9.  The weighted returns over that
    quarter's holding window (see ``RET_sig``) are then collected.

    Args:
        R_c: stock return series.
        R_p: commodity return series (weight 0).
        R_b: bond return series.
        R_B: risk-free rate series (weight 0).

    Returns:
        pandas.Series of daily portfolio returns (not sorted by date), or
        ``None`` when ``G`` is empty.
    """
    weights_by_score = {3: (0.5, 0.5), 2: (0.4, 0.6), 1: (0.3, 0.7)}
    pieces = []
    for i in range(len(G)):
        # Evaluate each signal once per quarter.
        score = SIG1(i) + SIG2(i) + SIG3(i)
        w_stock, w_bond = weights_by_score.get(score, (0.1, 0.9))
        # The zero-weight terms are kept on purpose: they take part in index
        # alignment, so dates missing from R_p or R_B still become NaN.
        blended = (w_stock * RET_sig(R_c, i) + 0 * RET_sig(R_p, i)
                   + w_bond * RET_sig(R_b, i) + 0 * RET_sig(R_B, i))
        pieces.append(blended)
    if not pieces:
        return None
    # Latest-processed quarter first, matching the order the old
    # Series.append chain produced (Series.append is gone in pandas 2.0).
    return pd.concat(pieces[::-1])


RETT=quanzhong_fun(R_c,R_p,R_b,R_B)
RETT=RETT.sort_index(axis=0)
cum=np.cumprod(1+RETT)
cum=cum.dropna()
############################ Benchmark index chart #########################################
# First and last strategy dates as 'YYYYMMDD' strings for the benchmark query.
start_time_jz=RETT.index[0].strftime('%Y%m%d')
end_time_jz=RETT.index[-1].strftime('%Y%m%d')
def bj_standard(code,lab='000300.SH',col='k'):
    """Plot and return the normalised close of a benchmark index.

    Downloads the daily rows of ``code`` between ``start_time_jz`` and
    ``end_time_jz`` and divides the close by the close of the last row as
    returned by the data source (presumably the earliest date, if rows arrive
    newest-first -- confirm).

    Args:
        code: index code, e.g. ``'000300.SH'``.
        lab: legend label for the line.
        col: matplotlib colour for the line.

    Returns:
        pandas.Series: normalised close indexed by trading date.
    """
    standard_base = pro.index_daily(ts_code=code, start_date=start_time_jz, end_date=end_time_jz)
    standard_base = standard_base.sort_index()
    # Let pandas infer the date format, as the other loaders in this file do.
    standard_base.index = pd.to_datetime(standard_base.trade_date)
    close_base = standard_base.close
    # Explicit positional access to the last row; the unused return/cumulative
    # computations of the previous version were dropped.
    normalised = close_base / close_base.iloc[-1]
    plt.plot(normalised, label=lab, color=col)
    return normalised
###########################################################################
def performance(port_ret):
    """Print and return a one-row table of performance statistics.

    Uses 250 periods per year.  The sample length is the calendar-day span of
    the index scaled by 250/365.

    Args:
        port_ret: pandas Series of periodic returns with a datetime index.

    Returns:
        pandas.DataFrame: one row with the annualised arithmetic mean,
        annualised geometric mean, annualised volatility, maximum drawdown,
        geometric mean over volatility, and geometric mean over maximum
        drawdown.
    """
    span_days = (port_ret.index[-1] - port_ret.index[0]).days
    time_interval = span_days * 250 / 365

    growth = 1 + port_ret
    nv = growth.cumprod()           # net value curve
    running_peak = nv.cummax()

    geom_mean = growth.prod() ** (250 / time_interval) - 1
    sd = port_ret.std() * np.sqrt(250)
    mdd = ((running_peak - nv) / running_peak).max()

    metrics = [
        ('算术平均收益', port_ret.mean() * 250),
        ('几何平均收益', geom_mean),
        ('波动率', sd),
        ('最大回撤率', mdd),
        ('夏普比率', (geom_mean - 0) / sd),
        ('卡尔曼比率', geom_mean / mdd),
    ]
    result = pd.DataFrame({label: [value] for label, value in metrics})
    print(result)
    return result
if __name__ == "__main__":
    # Benchmark curves: the first call uses the default label and colour,
    # the second is labelled and drawn in blue.
    for benchmark_code, plot_options in (
        ('000300.SH', {}),
        ('000012.SH', {'lab': '上证国债', 'col': 'b'}),
    ):
        bj_standard(benchmark_code, **plot_options)
    #bj_standard('NHCI.NH',lab='南华商品指数',col='y')

    # Print the strategy statistics, then overlay its net-value curve.
    performance(RETT)
    plt.plot(cum, label="组合策略", color='r', linestyle='-')
    plt.title("组合策略净值走势对比图")
    plt.legend()
看下权重变化
# Daily stock (sig1) and bond (sig2) weights over the strategy's dates.
# Created as float Series because fractional weights are written into them.
sig1=pd.Series( 0.0,RETT.index )
sig2=pd.Series( 0.0,RETT.index )
# Total signal score -> (stock weight, bond weight); any other score gets 0.1/0.9.
weights_by_score = {3: (0.5, 0.5), 2: (0.4, 0.6), 1: (0.3, 0.7)}
for i in range(len(G)):
    # Evaluate the signals and the holding window once per quarter.
    score = SIG1(i) + SIG2(i) + SIG3(i)
    stock_weight, bond_weight = weights_by_score.get(score, (0.1, 0.9))
    window_dates = RET_sig(R_c, i).index
    sig1[window_dates] = stock_weight
    sig2[window_dates] = bond_weight
import matplotlib.pyplot as plt
import numpy as np
# Stacked area chart of the two weights.
plt.stackplot(sig1.index, sig1, sig2, baseline='zero', labels=['沪深300权重', '上证国债权重'], colors=['r', 'b'])
plt.legend(loc='upper right')
plt.ylim(0, 1.2)
plt.grid(axis='y', color='gray', linestyle=':', linewidth=2)
plt.title("组合股债权重变动图")
plt.show()
从上图可以看出,只有在2015年股票配置出现过一次权重为0.5的情况。配置股票权重为0.4和0.3的次数相差不大,配置0.1的情况最多。
算术平均收益 几何平均收益 波动率 最大回撤 夏普比率 卡尔曼比率
组合策略 9.17% 9.02% 7.34% 12.47% 1.23 0.72
估值策略 11.70% 11.51% 9.54% 16.08% 1.21 0.72
动量策略 15.33% 14.72% 15.51% 32.11% 0.95 0.46
美林时钟 17.28% 16.36% 18.09% 34.13% 0.90 0.48
沪深300 13.27% 9.62% 27.63% 72.30% 0.35 0.13
上证国债 3.63% 3.60% 0.96% 2.25% 3.75 1.60
通过上述结果分析,我们可以得到:由三个策略构建的组合策略,其几何平均收益接近沪深300指数,但在风险方面,沪深300的波动率高达27.63%,而我们的组合策略的波动率仅为7.34%,显著优于沪深300。此外,组合策略也在波动率方面优于单一的估值、动量及美林时钟策略。组合的夏普比率也优于任何单一策略。同时我们应该看到,收益之所以会低于单一策略,是因为在我们的组合策略中,即使三个判断信号SIG都判断高配股票,我们也仅仅配置0.5。现实中大多数情况都是要充分重视风险的,做到稳中求胜。因此,综合比较之后,我们的组合策略能在较好地控制风险的情况下,取得不错的稳定收益。