之前一直在关注中欧医疗健康这只基金,收益还不错,于是想用数学的方法分析一下,基金涨幅服从什么分布。

首先是一坨涨幅数据, 

2016-11-07到2021-07-29 这期间的涨幅. 单位都是百分比
[0,0,-0.7071,0.5086,-0.3036,-0.1015,0.1016,-0.203,-0.1017,-0.4073,0.3067,0.7136,-0.3036,0.5076,0.404,0.1006,-0.7035,-1.0121,0.2045,-1.4286,-0.4141,-0.2079,0.8333,-0.3099,0.3109,-2.2727,-0.3171,-0.4242,-1.1715,0,0.2155,0,0.2151,0,-0.4292,0.1078,-0.1076,-0.5388,-0.5417,0.9804,-0.1079,0.6472,0.4287,0.2134,-0.852,0.4296,-0.3209,-0.5365,-0.863,-0.6529,-0.2191,0.6586,0.2181,-0.5441,0.9847,0.4334,-0.3236,0.7576,0.4296,-0.107,0.2141,0.4274,0,-0.3191,0,0.9605,-0.4228,-1.2739,0.9677,-0.9585,1.2903,1.38,0.3141,1.9833,-0.5118,-0.5144,0.3102,-0.1031,-0.2064,-0.1034,0.5176,0.103,-0.1029,-0.7209,0.6224,1.0309,-0.2041,0.7157,0.203,-0.7092,0.4082,1.0163,1.2072,-0.0994,-0.2985,-0.8982,0.2014,-0.7035,-0.4049,1.626,1.7,0,-0.4916,-1.6798,-0.7035,0.4049,1.3105,-0.995,0.1005,1.004,-0.2982,2.1934,-1.1707,-2.0731,0.3024,0.5025,1.1,-1.1869,0.2002,-0.2997,-0.501,-0.4028,-1.1122,0.1022,0.3064,0.9165,0.7064,2.004,1.8664,-1.2536,-0.0977,0.5865,0.3887,0.5808,-0.385,-0.2899,-0.8721,0.1955,0.7805,-1.0649,-0.7828,1.3807,2.0428,1.4299,0.6579,0.747,0.278,-1.8484,-0.6591,-0.3791,1.2369,-0.7519,1.9886,-1.4856,1.3195,1.8605,0.274,-1.5483,0.555,0.552,-1.3724,-1.0204,1.4995,-1.108,-1.2138,-1.1342,0.478,-0.4757,-0.7648,0.289,-1.4409,-0.8772,1.3766,0.485,-0.4826,0.097,-0.5814,-1.1696,0,0.6903,-0.0979,-0.1961,-0.5894,-1.087,-0.2997,0.8016,0.5964,1.7787,0.2913,-0.5808,1.2658,-0.6731,-0.1936,-0.485,0.3899,0.7767,-0.6744,-0.291,-0.8755,0.0981,0.3922,-0.3906,0.7843,-0.3891,0.8789,0.7744,0.1921,-0.767,0.1932,0.3857,0.3842,-0.3828,1.0567,-0.3802,-0.2863,0.6699,-0.6654,1.1483,-0.5676,0.1903,0.7597,-0.377,0.1892,1.4164,-0.0931,1.4911,1.3774,1.721,0.6233,0.708,-0.5272,0.3534,0.9683,0.959,-0.4318,1.5611,0.427,1.3605,-0.5034,0.8432,-1.087,-0.1691,0,0.6774,0.9251,3.1667,-0.4039,-1.3788,0.6579,0.9804,0.4045,-2.498,-0.6612,2.3295,-2.7642,1.5886,-0.3292,-1.4864,-4.4426,0.1754,-1.6637,1.5138,-0.7895,-0.0884,0.531,1.4085,-1.1285,0.5268,-0.3493,1.4023,4.2351,0,0.4975,0.5776,-1.4766,0.0833,1.1647,0.7401,1.8776,-0.7212,-0.0807,-
0.6462,-1.9512,1.2438,1.0647,0,-0.4052,0.4068,0.9724,-0.0803,0.1606,2.085,0,-0.2357,1.4173,-0.7764,1.1737,-1.6241,-0.6289,-0.2373,1.7446,0.3118,0.3108,-2.0139,0.0791,-3.8705,-0.3287,0.4122,-1.8883,0.1674,-2.2556,-3.8462,-0.8889,1.3453,-2.5664,4.5413,1.0426,1.1178,2.381,-0.6645,1.5886,-0.6584,-0.8285,1.5038,-0.7407,0.8292,2.2204,-0.1609,3.2232,0.5464,0.854,-1.6166,-1.1737,1.2668,-0.7819,3.3885,2.2104,-0.522,-0.6747,-1.9623,3.5412,1.3383,-2.0543,-1.7228,3.6585,0,-0.1471,-1.1782,0.149,1.3393,0,1.9824,-2.4478,1.6236,-3.1227,0.9745,1.1136,0.2203,-0.0733,1.4663,3.9017,-1.9471,2.766,0.6901,-0.6854,1.9324,2.0311,-1.1281,0.1342,3.0831,-3.186,0,2.8207,0.8491,-1.7487,0.4614,-0.1312,2.2996,-0.1927,0.7722,1.788,1.0665,-5.1521,-0.589,2.8966,-4.7345,0.2015,3.6863,2.1978,-1.3283,0.0641,-3.2671,2.5166,-1.4212,-0.9174,-1.6534,-1.8157,3.9711,0,0.8333,0.7576,-0.3418,-3.6351,-1.0676,4.6043,0,1.1004,-1.3605,-1.8621,-2.6001,0,3.824,0.8339,0.3446,3.2967,1.8617,1.1749,-1.2903,0.1307,-3.4595,-0.3381,-7.1913,-0.4386,2.6432,-1.1445,-1.4472,-3.8179,0.687,-2.047,0.6966,-3.5357,-5.0199,3.104,-2.0342,4.5681,2.8594,1.8533,-0.3791,-3.8813,0.1584,-4.664,-1.8242,5.8277,-0.0798,1.278,0.7886,3.2864,-0.6818,-1.0679,-1.2336,-2.498,-0.4003,0.0804,-1.4458,-1.0595,2.4712,-1.9293,0.9016,-3.6556,-1.602,-1.5424,-1.6536,2.7434,1.4643,-0.5093,2.4744,0.4996,2.1541,0.7299,1.2882,-5.2464,-1.2584,-2.209,-5.2129,0.4583,-1.0036,-1.7512,-1.3133,-4.3726,5.0696,5.6764,-2.2381,-2.5641,-1.2218,-1.9029,-2.2308,1.5873,3.3203,0.5671,5.8271,-1.2433,0.7194,-0.3571,0.0896,-1.0743,1.991,1.5084,-0.8741,0.97,-2.0961,0.0892,-1.6043,2.0833,-0.3549,-2.0481,-0.1818,-0.0911,1.1851,-1.2613,1.2774,3.4234,1.4808,1.2876,-4.2373,-3.5398,-1.5596,1.3979,-1.6544,1.4953,-3.1308,-1.9011,-0.4845,-3.1159,-1.1055,-1.8293,2.2774,0.8097,-0.7028,-0.6067,0.4069,0,-3.4448,-1.2592,2.4442,0.4149,0.1033,0.516,-0.616,0.1033,-1.3416,1.569,1.4418,-0.6091,2.145,1.3,-1.6782,0.2008,0.1002,1.001,0,-0.1982,-1.1917,0.7035,3.0938,1.7425,1.0466,1.4124,0.2786,-0.9259,4.
486,-1.2522,-1.087,-0.2747,1.3774,3.5326,0.6999,-0.9557,3.3333,0.5942,1.6878,0.4149,-0.6612,-1.9135,-0.6785,4.953,0.1627,0.4874,-0.5659,2.9268,4.4234,-1.8154,-0.6934,-0.3879,0.5452,-1.4717,-1.1006,3.1002,-0.4626,3.5631,1.5707,-1.9882,-0.2254,-0.5271,-1.6654,1.7706,1.1346,-3.3657,-0.0774,-1.0844,1.2529,0.1547,-0.3089,0.4648,-0.3084,0.0773,0.541,0.2306,0.0767,1.7625,-0.3765,-4.9131,3.4181,-1.7679,-2.6604,3.6174,0.7758,-0.5389,2.9412,0.9774,-2.2338,-1.9802,1.3986,-0.0766,-2.454,0,1.1792,1.7094,-0.5348,0.5376,1.0695,-0.6803,-1.9026,-2.0171,-0.7126,1.5949,3.2182,-0.076,1.2177,-0.2256,-0.9043,1.1407,0.9023,2.459,-0.0727,0.5095,-0.0724,3.3333,1.122,-0.6241,0,2.1633,1.2978,-2.2252,-2.1379,1.9732,-2.2806,0.4243,0.1408,-0.1406,0.1408,-0.9142,-1.5614,-0.5047,-1.4493,0.4412,0,0,0.6589,-0.0727,2.0378,-0.9272,0.072,-0.5755,-0.4342,0.8721,-1.8732,0.2203,1.7582,2.1598,-0.1409,1.0586,0.0698,2.2331,0,0.8191,0.677,0.269,-1.1402,1.0176,3.1565,-0.0651,0.3257,-0.3247,-0.3257,0.6536,1.7532,-0.4467,-0.1282,-0.706,0.5818,0.3213,0.8969,-2.0952,0.3891,0,-0.3876,2.0752,1.2071,0.8788,-0.9956,2.5141,0.2452,-0.8563,0.3085,0.615,0.9169,-0.424,4.2579,0.3501,0,0.0581,1.9756,0.1709,-0.2275,-2.3375,1.2843,-1.7867,-0.0587,1.057,1.627,1.0863,-0.6222,1.309,1.7978,1.3797,-0.5444,-0.5473,0.7705,-0.6554,-2.0891,1.0107,2.8905,1.4587,-1.065,0.2153,2.4705,0.1048,-1.7277,-4.7949,-1.5109,0.7386,-0.6768,0.6246,-2.5959,-1.1008,-0.2929,0.9401,1.2224,0.8626,-2.3375,1.1092,-0.1155,-0.2312,2.2016,0.0567,0.3966,-1.8623,-0.575,-0.9832,-0.3505,0.2931,-0.1169,0.3511,-0.5831,0.0587,1.5826,-0.3462,-0.6948,-0.9329,2.0012,-0.3462,3.59,0.5031,0.8899,-1.1025,1.6165,1.6456,1.2412,2.0789,0.1044,0.1043,-2.814,-4.8794,4.0586,1.3001,3.4225,-1.1375,0.4707,0.7288,1.8088,-0.8629,0.6144,2.5954,-0.4464,-1.5446,1.3158,0.7493,0.5454,0.9369,-2.7846,1.5578,-2.5235,1.9289,2.5896,0.4854,2.3188,0.2833,-1.8832,-0.2399,-1.4911,-3.3203,-1.8182,-4.6296,-0.1618,-0.7563,-1.9597,3.3315,-2.3106,2.9153,3.9017,1.749,0.5056,-1.8612,2.3578,-1.352,1.1675,0.
8028,2.887,-0.5806,4.4769,-1.4439,0.6616,1.7371,-1.246,0.8411,-0.5097,1.4905,-0.6425,2.0323,0.0905,-1.8544,2.0276,1.2647,-1.3381,-1.6727,1.5172,0.6341,0.6751,-1.5199,3.1321,3.7412,-0.2546,-0.8932,2.3605,0.8386,-1.7879,1.0161,-1.7603,1.4505,2.7754,-2.0867,-0.8358,3.7505,1.6247,-2.1583,0.817,1.1345,1.6426,-1.6161,1.1218,2.6149,-0.5019,1.591,1.0695,2.0408,1.3704,-2.6306,3.0769,0.0728,2.9829,-0.7771,1.6376,1.331,-2.3159,-1.3447,1.363,-0.9908,2.0729,0.07,3.394,1.489,3.1344,-0.5173,1.495,-6.0519,1.8064,-1.5735,4.1497,1.6982,2.6333,-5.9762,1.2646,0.1643,2.9856,-0.0637,1.7533,2.005,-1.7199,1.7188,-2.5499,-0.9773,-0.2229,-1.3082,-3.1038,-1.7017,0.4073,1.1156,1.1702,-2.9081,-0.1361,1.1247,0,1.31,-0.03,1.66,3.11,-0.41,-0.06,-0.41,-0.38,-2.06,-4.04,-0.14,-4.08,0.18,1.57,-1.23,1.35,-2.21,-1.54,1.2,-0.83,0.44,3.9,-1.25,0.88,-1.64,0.89,1.76,2.77,3.63,1.53,-0.03,-0.99,0.32,-2.41,1.25,-0.1,-1.3,-3.77,0.86,3.51,0.66,2.09,-1.66,0.03,0.78,0.81,-0.13,-2.25,2.79,-1.95,-3.29,1.31,-0.23,0.77,-3.21,-3.38,1.1,0.8,-0.55,-0.42,-2.07,-0.14,0.61,0.36,3.33,0,1.92,2.09,0.66,0.33,-1.53,2.02,-0.52,2.22,1.21,0.92,1.97,-0.58,2.07,-0.79,0.18,0.79,1.87,-0.56,-0.63,1.32,1.18,-1.02,1.8,1.86,1.2,-0.7,0.17,1.19,-1.29,-1.45,0.03,1.1,-2.16,5.55,1.98,5.48,2.07,-3.57,-0.55,-2.99,-0.3,1.78,0.94,1.6,-1.13,1.54,2.03,1.48,2.19,-5.67,0.52,-5.43,-0.49,-4,0.4,-2.38,1.41,-1.94,1.59,-4.68,-0.24,-6.05,-2.54,4.04,3.05,0.65,-5.41,0.95,1.3,2.4,-2.78,0.19,-0.06,0.03,1.73,3.88,-0.06,2.4,-1.16,3.06,0.84,-1.62,-0.67,2.19,-1.47,-3.4,1.34,0.3,-0.15,-0.87,3.11,1.2,1.56,-0.37,3.74,-1.82,3.31,2.34,-0.19,0.56,-3.46,-3.48,1.76,2.15,2.35,0.48,1.12,2.97,-1.3,-0.78,1.56,-0.36,0,2.47,-1.18,1.04,-2.79,1.99,2.59,-2.47,-0.28,1.25,0.45,-1.15,0.91,1.65,-0.42,-0.5,-4.01,1.43,2.25,1.55,2.29,-0.41,-2.61,2.95,0.87,-0.76,0.22,2.28,-6.01,-1,-4.9,4.01,-2.6,0.1,4.06,0.23,1.13,0.62,-3.16,2.34,0.3,1.41,-3.01,-3.56,-5.05,-1.71,4.06,2.61]

通过初步统计:

总共记录: 1158

负涨幅: 516

正涨幅:611

零涨幅:31

最大负涨幅:-7.1913

最大正涨幅:5.8277

总体收益率:309%。也就是说,如果在 2016 年 11 月份买入 100 万,到 2021 年 7 月 29 日就挣了 309 万。

这是大致分析。现在想看下具体服从啥分布:

这里用非参数核密度估计,是最合适的。

用简单的代码,先粗略看下效果, 核密度估计使用的是高斯内核Gaussian kernel 

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# `data` is the list of percentage changes shown above.
# Grab the current axes explicitly and draw the Gaussian-kernel density
# estimate onto it with seaborn's default bandwidth.
density_axes = plt.gca()
sns.kdeplot(data=data, ax=density_axes)
plt.show()

分析中欧医疗健康基金涨幅_数据

 

 从肉眼可以看出,从0处分割,右区稍微大于左区一丢丢。

怎么说呢,看到这个图不是很满意,总感觉过度平滑了些,这个图左右两边基本上是五五开了。现在把带宽调小一些(bw_adjust 是对默认带宽的缩放系数,小于 1 时曲线更不平滑、细节更多)。

# bw_adjust multiplies the automatically selected bandwidth; a factor below 1
# gives a less-smoothed curve than the default plot.
sns.kdeplot(data=data, bw_adjust=0.2)
# Display the figure, as the first snippet does; without this a plain script
# run never shows the plot.
plt.show()

分析中欧医疗健康基金涨幅_核密度估计_02

从这个图可以看出右边明显比左边大了,也就是总体上正涨幅多于负涨幅。基本上 80% 的涨幅都集中在 -2.5 到 2.5 之间。

 

看到图了, 有点不甘心, 基金数据量太小了, 想根据这个核密度估计生成一堆随机数,也服从这个分布。

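其实原理很简单:一种办法是逆变换采样,也就是求这个分布函数(CDF)的反函数,再把均匀分布的随机数代进去。

Google 搜索了一下,发现 sklearn 的 KernelDensity 自带采样的 API(sample 方法),真是太方便,不用自己造轮子了。不过对高斯核来说,它的做法并不是求反函数,而是随机挑一个训练样本,再叠加一个标准差等于带宽的高斯噪声;两种做法得到的随机数服从同一个分布。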

 

 分析中欧医疗健康基金涨幅_造轮子_03

 

 

开始撸一段代码,生成一堆随机数,服从这个分布

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity

# Placeholder from the article: `data` is the list of percentage changes shown above.
data=上面的数据
bandwidth=0.2
data = np.array(data)
# NOTE(review): reshape(1, -1) produces a single row holding every value, so
# scikit-learn sees ONE sample with len(data) features rather than len(data)
# one-feature samples. The article runs into this limitation further below.
data = data.reshape(1, -1)
kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(data)
# One draw from the fitted model; the result has the same width as the single
# training row.
data2 = kde.sample(n_samples=1)
# print(data2)
# data2[0] is that single generated row, plotted as a 1-D series.
# bw_adjust rescales seaborn's automatically chosen bandwidth; it is a
# multiplier, not an absolute bandwidth like the scikit-learn argument above.
sns.kdeplot(data=data2[0], bw_adjust=bandwidth)
plt.show()
生成的前100项数据
[-0.04815941  0.39256206 -0.9863881   0.49870153 -0.0685882  -0.19805829
 -0.04881429 -0.06329466 -0.08757228 -0.05626878  0.73477043  0.53225204
 -0.38579164  0.83846548  0.47279225 -0.00614553 -0.6908834  -0.84516852
  0.10460452 -1.68316156 -0.66628967 -0.21534727  0.98405373 -0.05263706
  0.30697309 -2.15871898 -0.35459538 -0.52873768 -1.17071306  0.11299641
  0.03853813 -0.20053358  0.35698474 -0.01739235 -0.48132911  0.44775797
 -0.061619   -0.6657418  -0.62232173  0.89765569  0.10955856  0.80463848
  0.37803424  0.35498848 -0.78111103  0.29430354 -0.69433118 -0.38413522
 -0.83943738 -0.83408007 -0.42279157  0.72632924  0.24573081 -0.41170196
  0.85835499  0.24643445 -0.51300586  0.74195118  0.59904193  0.19662009
  0.42170835  0.30809563 -0.01263777 -0.07373658  0.04519119  0.64315866
 -0.39171321 -1.27626658  0.85453185 -1.06308532  1.18232718  0.95965952
  0.90415696  1.87500951 -0.86526574 -0.58421233  0.32383176 -0.33258237
 -0.47503023 -0.35860991  0.39685402  0.08292457  0.01761468 -0.82799399
  0.3078751   1.38049451 -0.24757849  0.60739704  0.49034587 -0.70863595
  0.53619962  0.64877759  1.09609066 -0.18906142 -0.48301451 -0.96290755
  0.21041798 -0.71870651 -0.24372297  1.75752347]

分析中欧医疗健康基金涨幅_随机数_04

 

生成的数据服从的分布和原始数据的分布基本一致。

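但是在这里我发现了一个问题:给入的样本是 1158 条数据,结果只能随机生成 1158 个数据,太扯淡了。原因是 reshape(1, -1) 把 1158 个涨幅当成了"1 个样本的 1158 个特征",所以 sample(n_samples=1) 返回的是 1 行 1158 个数,相当于在原始数据的每一项上各加了一点高斯噪声。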

我的本意是随机生成一堆服从这个分布的数,个数可以是任意正整数。没办法,继续研究,

以下是改造后的代码:把 reshape(1, -1) 改成 reshape(-1, 1),让每个涨幅单独作为一个样本,这样就支持随机生成任意个数了。

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity

# Placeholder from the article: bind `data` to the list of percentage changes
# shown above.
data = 上面的数据
bandwidth = 0.2

# Turn the flat list into a column: one row per value, one feature per row.
data = np.array(data).reshape(-1, 1)

# Build the Gaussian-kernel estimator, fit it on the column, then draw 1000
# new rows from the fitted model.
kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth)
kde.fit(data)
data2 = kde.sample(n_samples=1000)
# print(data2.flatten())

# Flatten the generated column back to 1-D and plot its density.
sns.kdeplot(data=data2.flatten(), bw_adjust=bandwidth)
plt.show()

分析中欧医疗健康基金涨幅_搜索_05

 

 

其实之所以做这个模型,是因为训练需要大量基金数据,1158 条数据远远不够,没有几万、几十万条数据,训练不好一个模型。