多因子策略 1.0

分享一个带权重的多因子策略的思路部分，若有不足还希望大家指点指点~ 策略是在Ricequant实现的，可以点击下面的链接进行详细的查看:---->具体的策略思路

股票池是中证 800 成分股；所有因子是 factors。

# Stock universe: constituents of the CSI 800 index (000906.XSHG).
stocks = index_components('000906.XSHG')

# Every candidate factor column, in the order the factor table is returned.
factors = [
    '日换手率', '月换手率', '周换手率',
    'pe', 'pcf', 'pb',
    '企业价值_市值比', '股息率', '市盈率相对盈利增长比率', '每股股利_市值比',
    'roe', 'roa', '投入资本回报率 ROIC',
    '息税前利润 /营业总收入', '权益乘数', '流动比率', 'fcff_市值比',
    '净资产收益率(增长率)',
    '基本每股收益增长率', '每股收益_市值比', '销售毛利率', '未分配利润_市值比',
    '账面市值比', '市值', '营收收入增长', '产权比率', 'ev_ebitda',
    '每股资产增长', '资产负债率', '存货周转率', '销售净利率', '利润总额增长',
    '总资产周转率', '营业利润增长', '市销率', '营业利润率',
    '本周新增评论数', '本周新增关注者', '本周总评论数', '本周总关注者数',
    '本周卖出行为数', '本周买入行为数',
]
获得某期限内的所有星期五交易日：

def get_date_list(start_date,end_date):
    """Return the Friday trading days in a date range as 'YYYY-MM-DD' strings.

    The candidate days come from ``get_trading_dates(start_date, end_date)``;
    only those whose ``weekday()`` equals 4 (Friday) are kept, in the order
    they were returned.
    """
    friday = 4
    return [
        day.strftime('%Y-%m-%d')
        for day in get_trading_dates(start_date, end_date)
        if day.weekday() == friday
    ]

1、根据日期获得股票池的所有因子数据

def get_data(date):
    """Build the factor table for the stock pool on ``date``.

    Queries the fundamentals listed below for the module-level ``stocks``,
    derives the ratio factors from them, then attaches turnover-rate and
    Xueqiu sentiment columns on a best-effort basis: when one of those data
    sources raises, the affected column(s) are filled with an empty Series
    instead of aborting the whole call.

    Args:
        date: Date string; passed to ``get_fundamentals`` and used to pick
            that day's slice out of the result.

    Returns:
        The factor table restricted to, and ordered by, the module-level
        ``factors`` list.
    """
    # Labels for the queried fundamentals, in the same order as the query
    # fields below. Not all of them survive into the returned table.
    factors1 = ['pe','pcf','pb','企业价值','股息率','市盈率相对盈利增长比率','每股股利','roe','roa','投入资本回报率 ROIC',
    '息税前利润 /营业总收入','权益乘数','流动比率','企业自由现金流量 FCFF','净资产收益率(增长率)',
    '基本每股收益增长率','每股收益 EPS','销售毛利率','未分配利润','总权益','市值','营收收入增长','产权比率','ebit',
    '每股资产增长','资产负债率','存货周转率','销售净利率','利润总额增长','总资产周转率',
    '营业利润增长','营业利润','营业收入','市销率'
    ]

    q = query(
        fundamentals.eod_derivative_indicator.pe_ratio,
        fundamentals.eod_derivative_indicator.pcf_ratio,
        fundamentals.eod_derivative_indicator.pb_ratio,
        fundamentals.eod_derivative_indicator.ev,  # enterprise value
        fundamentals.eod_derivative_indicator.dividend_yield,
        fundamentals.eod_derivative_indicator.peg_ratio,
        fundamentals.financial_indicator.dividend_per_share,
        fundamentals.financial_indicator.adjusted_return_on_equity_weighted_average,  # ROE
        fundamentals.financial_indicator.return_on_asset_net_profit,  # ROA
        fundamentals.financial_indicator.return_on_invested_capital,  # ROIC
        fundamentals.financial_indicator.ebit_to_revenue,  # EBIT / total revenue
        fundamentals.financial_indicator.du_equity_multiplier,  # equity multiplier (DuPont)
        fundamentals.financial_indicator.current_ratio,  # current ratio
        fundamentals.financial_indicator.fcff,  # free cash flow to firm
        fundamentals.financial_indicator.inc_return_on_equity,  # ROE (diluted), YoY growth
        fundamentals.financial_indicator.inc_earnings_per_share,  # basic EPS, YoY growth
        fundamentals.financial_indicator.earnings_per_share,  # basic EPS
        fundamentals.financial_indicator.gross_profit_margin,  # gross margin
        fundamentals.balance_sheet.undistributed_profit,  # undistributed profit
        fundamentals.balance_sheet.total_equity,  # total equity
        fundamentals.eod_derivative_indicator.market_cap,  # market cap
        fundamentals.financial_indicator.inc_operating_revenue,  # revenue growth
        fundamentals.financial_indicator.debt_to_equity_ratio,  # debt-to-equity
        # NOTE: this is EBITDA, but it is stored under the label 'ebit'.
        fundamentals.financial_indicator.ebitda,
        fundamentals.financial_indicator.inc_book_per_share,  # growth of book value per share
        fundamentals.financial_indicator.debt_to_asset_ratio,  # debt-to-asset
        fundamentals.financial_indicator.inventory_turnover,  # inventory turnover
        fundamentals.financial_indicator.net_profit_margin,  # net margin
        fundamentals.financial_indicator.inc_profit_before_tax,  # pre-tax profit growth
        fundamentals.financial_indicator.total_asset_turnover,  # total asset turnover
        # NOTE(review): the next two fields are gross-profit figures but are
        # labelled as operating-profit columns in factors1 — confirm intent.
        fundamentals.financial_indicator.inc_gross_profit,
        fundamentals.income_statement.gross_profit,
        fundamentals.income_statement.revenue,  # operating revenue
        fundamentals.eod_derivative_indicator.ps_ratio,  # price-to-sales
        ).filter(
            fundamentals.income_statement.stockcode.in_(stocks)
        )

    df = get_fundamentals(q,date)
    # Relabel the queried fields with the readable names above.
    # Presumably the result is a 3-D, Panel-like object — TODO confirm.
    df.items=factors1

    # Select the slice for this date; the key is the date wrapped in single quotes.
    str1 = "'"+date+"'"
    df = df[:,str1,:]

    # Derived ratio factors.
    df['账面市值比']=df['总权益']/df['市值']
    df['每股股利_市值比']= df['每股股利']/df['市值']
    df['fcff_市值比']=df['企业自由现金流量 FCFF']/df['市值']
    df['每股收益_市值比']=df['每股收益 EPS']/df['市值']
    df['未分配利润_市值比']=df['未分配利润']/df['市值']
    df['企业价值_市值比']=df['企业价值']/df['市值']
    # NOTE: computed as EBITDA / EV, the reciprocal of what the name suggests.
    df['ev_ebitda']=df['ebit']/df['企业价值']
    df['营业利润率']=df['营业利润']/df['营业收入']

    # Turnover rates, each fetched independently so one failure does not
    # affect the others.
    # NOTE(review): `stock_set` is not defined in the visible source; if it is
    # missing, the NameError is swallowed here and the column stays empty.
    turnover_columns = (
        ('周换手率', 'week'),
        ('月换手率', 'month'),
        ('日换手率', 'today'),
    )
    for column, period in turnover_columns:
        try:
            df[column] = get_turnover_rate(stock_set)[period].T.values
        except Exception:
            df[column] = Series()

    # Xueqiu weekly sentiment rankings. All-or-nothing: if any lookup fails,
    # every sentiment column is reset to empty.
    xueqiu_columns = (
        ('new_comments', '本周新增评论数'),
        ('new_followers', '本周新增关注者'),
        ('total_comments', '本周总评论数'),
        ('total_followers', '本周总关注者数'),
        ('sell_actions', '本周卖出行为数'),
        ('buy_actions', '本周买入行为数'),
    )
    try:
        for field, column in xueqiu_columns:
            ranking = xueqiu.top_stocks(field=field,frequency='1w',count = 800)
            ranking = ranking.set_index('order_book_id')
            # Align to the stock pool before taking the raw values.
            ranking = ranking.reindex(index=stock_set)
            ranking = ranking.sort_index()
            df[column] = ranking.values
    except Exception:
        for _, column in xueqiu_columns:
            df[column] = Series()

    return df[factors]

2、因子的权重的确定

获取 IC：

因子在某一期的 IC 指的是该期因子对股票的下期收益的预测值和股票下期的实际回报值在横截面上的相关系数。此次使用 python 中 scipy.stats 库中的 pearsonr 方法简化运算，规定因子的 IC=pearsonr(factor_values_0,returns_0_1)，假设现在是第 2 期的时间，其中 factor_values_0 指第 0 期的因子值，returns_0_1 指第 0 期至第 1 期的涨跌幅, pearsonr 为相关系数计算。

def get_currentIC(date):
    """Compute every factor's IC for the period ending on ``date``.

    The factor values of the previous entry in the module-level ``date_list``
    are winsorized and correlated (Pearson) with ``all_returns[date]``.
    Missing values are replaced by 0 before the correlation is taken.

    Args:
        date: An entry of ``date_list`` other than the first one.

    Returns:
        A one-row DataFrame with one column per name in ``factors``.

    Raises:
        ValueError: If ``date`` is not in ``date_list``, or is its first
            entry and therefore has no preceding period.
    """
    position = date_list.index(date)
    if position == 0:
        # Index -1 would silently wrap around to the *last* date in the list,
        # pairing these returns with factor data from the future.
        raise ValueError('no earlier date in date_list before %s' % date)
    lst_date = date_list[position - 1]

    # Factor data from the previous Friday trading day, with outliers clipped.
    fd2 = winsorize(get_data(lst_date))
    pearson_df = pd.concat([fd2,all_returns[date]],axis=1)
    pearson_df = pearson_df.replace(np.nan,0)

    ic_by_factor = {}
    for fac in factors:
        ic,_ = st.pearsonr(pearson_df[fac],pearson_df[date])
        ic_by_factor[fac] = [ic]
    # `columns=` keeps the factor order regardless of dict ordering.
    return pd.DataFrame(ic_by_factor, columns=list(factors))

处理异常值的 winsorize 方法：

def winsorize(df):
    """Clip every column of ``df`` to mean ± 3 standard deviations.

    The mean and the population standard deviation (ddof=0) are computed per
    column; values outside the band are set to the nearest bound. The input
    frame is not modified.

    Args:
        df: DataFrame of numeric factor columns.

    Returns:
        A new DataFrame with the same columns, in the same order.
    """
    output = pd.DataFrame()
    for name in df.columns:
        s = df[name]
        center = s.mean()
        spread = s.std(ddof=0)
        # clip() replaces the former replace(<Series>, scalar) calls, which
        # current pandas rejects because a Series counts as dict-like.
        output[name] = s.clip(lower=center - 3*spread, upper=center + 3*spread)

    return output

其中 all_returns 是股票的涨跌，通过以下方式获得所有股票的涨跌幅

def count_reven(stocks,s_date,e_date):
    """Return each stock's simple return over a date window.

    Closing prices are read from the 'ClosingPx' field of ``get_price``; the
    return is the last row divided by the first row, minus 1.

    Args:
        stocks: Stock identifiers passed to ``get_price``.
        s_date: Start date of the window.
        e_date: End date of the window.

    Returns:
        A Series of returns.
    """
    # Compute the price change over the window.
    df_cn = get_price(stocks,start_date=s_date,end_date=e_date)['ClosingPx']

    # Positional first/last rows; .iloc replaces .ix, which pandas removed.
    day0 = Series(df_cn.iloc[0])
    day1 = Series(df_cn.iloc[-1])

    rets = day1/day0-1

    return rets

计算 IR 与权重:

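然后，根据 IC 计算 IR：因子的 IR 值是指因子 IC 的均值与因子 IC 的标准差的比值。此次试验需要最大化组合的 IR 值，对应的最优权重为 $w \propto \Sigma_{IC}^{-1}\,\overline{IC}$，其中 $\overline{IC}$ 为各因子过去 N 期 IC 的均值向量，$\Sigma_{IC}$ 为这些 IC 的协方差矩阵。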

具体代码体现如下：

N = 8  # number of past periods whose ICs feed the weight estimate


def get_bestweight(currentdate):
    """Estimate factor weights from the ICs of the N periods before ``currentdate``.

    For each of the N entries of ``date_list`` that precede ``currentdate``
    the per-factor IC is computed with ``get_currentIC``. Factors with a
    missing IC in any period are dropped, and the weights are the inverse IC
    covariance matrix applied to the mean IC vector.

    Args:
        currentdate: An entry of the module-level ``date_list``.

    Returns:
        ``(eff_facs, weight)``: the names of the factors that were kept, and
        a 1-D array of their weights in the same order.

    Raises:
        ValueError: If ``currentdate`` is not in ``date_list`` or fewer than
            N + 1 entries precede it.
    """
    position = date_list.index(currentdate)
    # get_currentIC looks one period further back than the date it is given,
    # so N + 1 earlier entries are needed. Without this check the negative
    # indices wrap around to the end of date_list and pull in future data.
    if position < N + 1:
        raise ValueError(
            'need at least %d earlier dates before %s, found %d'
            % (N + 1, currentdate, position)
        )

    dates = [date_list[position - i - 1] for i in range(N)]  # the N previous dates
    IC = pd.concat([get_currentIC(d) for d in dates], axis=0)
    IC = IC.dropna(axis=1)
    eff_facs = IC.columns
    if len(eff_facs) == 0:
        return eff_facs, np.array([])

    cov = IC.cov().values
    mean_ic = IC.mean().values
    # With only N observations the covariance matrix is rank-deficient as soon
    # as more than N - 1 factors remain, so a plain inverse fails or is
    # meaningless. The pseudo-inverse equals the inverse when one exists.
    weight = np.asarray(nlg.pinv(cov).dot(mean_ic)).reshape(-1)
    return eff_facs,weight

至此，已经获得所有因子的权重了，但是权重的数量级参差不齐;

为了避免某一因子的权重过大，对权重进行无量纲化（归一化）处理：

    # Rescale the raw factor weights so that no single factor dominates.
    # NOTE(review): `weight` must expose `.values` here (e.g. a pandas
    # object) — confirm against the surrounding code, which is not shown.
    train = weight.values
    # Presumably scikit-learn's MinMaxScaler — TODO confirm the import.
    nm=MinMaxScaler()
    train=nm.fit_transform(train)

至此就获得了因子的权重了，剩下就是对因子进行筛选，根据因子的贡献度进行因子的筛选：

将股票根据涨跌幅进行排序，分成 10 组，平均涨跌幅最高组的涨跌幅记为 high，最低记为 low。对每一个因子排序分成 10 组，平均涨幅最高一组的涨跌幅记为 port_high，最低的一组记为 port_low。

规定因子贡献度=abs(port_high-port_low)/(high-low)，因子贡献度越高，说明因子效果越好。按照因子贡献度排序，取贡献度最大的前 7 个因子。

然后给股票打分，选择交易的股票，具体的思路是：

将上一期时间每个因子值按大小分成十组，将收益率最大的一组规定其组号为 10，以此类推，最低的规定为 1。

获得这期每个股票在每个有效因子上的组号，乘以对应因子的权重，作为该股票在该因子上的得分，累加每个股票在每个有效因子上的得分，作为总分。取总分最高的前 30 个股票进行交易。