-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmulti_factor.py
More file actions
178 lines (155 loc) · 6.86 KB
/
multi_factor.py
File metadata and controls
178 lines (155 loc) · 6.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
"""
多因子模型模块 - 向后兼容包装器
所有类已移动到 multifactor/ 文件夹下,此文件保持向后兼容
"""
import warnings
import os
import pandas as pd
warnings.filterwarnings("ignore")
# 从新模块导入所有类
from multifactor import (
FactorPreprocessor,
ModelManager,
DataManager,
Backtester,
MultiFactorModel
)
from factor.factor_generator import generate_my_factors, generate_alpha158_factors
MODEL_DIR = 'models'
DATA_DIR = 'data'
# ========== 保持向后兼容的函数包装器 ==========
def winsorize(series, limits=(0.01, 0.01)):
    """Winsorize ``series`` (backward-compatible wrapper).

    Forwards to ``FactorPreprocessor.winsorize`` on a default-constructed
    preprocessor and returns its result unchanged.

    Args:
        series: Values to winsorize; passed through as-is.
        limits: Passed through as-is. Per the legacy docstring these are the
            quantile boundaries extremes are limited to; the exact semantics
            are defined by ``FactorPreprocessor.winsorize``.

    Returns:
        Whatever ``FactorPreprocessor.winsorize`` returns.
    """
    return FactorPreprocessor().winsorize(series, limits)
def cross_sectional_zscore(df, col, new_col=None):
    """Cross-sectional standardization (backward-compatible wrapper).

    Per the legacy docstring, the Z-score is computed per date. The work is
    done by ``FactorPreprocessor.cross_sectional_zscore`` on a
    default-constructed preprocessor.

    Args:
        df: Input data; forwarded unchanged.
        col: Column to standardize; forwarded unchanged.
        new_col: Optional output column name; forwarded unchanged
            (``None`` is passed through, not resolved here).

    Returns:
        Whatever ``FactorPreprocessor.cross_sectional_zscore`` returns.
    """
    return FactorPreprocessor().cross_sectional_zscore(df, col, new_col)
def neutralize_factor_by_size_and_industry(
    df,
    factor_col,
    size_col='log_marketcap',
    industry_col='industry',
    new_col=None,
    min_stocks=3,
):
    """Neutralize a factor by market cap and industry (compat wrapper).

    Builds a ``FactorPreprocessor`` configured with ``size_col`` and
    ``industry_col`` and forwards every argument, positionally, to its
    ``neutralize`` method.

    Args:
        df: Input data; forwarded unchanged.
        factor_col: Name of the factor column to neutralize.
        size_col: Size column name; given both to the constructor and to
            ``neutralize``.
        industry_col: Industry column name; given both to the constructor
            and to ``neutralize``.
        new_col: Optional output column name; forwarded unchanged.
        min_stocks: Forwarded unchanged; its meaning is defined by
            ``FactorPreprocessor.neutralize``.

    Returns:
        Whatever ``FactorPreprocessor.neutralize`` returns.
    """
    neutralizer = FactorPreprocessor(size_col=size_col, industry_col=industry_col)
    return neutralizer.neutralize(
        df, factor_col, size_col, industry_col, new_col, min_stocks
    )
def preprocess_factors(df, factor_cols, shares_col='sharesOutstanding'):
    """Run the full factor preprocessing pipeline (compat wrapper).

    Per the legacy docstring the pipeline is: winsorize -> neutralize ->
    standardize. The steps themselves are implemented by
    ``FactorPreprocessor.preprocess``; this function only delegates.

    Args:
        df: Input data; forwarded unchanged.
        factor_cols: Factor columns to process; forwarded unchanged.
        shares_col: Forwarded unchanged as the third positional argument.

    Returns:
        Whatever ``FactorPreprocessor.preprocess`` returns.
    """
    return FactorPreprocessor().preprocess(df, factor_cols, shares_col)
def build_train_test_set(df, features, train_start_date, test_start_date, train_end_date):
    """Build the training and test datasets (compat wrapper).

    Delegates to ``ModelManager.build_train_test_set`` on a manager rooted
    at the module-level ``MODEL_DIR``.

    Args:
        df: Input data; forwarded unchanged.
        features: Feature selection; forwarded unchanged.
        train_start_date: Forwarded unchanged.
        test_start_date: Forwarded unchanged.
        train_end_date: Forwarded unchanged.

    Returns:
        Whatever ``ModelManager.build_train_test_set`` returns.
    """
    return ModelManager(model_dir=MODEL_DIR).build_train_test_set(
        df, features, train_start_date, test_start_date, train_end_date
    )
def build_ml_multi_factor_model(
    X_train, y_train, X_test, y_test,
    model_type='xgb',
    objective='reg:squarederror',
    model_params=None,
    eval_metrics=True,
    random_state=42,
    verbose=True,
):
    """Build and train a multi-factor regression model (compat wrapper).

    Creates a ``ModelManager`` for ``MODEL_DIR`` seeded with
    ``random_state`` and hands the remaining arguments, positionally, to
    its ``train`` method.

    Args:
        X_train, y_train: Training inputs/targets; forwarded unchanged.
        X_test, y_test: Test inputs/targets; forwarded unchanged.
        model_type: Forwarded unchanged (default ``'xgb'``).
        objective: Forwarded unchanged (default ``'reg:squarederror'``).
        model_params: Forwarded unchanged; ``None`` is not resolved here.
        eval_metrics: Forwarded unchanged.
        random_state: Used only to construct the ``ModelManager``.
        verbose: Forwarded unchanged.

    Returns:
        Whatever ``ModelManager.train`` returns.
    """
    trainer = ModelManager(model_dir=MODEL_DIR, random_state=random_state)
    return trainer.train(
        X_train, y_train, X_test, y_test,
        model_type, objective, model_params, eval_metrics, verbose,
    )
def generate_stocks_combined(get_latest=False, verbose=True):
    """Generate the combined stock data (compat wrapper).

    Delegates to ``DataManager.load_stocks`` on a manager rooted at the
    module-level ``DATA_DIR``.

    Args:
        get_latest: Forwarded unchanged as a keyword argument.
        verbose: Forwarded unchanged as a keyword argument.

    Returns:
        Whatever ``DataManager.load_stocks`` returns.
    """
    return DataManager(data_dir=DATA_DIR).load_stocks(
        get_latest=get_latest,
        verbose=verbose,
    )
def generate_stock_factors(factor_func, df=None, selected_factor=None, selected_factors=None):
    """Generate stock factors (compat wrapper).

    Delegates to ``DataManager.generate_factors`` on a manager rooted at
    the module-level ``DATA_DIR``.

    Args:
        factor_func: Factor-generating callable; forwarded unchanged.
        df: Optional input data; forwarded unchanged.
        selected_factor: Forwarded unchanged.
        selected_factors: Forwarded unchanged.

    Returns:
        Whatever ``DataManager.generate_factors`` returns.
    """
    data_manager = DataManager(data_dir=DATA_DIR)
    # NOTE(review): the positional order below (selected_factor before df)
    # differs from this wrapper's own parameter order; presumably it matches
    # DataManager.generate_factors's signature - confirm against that class.
    return data_manager.generate_factors(
        factor_func, selected_factor, df, selected_factors
    )
def generate_processed_factors(selected_factor=None, selected_factors=None, verbose=True):
    """Generate processed factors (compat wrapper).

    Delegates to ``DataManager.generate_processed_factors`` on a manager
    rooted at the module-level ``DATA_DIR``.

    Args:
        selected_factor: Forwarded unchanged.
        selected_factors: Forwarded unchanged.
        verbose: Forwarded unchanged.

    Returns:
        Whatever ``DataManager.generate_processed_factors`` returns.
    """
    return DataManager(data_dir=DATA_DIR).generate_processed_factors(
        selected_factor, selected_factors, verbose
    )
def load_model(selected_model, selected_factor, selected_model_weight=None):
    """Load a model (compat wrapper).

    Delegates to ``ModelManager.load`` on a manager rooted at the
    module-level ``MODEL_DIR``.

    Args:
        selected_model: Forwarded unchanged.
        selected_factor: Forwarded unchanged.
        selected_model_weight: Forwarded unchanged; ``None`` is not
            resolved here.

    Returns:
        Whatever ``ModelManager.load`` returns.
    """
    return ModelManager(model_dir=MODEL_DIR).load(
        selected_model, selected_factor, selected_model_weight
    )
def save_model(model, selected_model, selected_factor):
    """Save a model (compat wrapper).

    Delegates to ``ModelManager.save`` on a manager rooted at the
    module-level ``MODEL_DIR``.

    Args:
        model: Model object to persist; forwarded unchanged.
        selected_model: Forwarded unchanged.
        selected_factor: Forwarded unchanged.

    Returns:
        Whatever ``ModelManager.save`` returns.
    """
    return ModelManager(model_dir=MODEL_DIR).save(
        model, selected_model, selected_factor
    )
def backtest_mutlifactor(
    df,
    score_col='score',
    rebalance_period=30,
    top_n=5,
    transaction_cost=0.001,
    risk_free_rate=0.02,
    min_stocks=10,
    balance_weight=True,
):
    """Run a multi-factor backtest (compat wrapper).

    The function name keeps its historical spelling ("mutli") because this
    module exists for backward compatibility with existing callers.

    Args:
        df: Input data; passed to ``Backtester.run``.
        score_col: Passed to ``Backtester.run``.
        rebalance_period: Passed to the ``Backtester`` constructor.
        top_n: Passed to the ``Backtester`` constructor.
        transaction_cost: Passed to the ``Backtester`` constructor.
        risk_free_rate: Passed to the ``Backtester`` constructor.
        min_stocks: Passed to the ``Backtester`` constructor.
        balance_weight: Passed to ``Backtester.run``.

    Returns:
        Whatever ``Backtester.run`` returns.
    """
    engine = Backtester(
        rebalance_period, top_n, transaction_cost, risk_free_rate, min_stocks
    )
    return engine.run(df, score_col, balance_weight)
def train_and_backtest_multifactormodel(
    load_fromcsv=True,
    disable_factor_process=True,
    train_model=True,
    train_startdate='2015-01-01',
    test_startdate='2019-01-01',
    train_enddate='2020-01-01',
    backtest_startdate='2020-01-02',
    backtest_enddate=None,
    balance_weight=True,
    pick_stockmode=False,
    pick_stock=None,
    pick_startdate=None,
    rebalance_period=5,
    top_n=1,
    selected_model=None,
    selected_model_weight=None,
    selected_factor=None,
    train_selected_model=None,
    verbose=True,
):
    """Train and backtest the multi-factor model (compat wrapper).

    Ensures ``MODEL_DIR`` exists, builds a ``MultiFactorModel`` for
    ``MODEL_DIR``/``DATA_DIR`` and dispatches on ``pick_stockmode``:

    * truthy  -> ``MultiFactorModel.pick_stocks`` (stock-picking mode);
      ``backtest_startdate``, ``backtest_enddate``, ``balance_weight`` and
      ``rebalance_period`` are not forwarded.
    * falsy   -> ``MultiFactorModel.train_and_backtest`` (backtest mode);
      ``pick_stock`` and ``pick_startdate`` are not forwarded.

    All other parameters are forwarded by keyword, under the same names, to
    whichever method is selected.

    Returns:
        The return value of the selected ``MultiFactorModel`` method.
    """
    os.makedirs(MODEL_DIR, exist_ok=True)

    pipeline = MultiFactorModel(model_dir=MODEL_DIR, data_dir=DATA_DIR)

    # Keyword arguments that both modes receive.
    shared_kwargs = dict(
        load_fromcsv=load_fromcsv,
        disable_factor_process=disable_factor_process,
        train_model=train_model,
        train_startdate=train_startdate,
        test_startdate=test_startdate,
        train_enddate=train_enddate,
        top_n=top_n,
        selected_model=selected_model,
        selected_model_weight=selected_model_weight,
        selected_factor=selected_factor,
        train_selected_model=train_selected_model,
        verbose=verbose,
    )

    if not pick_stockmode:
        # Backtest mode.
        return pipeline.train_and_backtest(
            backtest_startdate=backtest_startdate,
            backtest_enddate=backtest_enddate,
            balance_weight=balance_weight,
            rebalance_period=rebalance_period,
            **shared_kwargs,
        )

    # Stock-picking mode.
    return pipeline.pick_stocks(
        pick_stock=pick_stock,
        pick_startdate=pick_startdate,
        **shared_kwargs,
    )
def main():
    """Script entry point: run the legacy wrapper in stock-picking mode."""
    # Instance of the new class-based API. Only the commented-out example
    # below uses it; the active call goes through the legacy wrapper.
    model = MultiFactorModel(model_dir=MODEL_DIR, data_dir=DATA_DIR)

    # Example: stock-picking mode through the class API.
    # result = model.pick_stocks(load_fromcsv=False, train_model=False, top_n=5,
    #                            selected_factor='myfactors', selected_model='xgb', verbose=False)

    # Backward compatibility: the legacy function can still be used.
    train_and_backtest_multifactormodel(
        load_fromcsv=False,
        pick_stockmode=True,
        train_model=False,
        top_n=5,
        verbose=False,
    )
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()