from tqz_extern.json_operator import TQZJsonOperator
# Silence every warning for the rest of the process: the "ignore" action is
# installed with no category/module restriction.
warnings.filterwarnings("ignore")
# Trading-session table loaded once from a JSON file. The cleaning code in this
# file reads it as session_map[symbol]['night'] + session_map[symbol]['day'] and
# compares 2-item entries against the 'UpdateTime' column.
# NOTE(review): presumably each entry is a [start, end] pair of time strings and
# the relative path is resolved against the working directory - confirm against
# the JSON file and TQZJsonOperator.tqz_load_jsonfile.
session_map = TQZJsonOperator.tqz_load_jsonfile(jsonfile='../trading_time/source_trading_time.json')
def dump_all_format_csv(cls, datetime_str: str):
    """
    Dump all instruments' format csv of one day.

    Loads the day's source file through ``cls.__check_source_file`` (which
    sets ``cls.source_content`` and ``cls.target_dir``), then writes one
    cleaned csv per distinct 'InstrumentID' into ``cls.target_dir``.

    :param datetime_str: day to process, forwarded to ``cls.__check_source_file``.
    :raises AssertionError: if ``cls.source_content`` is None after loading.
    """
    cls.__check_source_file(datetime_str=datetime_str)

    source = cls.source_content
    assert source is not None, 'cls.source_content is None.'

    instrument_column = source['InstrumentID']
    # A set removes repeated ids, so every instrument is written exactly once.
    for instrument_id in set(instrument_column.values):
        instrument_rows = source[instrument_column == instrument_id]
        cleaned = cls.__get_format_market_data(instrument_source_df=instrument_rows)
        cleaned.to_csv(f'{cls.target_dir}/{instrument_id}.csv', index=False)
def __check_source_file(cls, datetime_str: str):
    """
    Check single day's market depth data csv file.

    Builds the source csv path and the target directory from the date string,
    stores the target directory on ``cls.target_dir`` (creating it when it
    does not exist) and loads the source csv into ``cls.source_content``.

    :param datetime_str: datetime that needs parsing, eg: 20230926.
    :raises AssertionError: if the source csv file does not exist.
    """
    year = datetime_str[:4]
    month = datetime_str[4:6]

    source_path = f'E:/futures_market_data/market_depth_data/{year}/{month}/market_depth_data_{datetime_str}.csv'
    # target_dir is assigned before the source check, as callers may read it afterwards.
    cls.target_dir = f'E:/futures_market_data/target_data/{year}/{month}/{datetime_str}'

    assert os.path.exists(source_path), f'Bad source_path {source_path}.'

    if not os.path.exists(cls.target_dir):
        os.makedirs(cls.target_dir, exist_ok=True)

    cls.source_content = pandas.read_csv(source_path)
def __get_format_market_data(cls, instrument_source_df: pandas.DataFrame) -> pandas.DataFrame:
    """
    Clean single instrument dataframe.

    Keeps only the rows whose 'UpdateTime' falls inside one of the trading
    sessions configured for the instrument's symbol (``cls.session_map[symbol]``
    'night' + 'day' entries), ordered by 'Timestamp'. Each matching row is
    returned exactly once and the input dataframe is left untouched.

    :param instrument_source_df: source dataframe of single instrument; it is
        read through its 'ExchangeInstrument', 'UpdateTime' and 'Timestamp' columns.
    :return: single instrument dataframe after clean, sorted by 'Timestamp'
        with a fresh 0..n-1 index.
    :raises AssertionError: if the dataframe does not hold exactly one
        'ExchangeInstrument' value, or its symbol is missing from ``cls.session_map``.
    """
    exchange_instruments = instrument_source_df['ExchangeInstrument'].unique()
    assert len(exchange_instruments) == 1, f'Bad ExchangeInstrument {exchange_instruments}.'

    # The symbol is the instrument code with every digit run removed.
    symbol = re.sub(r'\d+', '', exchange_instruments[0])
    assert symbol in cls.session_map, f'Bad symbol: {symbol}.'
    symbol_sessions = cls.session_map[symbol]['night'] + cls.session_map[symbol]['day']

    update_time = instrument_source_df['UpdateTime']
    # One mask OR-ed across all sessions: no row can be emitted twice, and a
    # skipped entry cannot re-use the previous session's result.
    in_session = pandas.Series(False, index=instrument_source_df.index)
    for single_session in symbol_sessions:
        if len(single_session) != 2:
            continue  # only 2-item entries are compared against UpdateTime
        start, end = single_session
        if start < end:
            # Session within a single day: start <= t < end.
            in_session |= (update_time >= start) & (update_time < end)
        elif start > end:
            # Session wrapping past midnight: t >= start or t < end.
            in_session |= (update_time >= start) | (update_time < end)

    instrument_format_df = instrument_source_df[in_session].sort_values(by='Timestamp', ascending=True)
    return instrument_format_df.reset_index(drop=True)
if __name__ == '__main__':
    # Script entry point: dump the formatted csv files for one hard-coded day.
    parser_datetime = '20230926'
    MarketDataParser.dump_all_format_csv(
        datetime_str=parser_datetime,
    )
