# Build the extraction examples from the raw_examples[:p1] slice.
# NOTE(review): the positional -1 is presumably the negative-sampling ratio
# (the run arguments listed after this snippet also pass --negative_ratio -1)
# -- confirm against the _create_ext_examples signature.
# NOTE(review): is_train=False is passed although the result is named
# train_examples; this looks deliberate (no cross-constructed negative
# samples) -- confirm before changing it.
train_examples = _create_ext_examples(
    raw_examples[:p1],
    -1,
    args.prompt_prefix,
    args.options,
    args.separator,
    args.is_shuffle,
    is_train=False,
    schema_lang=args.schema_lang,
)
--doccano_file
./data_rl/all.jsonl
--task_type
ext
--save_dir
./data_rl_dev
--splits
0
1.0
0
--negative_ratio
-1
--is_shuffle
True
单条样本为空的也标注出来;不交叉构造负样本,以免引入额外噪音。
def do_predict_history(self, texts):
    """Extract a structured epidemiological history from free text.

    ``texts`` is split on the literal separator ``'history'`` and the pieces
    are passed to ``self.predictor.predict``. Each prediction is read as a
    dict whose ``'时间'`` entry lists time mentions; a mention is a dict with
    ``'text'``, ``'start'`` and, optionally, ``'relations'`` (relation name ->
    list of items carrying ``'text'`` and ``'start'``).

    One record is produced per time mention, ordered by ``'start'``. A record
    starts as ``dict.fromkeys(self.history, '')``; the mention text is stored
    under ``self.history[0]`` and every relation under
    ``self.history_map[relation_name]``. A relation item is attributed to a
    time mention only when it starts after that mention and before the next
    one. The ``'接触者'`` relation becomes a list of
    ``{'姓名', '电话', '身份证'}`` dicts (``''`` when empty); any other
    relation keeps only the text of its first item in the window.

    Args:
        texts: Input string; segments are separated by ``'history'``.

    Returns:
        ``{'流行病学史': [record, ...]}`` with the records of all segments,
        in segment order. Segments without a ``'时间'`` entry add nothing.

    Raises:
        KeyError: If a relation name is not a key of ``self.history_map``
            (assuming it is a plain mapping).
    """

    def by_start(item):
        return item['start']

    def in_window(items, lower, upper):
        # Items ordered by start with lower < start, and start < upper when
        # an upper bound (the next time mention) exists.
        return [
            item for item in sorted(items, key=by_start)
            if item['start'] > lower
            and (upper is None or item['start'] < upper)
        ]

    def first_nearby(entity, relation, anchor, max_gap):
        # Text of the entity's first `relation` item when it starts strictly
        # after `anchor` and fewer than `max_gap` characters later; else None.
        items = entity.get('relations', {}).get(relation)
        if items and 0 < items[0]['start'] - anchor < max_gap:
            return items[0]['text']
        return None

    def build_contact(mention, entities):
        # Phone / ID number come from the top-level contact entities that
        # share the mention's text; a later matching entity overrides an
        # earlier one.
        contact = {'姓名': mention['text'], '电话': '', '身份证': ''}
        for entity in entities:
            if entity['text'] != mention['text']:
                continue
            for field, max_gap in (('电话', 10), ('身份证', 30)):
                value = first_nearby(entity, field, mention['start'], max_gap)
                if value is not None:
                    contact[field] = value
        return contact

    history_result = []
    # Raw predictions are deliberately not printed: they can contain names,
    # phone numbers and ID numbers.
    for result in self.predictor.predict(texts.split('history')):
        # A segment without any time mention has no '时间' key; it simply
        # yields no records.
        times = sorted(result.get('时间', []), key=by_start)
        contact_entities = result.get('接触者', [])
        for index, res in enumerate(times):
            record = dict.fromkeys(self.history, '')
            if 'text' in res:
                record[self.history[0]] = res['text']
            # The window of a time mention ends where the next one begins;
            # the last mention is unbounded.
            if index + 1 < len(times):
                upper = times[index + 1]['start']
            else:
                upper = None
            for name, items in res.get('relations', {}).items():
                nearby = in_window(items, res['start'], upper)
                if name == '接触者':
                    contacts = [
                        build_contact(mention, contact_entities)
                        for mention in nearby
                    ]
                    record[self.history_map[name]] = contacts if contacts else ''
                else:
                    record[self.history_map[name]] = (
                        nearby[0]['text'] if nearby else ''
                    )
            history_result.append(record)
    return {'流行病学史': history_result}