• MindSpore 1.0.0 模型训练报错


    问题描述:

    使用 MindSpore 1.0.0 版本进行模型训练时,调用 model.train() 报错,报错信息如下:

    ----> 1 model.train(config.epochs, train_dataset, callbacks=cb, dataset_sink_mode=False)

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/train/model.py in train(self, epoch, train_dataset, callbacks, dataset_sink_mode, sink_size)

        562                     callbacks=callbacks,

        563                     dataset_sink_mode=dataset_sink_mode,

    --> 564                     sink_size=sink_size)

        565

        566     def _eval_dataset_sink_process(self, valid_dataset, list_callback=None, cb_params=None):

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/train/model.py in _train(self, epoch, train_dataset, callbacks, dataset_sink_mode, sink_size)

        366         with _CallbackManager(callbacks) as list_callback:

        367             if not dataset_sink_mode:

    --> 368                 self._train_process(epoch, train_dataset, list_callback, cb_params)

        369             elif context.get_context("mode") == context.PYNATIVE_MODE or context.get_context("device_target") == "CPU":

        370                 logger.warning("The pynative mode and CPU cannot support dataset sink mode currently."

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/train/model.py in _train_process(self, epoch, train_dataset, list_callback, cb_params)

        461                                                   phase='train',

        462                                                   dataset=train_dataset,

    --> 463                                                   dataset_sink_mode=False)

        464         cb_params.cur_step_num = 0

        465         run_context = RunContext(cb_params)

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/train/model.py in _exec_preprocess(self, network, is_train, phase, dataset, dataset_sink_mode, sink_size, epoch_num)

        255         if dataset_sink_mode and not is_train:

        256             dataset.__loop_size__ = 1

    --> 257         dataset_helper = DatasetHelper(dataset, dataset_sink_mode, sink_size, epoch_num)

        258

        259         if dataset_sink_mode:

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/train/dataset_helper.py in __init__(self, dataset, dataset_sink_mode, sink_size, epoch_num)

        146         else:

        147             iterclass = _DatasetIterNormal

    --> 148             self.iter = iterclass(dataset)

        149

        150     def __iter__(self):

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/train/dataset_helper.py in __init__(self, dataset)

        295         self.device_num = _get_device_num()

        296         self.global_rank = _get_global_rank()

    --> 297         self.iter = self.dataset.create_tuple_iterator()

        298

        299     def __iter__(self):

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/dataset/engine/validators.py in new_method(self, *args, **kwargs)

        285         nreq_param_bool = ['output_numpy']

        286         validate_dataset_param_value(nreq_param_bool, param_dict, bool)

    --> 287         return method(self, *args, **kwargs)

        288

        289     return new_method

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/dataset/engine/datasets.py in create_tuple_iterator(self, columns, num_epochs, output_numpy)

       1186         if self._noop_mode():

       1187             return DummyIterator(self, 'tuple')

    -> 1188         return TupleIterator(self, columns, num_epochs, output_numpy)

       1189

       1190     @check_iterator

    ~/.virtualenvs/basenv/lib/python3.7/site-packages/mindspore/dataset/engine/iterators.py in __init__(self, dataset, columns, num_epochs, output_numpy)

        358             dataset = dataset.project(columns)

        359         super().__init__(dataset, num_epochs, output_numpy)

    --> 360         self.depipeline.LaunchTreeExec()

        361

        362     def __iter__(self):

    RuntimeError: Thread ID 140231386838784 Unexpected error. Fail to find size of file

    Line of code : 267

    File         : /home/jenkins/agent-working-dir/workspace/Compile_CPU_X86_Ubuntu/mindspore/mindspore/ccsrc/minddata/dataset/core/tensor.cc

    请问这是什么原因造成的?解决方法是什么?

    解答:

    该错误应该是在将本地文件读取并转换为 Tensor 时产生的,需要检查数据集中对应文件的合法性。MindSpore 1.0 的报错信息不会打印出错文件的路径,可以安装新版本的 MindSpore(例如 1.7)后重新运行,通过报错信息中打印出的文件路径定位有问题的文件。

     

  • 相关阅读:
    论文笔记:Auto-Encoding Scene Graphs for Image Captioning
    【Linux】awk入门
    C++模板编程(22)---显式实例化Explicit Instantiation
    LabVIEW将 VI 升级到较新的版本和恢复为先前版本
    Session&Cookie
    Spring Security JWT Authentication and Authorisation(一)
    构造函数继承
    【LeetCode刷题日志】225.用队列实现栈
    【Tensorflow-gpu】window11下深度学习环境搭建
    红帽注册账号,官网下载Redhat Enterprise Linux 9.0 ISO镜像文件
  • 原文地址:https://blog.csdn.net/weixin_45666880/article/details/125447996