# task_name selects which processor to use; it is lower-cased before lookup.
task_name = FLAGS.task_name.lower()

# `processors` is keyed by task name. Fail fast with a clear error when the
# requested task has no registered processor.
if task_name not in processors:
    raise ValueError("Task not found: %s" % (task_name))

# The entry is called with no arguments to obtain the processor that will
# handle the data for this task.
processor = processors[task_name]()
# Custom processor written specifically for this project's own data set.
class MoveProcessor(DataProcessor):
    """Processor for the move data set.

    Each split is read from a fixed file name inside ``data_dir``
    (``train.tsv``, ``dev.tsv``, ``test.tsv``) via ``self._read_tsv``, which
    is not defined in this class and is expected to come from the base class.
    """

    def get_train_examples(self, data_dir):
        """Return the examples built from ``train.tsv`` in ``data_dir``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """Return the examples built from ``dev.tsv`` in ``data_dir``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev")

    def get_test_examples(self, data_dir):
        """Return the examples built from ``test.tsv`` in ``data_dir``."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test")

    def get_labels(self):
        """Return the list of label strings used by this task."""
        return ["0", "1", "2", "3"]

    # One shared conversion routine so every split is turned into examples
    # the same way, whichever data set calls it.
    def _create_examples(self, lines, set_type):
        """Create examples for the train, dev and test sets.

        Args:
            lines: Iterable of rows; each row is indexed by column position.
            set_type: Split name. It prefixes every guid, and the value
                ``"test"`` switches to the unlabeled column layout.

        Returns:
            A list with one ``InputExample`` per row, in input order.

        Raises:
            IndexError: If a row has fewer columns than the layout requires
                (one column for ``"test"``, two otherwise).
        """
        examples = []
        for (i, line) in enumerate(lines):
            guid = "%s-%s" % (set_type, i)
            if set_type == "test":
                # Test rows carry only the text in column 0; "0" is used as
                # a placeholder label.
                text_a = tokenization.convert_to_unicode(line[0])
                label = "0"
            else:
                # Train/dev rows: column 0 is the label, column 1 the text.
                text_a = tokenization.convert_to_unicode(line[1])
                label = tokenization.convert_to_unicode(line[0])
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=None, label=label))
        return examples
本文由 iVEGA 创作,采用 CC BY 3.0 CN 协议进行许可。
可自由转载、引用,但需署名作者且注明文章出处:http://yoursite.com/2018/12/01/Bert%E6%A8%A1%E5%9E%8B%E7%9A%84%E5%BE%AE%E8%B0%83/index.html 。