Shortcuts

Source code for mmocr.datasets.pipelines.ner_transforms

# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmdet.datasets.builder import PIPELINES

from mmocr.models.builder import build_convertor


[docs]@PIPELINES.register_module() class NerTransform: """Convert text to ID and entity in ground truth to label ID. The masks and tokens are generated at the same time. The four parameters will be used as input to the model. Args: label_convertor: Convert text to ID and entity in ground truth to label ID. max_len (int): Limited maximum input length. """ def __init__(self, label_convertor, max_len): self.label_convertor = build_convertor(label_convertor) self.max_len = max_len def __call__(self, results): texts = results['text'] input_ids = self.label_convertor.convert_text2id(texts) labels = self.label_convertor.convert_entity2label( results['label'], len(texts)) attention_mask = [0] * self.max_len token_type_ids = [0] * self.max_len # The beginning and end IDs are added to the ID, # so the mask length is increased by 2 for i in range(len(texts) + 2): attention_mask[i] = 1 results = dict( labels=labels, texts=texts, input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids) return results
[docs]@PIPELINES.register_module() class ToTensorNER: """Convert data with ``list`` type to tensor.""" def __call__(self, results): input_ids = torch.tensor(results['input_ids']) labels = torch.tensor(results['labels']) attention_masks = torch.tensor(results['attention_mask']) token_type_ids = torch.tensor(results['token_type_ids']) results = dict( img=[], img_metas=dict( input_ids=input_ids, attention_masks=attention_masks, labels=labels, token_type_ids=token_type_ids)) return results
Read the Docs v: v0.4.0
Versions
latest
stable
v0.4.0
v0.3.0
v0.2.1
v0.2.0
v0.1.0
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.