Source code for underthesea.ner

# -*- coding: utf-8 -*-
from underthesea import chunk
import sys

if sys.version_info >= (3, 0):
    from .model_crf import CRFNERPredictor
else:
    from model_crf import CRFNERPredictor


def ner(sentence, format=None):
    """
    Locate and classify named entities in text.

    The sentence is first passed through ``chunk`` and the chunked result
    is then handed to the CRF named-entity predictor.

    Parameters
    ----------
    sentence : {unicode, str}
        Raw sentence.
    format : optional
        Forwarded unchanged as the second argument of the predictor's
        ``predict`` method; defaults to ``None``.
        NOTE(review): the accepted values and their effect on the output
        are defined by ``CRFNERPredictor.predict`` -- confirm there.

    Returns
    -------
    tokens : list of tuple with word, pos tag, chunking tag, ner tag
        Tagged sentence, as illustrated by the example below for the
        default ``format``. The value is whatever ``predict`` returns.

    Examples
    --------

    >>> # -*- coding: utf-8 -*-
    >>> from underthesea import ner
    >>> sentence = "Ông Putin ca ngợi những thành tựu vĩ đại của Liên Xô"
    >>> ner(sentence)
    [('Ông', 'Nc', 'B-NP', 'O'),
    ('Putin', 'Np', 'B-NP', 'B-PER'),
    ('ca ngợi', 'V', 'B-VP', 'O'),
    ('những', 'L', 'B-NP', 'O'),
    ('thành tựu', 'N', 'B-NP', 'O'),
    ('vĩ đại', 'A', 'B-AP', 'O'),
    ('của', 'E', 'B-PP', 'O'),
    ('Liên Xô', 'Np', 'B-NP', 'B-LOC')]
    """
    # NER works on top of the chunker's output, so chunk the raw text first.
    chunked = chunk(sentence)
    # The predictor is obtained through its Instance() accessor rather than
    # being constructed directly here.
    predictor = CRFNERPredictor.Instance()
    return predictor.predict(chunked, format)