-
Notifications
You must be signed in to change notification settings - Fork 5
/
ucto_classes.pxd
42 lines (35 loc) · 1.13 KB
/
ucto_classes.pxd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#*****************************
# Python-ucto
# by Maarten van Gompel
# Centre for Language Studies
# Radboud University Nijmegen
#
# Licensed under GPLv3
#****************************/
from libcpp.string cimport string
from libcpp.vector cimport vector
from libcpp.set cimport set
from libcpp cimport bool
from libc.stdint cimport *
# Cython-level declarations of the C++ classes used from "ucto/tokenize.h"
# (C++ namespace "Tokenizer"). Nothing is implemented here: each line only
# tells Cython the signature of a member that the C++ header provides.
#
# NOTE(review): only init() is declared `except +`. For every other method a
# C++ exception would not be translated into a Python exception -- confirm
# against the ucto header whether any of them can throw.
cdef extern from "ucto/tokenize.h" namespace "Tokenizer":

    # A token object as handed back by TokenizerClass.popSentence().
    cdef cppclass Token:
        # Both accessors return a C++ std::string.
        string texttostring()
        string typetostring()
        # Plain integer data member; presumably a flag/bitmask describing the
        # token's role in the sentence -- TODO confirm against the ucto header.
        int role

    # The tokenizer class itself.
    cdef cppclass TokenizerClass:
        # Takes a reference to the settings file name. `except +` makes
        # Cython convert a C++ exception raised here into a Python exception.
        bool init(string & settingsfile) except +

        # Option setters: each takes a bool and returns a bool (the meaning
        # of the returned value is not visible from this file).
        bool setLowercase(bool)
        bool setUppercase(bool)
        bool setParagraphDetection(bool)
        bool setQuoteDetection(bool)
        bool setSentencePerLineOutput(bool)
        bool setSentencePerLineInput(bool)
        # Additionally takes a reference to a document id string.
        bool setXMLOutput(bool, string & docid)
        bool setXMLInput(bool)

        # Option getters matching setLowercase / setUppercase.
        bool getLowercase()
        bool getUppercase()

        # Takes an int and returns an int.
        int setDebug(int)

        # The members below are declared `nogil`, so Cython allows calling
        # them from code that has released the GIL.
        # Two string arguments; presumably input and output file names --
        # verify against the caller.
        void tokenize(string,string) nogil
        int tokenizeLine(string &) nogil
        vector[string] getUTF8Sentences() nogil
        vector[Token] popSentence() nogil