SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
NGramTokenizer.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
12 #include <shogun/lib/SGVector.h>
13 #include <shogun/base/Parameter.h>
14 
15 namespace shogun
16 {
17 
19 {
20  n = ns;
21  last_idx = 0;
22  init();
23 }
24 
26 : CTokenizer(orig)
27 {
29  n = orig.n;
30  init();
31 }
32 
/**
 * Hands the members n and last_idx to the SG_ADD macro, each together
 * with a name string ("n", "last_idx") and a short description.
 * Presumably this registers them as object parameters (the file
 * includes shogun/base/Parameter.h) -- confirm against the macro.
 *
 * NOTE(review): both SG_ADD calls are cut off in this listing. The
 * continuation lines that carry the remaining argument(s) and the
 * closing parenthesis are not visible here and must be restored from
 * the original source before this compiles.
 */
33 void CNGramTokenizer::init()
34 {
35  SG_ADD(&n, "n", "Size of n-grams",
37  SG_ADD(&last_idx, "last_idx", "Index of last token",
39 }
40 
42 {
43  last_idx = 0;
45 }
46 
/**
 * Returns the name of this class as a C string.
 *
 * @return pointer to the string literal "NGramTokenizer"; the caller
 *         must not modify or free it
 */
47 const char* CNGramTokenizer::get_name() const
48 {
49  return "NGramTokenizer";
50 }
51 
53 {
54  return last_idx<=text.size()-n;
55 }
56 
58 {
59  start = last_idx++;
60  return start + n;
61 }
62 
64 {
66  return t;
67 }
68 }

SHOGUN Machine Learning Toolbox - Documentation