SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Tokenizer.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
11 #ifndef _TOKENIZER__H__
12 #define _TOKENIZER__H__
13 
14 #include <shogun/lib/config.h>
15 
16 #include <shogun/base/SGObject.h>
17 #include <shogun/lib/SGString.h>
18 #include <shogun/lib/SGVector.h>
19 
20 namespace shogun
21 {
/* Forward declarations of the two project types referenced by CTokenizer.
 * CSGObject is used below as a base class, which requires its complete
 * definition, so that definition has to come from an included header; this
 * forward declaration alone would not be enough.
 * NOTE(review): the headers included above presumably declare both names
 * already, which would make these declarations redundant but harmless -
 * confirm before removing them. */
22 class CSGObject;
23 template<class T> class SGVector;
24 
/**
 * @brief Abstract interface for tokenizers, publicly derived from CSGObject.
 *
 * has_next(), next_token_idx() and get_copy() are pure virtual, so this
 * class cannot be instantiated directly; a concrete tokenizer has to
 * implement all three. Only the destructor has a body in this header; the
 * constructors, set_text() and init() are merely declared here.
 */
29 class CTokenizer: public CSGObject
30 {
31 public:
    /** Default constructor (declared only; defined outside this header). */
33  CTokenizer();
34 
    /**
     * Copy constructor (declared only; defined outside this header).
     *
     * @param orig tokenizer to copy from
     */
36  CTokenizer(const CTokenizer& orig);
37 
    /** Virtual destructor with an empty body. */
39  virtual ~CTokenizer() { };
40 
    /**
     * Hands a character vector to the tokenizer.
     *
     * NOTE(review): presumably this is the text that later has_next() /
     * next_token_idx() calls operate on - the definition is not part of this
     * header, confirm against the implementation.
     *
     * @param txt character vector, passed by value
     */
45  virtual void set_text(SGVector<char> txt);
46 
    /**
     * Pure virtual; must be implemented by subclasses.
     *
     * @return a bool - presumably true while at least one more token is
     *         available (TODO confirm in the concrete tokenizers)
     */
52  virtual bool has_next()=0;
53 
    /**
     * Pure virtual; must be implemented by subclasses.
     *
     * @param start non-const reference, so the implementation can write an
     *        index back to the caller - presumably the start index of the
     *        next token (TODO confirm)
     * @return an index_t - presumably the end index of that token; whether
     *         it is inclusive or exclusive is not visible here (TODO confirm)
     */
60  virtual index_t next_token_idx(index_t& start)=0;
61 
    /**
     * Pure virtual; must be implemented by subclasses.
     *
     * @return pointer to a CTokenizer - presumably a copy of the concrete
     *         subclass instance. Ownership / reference counting of the
     *         returned object is not visible in this header (TODO confirm).
     */
66  virtual CTokenizer* get_copy()=0;
67 
68 private:
    /**
     * Private helper, declared only.
     * NOTE(review): presumably shared set-up invoked by the constructors -
     * confirm against the implementation file.
     */
69  void init();
70 
71 protected:
    /* NOTE(review): no members are visible in this protected section, and the
     * numbering embedded in this listing jumps from 71 to 74, so a declaration
     * may have been dropped by the documentation export - compare with the
     * original header before relying on this section being empty. */
74 };
75 }
76 
77 #endif /* _TOKENIZER__H__ */

SHOGUN Machine Learning Toolbox - Documentation