SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DelimiterTokenizer.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
11 #ifndef _DELIMITERTOKENIZER__H__
12 #define _DELIMITERTOKENIZER__H__
13 
14 #include <shogun/lib/config.h>
15 
16 #include <shogun/lib/Tokenizer.h>
17 #include <shogun/lib/SGVector.h>
18 #include <shogun/lib/common.h>
19 
20 namespace shogun
21 {
22 
30 {
31 public:
36  CDelimiterTokenizer(bool skip_delimiters = false);
37 
43 
45  virtual ~CDelimiterTokenizer() {}
46 
51  virtual void set_text(SGVector<char> txt);
52 
58  virtual bool has_next();
59 
68  virtual index_t next_token_idx(index_t& start);
69 
75  virtual const char* get_name() const;
76 
80  void init_for_whitespace();
81 
83 
85  void clear_delimiters();
86 
91  bool get_skip_delimiters() const;
92 
97  void set_skip_delimiters(bool skip_delimiters);
98 
99 private:
100  void init();
101 
102 public:
105 
106 protected:
109 
112 };
113 }
114 #endif /* _DELIMITERTOKENIZER__H__ */
115 

SHOGUN Machine Learning Toolbox - Documentation