SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
KMeans.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Gunnar Raetsch
8  * Written (W) 2007-2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef _KMEANS_H__
13 #define _KMEANS_H__
14 
15 #include <shogun/lib/config.h>
16 
17 #include <shogun/lib/common.h>
18 #include <shogun/io/SGIO.h>
22 
23 namespace shogun
24 {
25 class CDistanceMachine;
26 
29 {
32 
33  /* Standard KMeans with Lloyd's algorithm */
35 };
36 
/**
 * KMeans clustering, partitions the data into k (a-priori specified) clusters.
 *
 * Derives from CDistanceMachine; every non-default constructor takes the
 * distance to use as a CDistance*.
 *
 * NOTE(review): this listing skips several source lines (e.g. 89-97, 159-171
 * and 192-198). The cross-reference index of this page names
 * get_classifier_type(), get_max_iter(), set_train_method(),
 * get_train_method(), get_radiuses() and get_cluster_centers(), which are
 * presumably declared in those gaps -- confirm against the real header.
 */
57 class CKMeans : public CDistanceMachine
58 {
59  public:
    /** Default constructor. */
61  CKMeans();
62 
    /** Construct from a cluster count k, a distance d and a training method f. */
69  CKMeans(int32_t k, CDistance* d, EKMeansMethod f);
70 
    /**
     * Construct from a cluster count k and a distance d.
     * kmeanspp defaults to false and f defaults to KMM_LLOYD.
     * NOTE(review): kmeanspp presumably selects kmeans++ initialisation
     * (cf. set_use_kmeanspp) -- confirm in KMeans.cpp.
     */
78  CKMeans(int32_t k, CDistance* d, bool kmeanspp=false, EKMeansMethod f=KMM_LLOYD);
79 
    /**
     * Construct from a cluster count k_i, a distance d_i and a matrix
     * centers_i; f defaults to KMM_LLOYD.
     * NOTE(review): centers_i is presumably the initial cluster centers
     * (cf. set_initial_centers) -- confirm in KMeans.cpp.
     */
86  CKMeans(int32_t k_i, CDistance* d_i, SGMatrix<float64_t> centers_i, EKMeansMethod f=KMM_LLOYD);
87  virtual ~CKMeans();
88 
89 
91 
92 
97 
    /** Load from the stream srcfile. Returns bool (presumably success -- confirm). */
103  virtual bool load(FILE* srcfile);
104 
    /** Save to the stream dstfile. Returns bool (presumably success -- confirm). */
110  virtual bool save(FILE* dstfile);
111 
    /** Setter taking p_k (presumably stored in member k -- confirm). */
116  void set_k(int32_t p_k);
117 
    /** Getter counterpart of set_k(). */
122  int32_t get_k();
123 
    /** Setter for the kmeans++ flag. */
128  void set_use_kmeanspp(bool kmpp);
129 
    /** Getter for the kmeans++ flag. */
134  bool get_use_kmeanspp() const;
135 
    /** Setter for the fixed-centers flag. */
140  void set_fixed_centers(bool fixed);
141 
    /** Getter for the fixed-centers flag. */
146  bool get_fixed_centers();
147 
    /**
     * Setter for the maximum number of iterations.
     * NOTE(review): the cross-reference index lists the matching getter as
     * `float64_t get_max_iter()`, while this setter and the max_iter member
     * are int32_t -- worth checking for an unintended type mismatch.
     */
152  void set_max_iter(int32_t iter);
153 
159 
165 
171 
    /** Getter returning an int32_t (presumably member dimensions -- confirm). */
176  int32_t get_dimensions();
177 
    /** Returns the literal name "KMeans". */
179  virtual const char* get_name() const { return "KMeans"; }
180 
    /** Setter taking a matrix of initial centers. */
185  virtual void set_initial_centers(SGMatrix<float64_t> centers);
186 
192 
198 
    /** Setter for the mini-batch size b. */
203  void set_mini_batch_size(int32_t b);
204 
    /** Getter for the mini-batch size. */
209  int32_t get_mini_batch_size() const;
210 
    /** Setter for the number of mini-batch iterations t. */
215  void set_mini_batch_num_iterations(int32_t t);
216 
    /** Getter for the number of mini-batch iterations. */
221  int32_t get_mini_batch_num_iterations() const;
222 
    /** Combined setter taking b and t (same parameter names as the two setters above). */
228  void set_mini_batch_parameters(int32_t b, int32_t t);
229 
    /* First private section: member functions. */
230  private:
    /** Training entry point; data defaults to NULL. */
239  virtual bool train_machine(CFeatures* data=NULL);
240 
242  virtual void store_model_features();
243 
    /** Always returns false. */
244  virtual bool train_require_labels() const { return false; }
245 
    /** Returns a matrix (presumably kmeans++ initial centers -- confirm in KMeans.cpp). */
250  SGMatrix<float64_t> kmeanspp();
251  void init();
252 
257  void set_random_centers();
258  void compute_cluster_variances();
259 
    /*
     * Second private section: data members. The purposes noted below are
     * inferred from the member and accessor names only -- confirm in
     * KMeans.cpp.
     */
260  private:
    /* presumably the value written by set_max_iter() */
262  int32_t max_iter;
263 
    /* presumably the flag behind set_fixed_centers()/get_fixed_centers() */
265  bool fixed_centers;
266 
    /* presumably the number of clusters (set_k()/get_k()) */
268  int32_t k;
269 
    /* presumably the value returned by get_dimensions() */
271  int32_t dimensions;
272 
    /* presumably the cluster radii returned by get_radiuses() */
274  SGVector<float64_t> R;
275 
    /* presumably the centers passed to set_initial_centers() */
277  SGMatrix<float64_t> mus_initial;
278 
    /* presumably the flag behind set_use_kmeanspp()/get_use_kmeanspp() */
280  bool use_kmeanspp;
281 
    /* presumably the mini-batch size (set_mini_batch_size()) */
283  int32_t batch_size;
284 
    /* presumably the mini-batch iteration count (set_mini_batch_num_iterations()) */
286  int32_t minib_iter;
287 
    /* presumably the cluster centers returned by get_cluster_centers() */
289  SGMatrix<float64_t> mus;
290 
    /* presumably the method handled by set_train_method()/get_train_method() */
292  EKMeansMethod train_method;
293 };
294 }
295 #endif
296 
virtual const char * get_name() const
Definition: KMeans.h:179
EMachineType
Definition: Machine.h:33
virtual bool save(FILE *dstfile)
Definition: KMeans.cpp:220
Class Distance, a base class for all the distances used in the Shogun toolbox.
Definition: Distance.h:87
EKMeansMethod
Definition: KMeans.h:28
void set_use_kmeanspp(bool kmpp)
Definition: KMeans.cpp:227
void set_k(int32_t p_k)
Definition: KMeans.cpp:237
int32_t get_dimensions()
Definition: KMeans.cpp:316
A generic DistanceMachine interface.
virtual ~CKMeans()
Definition: KMeans.cpp:69
bool get_use_kmeanspp() const
Definition: KMeans.cpp:232
SGVector< float64_t > get_radiuses()
Definition: KMeans.cpp:299
KMeans clustering, partitions the data into k (a-priori specified) clusters.
Definition: KMeans.h:57
#define MACHINE_PROBLEM_TYPE(PT)
Definition: Machine.h:120
float64_t get_max_iter()
Definition: KMeans.cpp:254
double float64_t
Definition: common.h:50
virtual bool load(FILE *srcfile)
Definition: KMeans.cpp:213
bool get_fixed_centers()
Definition: KMeans.cpp:326
int32_t get_mini_batch_size() const
Definition: KMeans.cpp:275
void set_max_iter(int32_t iter)
Definition: KMeans.cpp:248
void set_fixed_centers(bool fixed)
Definition: KMeans.cpp:321
void set_mini_batch_num_iterations(int32_t t)
Definition: KMeans.cpp:280
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
virtual void set_initial_centers(SGMatrix< float64_t > centers)
Definition: KMeans.cpp:73
virtual EMachineType get_classifier_type()
Definition: KMeans.h:96
The class Features is the base class of all feature objects.
Definition: Features.h:68
void set_mini_batch_parameters(int32_t b, int32_t t)
Definition: KMeans.cpp:291
void set_train_method(EKMeansMethod f)
Definition: KMeans.cpp:259
int32_t get_mini_batch_num_iterations() const
Definition: KMeans.cpp:286
EKMeansMethod get_train_method() const
Definition: KMeans.cpp:264
void set_mini_batch_size(int32_t b)
Definition: KMeans.cpp:269
int32_t get_k()
Definition: KMeans.cpp:243
SGMatrix< float64_t > get_cluster_centers()
Definition: KMeans.cpp:304

SHOGUN Machine Learning Toolbox - Documentation