SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SGInterface.cpp
Go to the documentation of this file.
4 
5 #include <shogun/lib/config.h>
6 #include <shogun/lib/DataType.h>
7 #include <shogun/lib/SGNDArray.h>
8 #include <shogun/lib/memory.h>
11 #include <shogun/lib/Hash.h>
12 #include <shogun/lib/Map.h>
13 #include <shogun/lib/Signal.h>
14 
31 
33 
34 #include <shogun/structure/Plif.h>
40 
41 #include <ctype.h>
42 
43 using namespace shogun;
44 
// File-scope pointer to a CSGInterface object, initialised to NULL here.
// NOTE(review): presumably assigned by the language-specific front end at
// start-up — confirm against the code that creates the interface.
45 CSGInterface* interface=NULL;
47 
// Usage-string helper macros used by the sg_methods table below.
//
// Every USAGE* macro expands to TWO comma-separated string literals, so a
// single macro invocation supplies two consecutive initializers of a table
// entry. The `method` argument is accepted but does not appear in any of the
// expansions. Presumably the command name is printed between the two strings
// when help text is generated — confirm against the help printer.
//
//   USAGE(method)            - no inputs, no outputs
//   USAGE_I(method, in)      - inputs only
//   USAGE_O(method, out)     - outputs only
//   USAGE_IO(method, in, out)- inputs and outputs (note: `in` comes first)
//   USAGE_COMMA              - separator placed between listed arguments
//   USAGE_STR                - quote character wrapped around string arguments
//
// Command-line variant: no sg('...') wrapper, blank-separated arguments,
// no quoting of string arguments.
48 #if defined(HAVE_CMDLINE)
49 #define USAGE(method) "", ""
50 #define USAGE_I(method, in) "", " " in ""
51 #define USAGE_O(method, out) "" out " = ", ""
52 #define USAGE_IO(method, in, out) "" out " = ", " " in ""
53 #define USAGE_COMMA " "
54 #define USAGE_STR ""
// R variant: sg('...') call syntax with "<-" used for the output assignment.
55 #elif defined(HAVE_R)
56 #define USAGE(method) "sg('", "')"
57 #define USAGE_I(method, in) "sg('", "', " in ")"
58 #define USAGE_O(method, out) "[" out "] <- sg('", "')"
59 #define USAGE_IO(method, in, out) "[" out "] <- sg('", "', " in ")"
60 #define USAGE_COMMA ", "
61 #define USAGE_STR "'"
// Default variant: sg('...') call syntax with "=" used for the output assignment.
62 #else
63 #define USAGE(method) "sg('", "')"
64 #define USAGE_I(method, in) "sg('", "', " in ")"
65 #define USAGE_O(method, out) "[" out "]=sg('", "')"
66 #define USAGE_IO(method, in, out) "[" out "]=sg('", "', " in ")"
67 #define USAGE_COMMA ", "
68 #define USAGE_STR "'"
69 #endif
70 
// Table of commands understood by the interface.
//
// Each regular entry lists, in order: the command-name constant (N_*), a
// pointer to the CSGInterface member function handling it, and the usage text
// produced by one of the USAGE* macros (which expand to two string literals).
// Entries whose first field is a plain string literal and whose remaining
// fields are NULL (e.g. "Features", "Kernel") act as section headings; the
// table ends with an all-NULL sentinel entry.
//
// NOTE(review): the numbering embedded in this listing has gaps (for example
// 76 -> 78, 80 -> 82, 93 -> 95), so several entries are shown here without
// their command-name line and/or their usage line. Restore those lines from
// the original source before compiling or editing individual entries.
71 CSGInterfaceMethod sg_methods[]=
72 {
73  { "Features", NULL, NULL, NULL },
74  {
75  N_PR_LOQO,
76  (&CSGInterface::cmd_pr_loqo),
78  "'Var1', Var1, 'Var2', Var2", "results")
79  },
80  {
82  (&CSGInterface::cmd_load_features),
84  "filename" USAGE_COMMA "feature_class" USAGE_COMMA "type" USAGE_COMMA "target[" USAGE_COMMA "size[" USAGE_COMMA "comp_features]]")
85  },
86  {
88  (&CSGInterface::cmd_save_features),
89  USAGE_I(N_SAVE_FEATURES, "filename" USAGE_COMMA "type" USAGE_COMMA "target")
90  },
91  {
93  (&CSGInterface::cmd_clean_features),
95  },
96  {
98  (&CSGInterface::cmd_get_features),
99  USAGE_IO(N_GET_FEATURES, USAGE_STR "TRAIN|TEST" USAGE_STR, "features")
100  },
101  {
103  (&CSGInterface::cmd_add_features),
105  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
106  },
107  {
109  (&CSGInterface::cmd_add_multiple_features),
111  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "repetitions" USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
112  },
113  {
115  (&CSGInterface::cmd_add_dotfeatures),
117  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
118  },
119  {
121  (&CSGInterface::cmd_set_features),
123  USAGE_STR "TRAIN|TEST" USAGE_STR
124  USAGE_COMMA "features["
125  USAGE_COMMA "DNABINFILE|<ALPHABET>]["
126  USAGE_COMMA "[from_position_list|slide_window]"
127  USAGE_COMMA "window size"
128  USAGE_COMMA "[position_list|shift]"
129  USAGE_COMMA "skip")
130  },
131  {
133  (&CSGInterface::cmd_set_reference_features),
135  },
136  {
138  (&CSGInterface::cmd_del_last_features),
140  },
141  {
142  N_CONVERT,
143  (&CSGInterface::cmd_convert),
144  USAGE_I(N_CONVERT, USAGE_STR "TRAIN|TEST" USAGE_STR
145  USAGE_COMMA "from_class"
146  USAGE_COMMA "from_type"
147  USAGE_COMMA "to_class"
148  USAGE_COMMA "to_type["
149  USAGE_COMMA "order"
150  USAGE_COMMA "start"
151  USAGE_COMMA "gap"
152  USAGE_COMMA "reversed]")
153  },
// NOTE(review): in the entry below "TRAIN|TEST" is preceded by USAGE_STR but
// not followed by one, unlike every other "TRAIN|TEST" usage in this table —
// confirm whether the closing quote is missing from the usage text.
154  {
155  N_RESHAPE,
156  (&CSGInterface::cmd_reshape),
157  USAGE_I(N_RESHAPE, USAGE_STR "TRAIN|TEST"
158  USAGE_COMMA "num_feat"
159  USAGE_COMMA "num_vec")
160  },
161  {
163  (&CSGInterface::cmd_load_labels),
164  USAGE_I(N_LOAD_LABELS, "filename"
165  USAGE_COMMA USAGE_STR "TRAIN|TARGET" USAGE_STR)
166  },
167  {
168  N_SET_LABELS,
169  (&CSGInterface::cmd_set_labels),
171  USAGE_COMMA "labels")
172  },
173  {
174  N_GET_LABELS,
175  (&CSGInterface::cmd_get_labels),
176  USAGE_IO(N_GET_LABELS, USAGE_STR "TRAIN|TEST" USAGE_STR, "labels")
177  },
178 
179 
180  { "Kernel", NULL, NULL },
181  {
183  (&CSGInterface::cmd_set_kernel_normalization),
184  USAGE_I(N_SET_KERNEL_NORMALIZATION, "IDENTITY|AVGDIAG|SQRTDIAG|FIRSTELEMENT|VARIANCE|ZEROMEANCENTER"
185  USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
186  },
187  {
188  N_SET_KERNEL,
189  (&CSGInterface::cmd_set_kernel),
190  USAGE_I(N_SET_KERNEL, "type" USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
191  },
192  {
193  N_ADD_KERNEL,
194  (&CSGInterface::cmd_add_kernel),
195  USAGE_I(N_ADD_KERNEL, "weight" USAGE_COMMA "kernel-specific parameters")
196  },
197  {
199  (&CSGInterface::cmd_del_last_kernel),
201  },
202  {
204  (&CSGInterface::cmd_init_kernel),
206  },
207  {
209  (&CSGInterface::cmd_clean_kernel),
211  },
212  {
214  (&CSGInterface::cmd_save_kernel),
215  USAGE_I(N_SAVE_KERNEL, "filename" USAGE_COMMA USAGE_STR "TRAIN|TEST" USAGE_STR)
216  },
217  {
219  (&CSGInterface::cmd_get_kernel_matrix),
220  USAGE_IO(N_GET_KERNEL_MATRIX, "[" USAGE_STR "TRAIN|TEST" USAGE_STR, "K]")
221  },
222  {
224  (&CSGInterface::cmd_set_WD_position_weights),
226  },
227  {
229  (&CSGInterface::cmd_get_subkernel_weights),
231  },
232  {
234  (&CSGInterface::cmd_set_subkernel_weights),
236  },
237  {
239  (&CSGInterface::cmd_set_subkernel_weights_combined),
241  },
242  {
244  (&CSGInterface::cmd_get_dotfeature_weights_combined),
246  },
247  {
249  (&CSGInterface::cmd_set_dotfeature_weights_combined),
251  },
252  {
254  (&CSGInterface::cmd_set_last_subkernel_weights),
256  },
257  {
259  (&CSGInterface::cmd_get_WD_position_weights),
261  },
262  {
264  (&CSGInterface::cmd_get_last_subkernel_weights),
266  },
267  {
269  (&CSGInterface::cmd_compute_by_subkernels),
271  },
272  {
274  (&CSGInterface::cmd_init_kernel_optimization),
276  },
277  {
279  (&CSGInterface::cmd_get_kernel_optimization),
281  },
282  {
284  (&CSGInterface::cmd_delete_kernel_optimization),
286  },
287  {
289  (&CSGInterface::cmd_use_diagonal_speedup),
291  },
292  {
294  (&CSGInterface::cmd_set_kernel_optimization_type),
295  USAGE_I(N_SET_KERNEL_OPTIMIZATION_TYPE, USAGE_STR "FASTBUTMEMHUNGRY|SLOWBUTMEMEFFICIENT" USAGE_STR)
296  },
297  {
298  N_SET_SOLVER,
299  (&CSGInterface::cmd_set_solver),
300  USAGE_I(N_SET_SOLVER, USAGE_STR "AUTO|CPLEX|GLPK|INTERNAL" USAGE_STR)
301  },
302  {
304  (&CSGInterface::cmd_set_constraint_generator),
305  USAGE_I(N_SET_CONSTRAINT_GENERATOR, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
306  "|SVMLIGHT|LIGHT|SVMLIGHT_ONECLASS|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
307  USAGE_STR)
308  },
309  {
311  (&CSGInterface::cmd_set_prior_probs),
312  USAGE_I(N_SET_PRIOR_PROBS, USAGE_STR "pos probs, neg_probs" USAGE_STR)
313  },
314  {
316  (&CSGInterface::cmd_set_prior_probs_from_labels),
318  },
319 #ifdef USE_SVMLIGHT
320  {
322  (&CSGInterface::cmd_resize_kernel_cache),
324  },
325 #endif //USE_SVMLIGHT
326 
327 
328  { "Distance", NULL, NULL },
329  {
331  (&CSGInterface::cmd_set_distance),
332  USAGE_I(N_SET_DISTANCE, "type" USAGE_COMMA "data type[" USAGE_COMMA "distance-specific parameters]")
333  },
334  {
336  (&CSGInterface::cmd_init_distance),
338  },
339  {
341  (&CSGInterface::cmd_get_distance_matrix),
343  },
344 
345 
346  { "Classifier", NULL, NULL },
347  {
348  N_CLASSIFY,
349  (&CSGInterface::cmd_classify),
350  USAGE_O(N_CLASSIFY, "result")
351  },
352  {
354  (&CSGInterface::cmd_classify),
355  USAGE_O(N_SVM_CLASSIFY, "result")
356  },
357  {
359  (&CSGInterface::cmd_classify_example),
360  USAGE_IO(N_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
361  },
362  {
364  (&CSGInterface::cmd_classify_example),
365  USAGE_IO(N_SVM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
366  },
367  {
369  (&CSGInterface::cmd_get_classifier),
370  USAGE_IO(N_GET_CLASSIFIER, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "weights")
371  },
// NOTE(review): "radi" in the usage text below is presumably a typo for
// "radii" — confirm before changing the user-visible string.
372  {
374  (&CSGInterface::cmd_get_classifier),
375  USAGE_O(N_GET_CLUSTERING, "radi" USAGE_COMMA "centers|merge_distances" USAGE_COMMA "pairs")
376  },
377  {
378  N_NEW_SVM,
379  (&CSGInterface::cmd_new_classifier),
380  USAGE_I(N_NEW_SVM, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
381  "|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
382  "|SUBGRADIENTSVM|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
383  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
384  "|LPM|LPBOOST|SUBGRADIENTLPM|KNN" USAGE_STR)
385  },
386  {
388  (&CSGInterface::cmd_new_classifier),
389  USAGE_I(N_NEW_CLASSIFIER, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS"
390  "|LIBSVM|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
391  "|SUBGRADIENTSVM|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
392  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
393  "|LPM|LPBOOST|SUBGRADIENTLPM|KNN" USAGE_STR)
394  },
395  {
397  (&CSGInterface::cmd_new_classifier),
398  USAGE_I(N_NEW_REGRESSION, USAGE_STR "SVRLIGHT|LIBSVR|KRR" USAGE_STR)
399  },
400  {
402  (&CSGInterface::cmd_new_classifier),
403  USAGE_I(N_NEW_CLUSTERING, USAGE_STR "KMEANS|HIERARCHICAL" USAGE_STR)
404  },
// NOTE(review): "filename" and "type" look like inputs, yet USAGE_O renders
// them on the output side of the usage text — confirm whether USAGE_I was
// intended for this entry.
405  {
407  (&CSGInterface::cmd_load_classifier),
408  USAGE_O(N_LOAD_CLASSIFIER, "filename" USAGE_COMMA "type")
409  },
410  {
412  (&CSGInterface::cmd_save_classifier),
413  USAGE_I(N_SAVE_CLASSIFIER, "filename")
414  },
415  {
417  (&CSGInterface::cmd_get_num_svms),
418  USAGE_O(N_GET_NUM_SVMS, "number of SVMs in MultiClassSVM")
419  },
420  {
421  N_GET_SVM,
422  (&CSGInterface::cmd_get_svm),
423  USAGE_IO(N_GET_SVM, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "alphas")
424  },
425  {
426  N_SET_SVM,
427  (&CSGInterface::cmd_set_svm),
428  USAGE_I(N_SET_SVM, "bias" USAGE_COMMA "alphas")
429  },
430  {
432  (&CSGInterface::cmd_set_linear_classifier),
434  },
435  {
437  (&CSGInterface::cmd_get_svm_objective),
438  USAGE_O(N_GET_SVM_OBJECTIVE, "objective")
439  },
440  {
442  (&CSGInterface::cmd_compute_svm_primal_objective),
444  },
445  {
447  (&CSGInterface::cmd_compute_svm_dual_objective),
449  },
// NOTE(review): the entry below reuses cmd_compute_svm_primal_objective, the
// same handler as two entries above; its command-name line is not visible in
// this listing — confirm the handler choice is intentional.
450  {
452  (&CSGInterface::cmd_compute_svm_primal_objective),
454  },
455  {
457  (&CSGInterface::cmd_compute_mkl_dual_objective),
459  },
460  {
462  (&CSGInterface::cmd_compute_relative_mkl_duality_gap),
464  },
465  {
467  (&CSGInterface::cmd_compute_absolute_mkl_duality_gap),
469  },
470  {
472  (&CSGInterface::cmd_do_auc_maximization),
474  },
475  {
477  (&CSGInterface::cmd_set_perceptron_parameters),
478  USAGE_I(N_SET_PERCEPTRON_PARAMETERS, "learnrate" USAGE_COMMA "maxiter")
479  },
480  {
482  (&CSGInterface::cmd_train_classifier),
483  USAGE_I(N_TRAIN_CLASSIFIER, "[classifier-specific parameters]")
484  },
485  {
487  (&CSGInterface::cmd_train_classifier),
489  },
490  {
492  (&CSGInterface::cmd_train_classifier),
494  },
495  {
496  N_SVM_TRAIN,
497  (&CSGInterface::cmd_train_classifier),
498  USAGE_I(N_SVM_TRAIN, "[classifier-specific parameters]")
499  },
500  {
501  N_SVMQPSIZE,
502  (&CSGInterface::cmd_set_svm_qpsize),
503  USAGE_I(N_SVMQPSIZE, "size")
504  },
505  {
507  (&CSGInterface::cmd_set_svm_max_qpsize),
508  USAGE_I(N_SVMMAXQPSIZE, "size")
509  },
510  {
511  N_SVMBUFSIZE,
512  (&CSGInterface::cmd_set_svm_bufsize),
513  USAGE_I(N_SVMBUFSIZE, "size")
514  },
515  {
516  N_C,
517  (&CSGInterface::cmd_set_svm_C),
518  USAGE_I(N_C, "C1[" USAGE_COMMA "C2]")
519  },
520  {
522  (&CSGInterface::cmd_set_svm_epsilon),
523  USAGE_I(N_SVM_EPSILON, "epsilon")
524  },
525  {
527  (&CSGInterface::cmd_set_svr_tube_epsilon),
528  USAGE_I(N_SVR_TUBE_EPSILON, "tube_epsilon")
529  },
530  {
531  N_SVM_NU,
532  (&CSGInterface::cmd_set_svm_nu),
533  USAGE_I(N_SVM_NU, "nu")
534  },
535  {
537  (&CSGInterface::cmd_set_svm_mkl_parameters),
538  USAGE_I(N_MKL_PARAMETERS, "weight_epsilon" USAGE_COMMA "C_MKL [" USAGE_COMMA "mkl_norm ]")
539  },
540  {
541  N_ENT_LAMBDA,
542  (&CSGInterface::cmd_set_elasticnet_lambda),
543  USAGE_I(N_ENT_LAMBDA, "ent_lambda")
544  },
545  {
547  (&CSGInterface::cmd_set_mkl_block_norm),
548  USAGE_I(N_MKL_BLOCK_NORM, "mkl_block_norm")
549  },
550  {
552  (&CSGInterface::cmd_set_max_train_time),
553  USAGE_I(N_SVM_MAX_TRAIN_TIME, "max_train_time")
554  },
555  {
557  (&CSGInterface::cmd_set_svm_shrinking_enabled),
558  USAGE_I(N_USE_SHRINKING, "enable_shrinking")
559  },
560  {
562  (&CSGInterface::cmd_set_svm_batch_computation_enabled),
563  USAGE_I(N_USE_BATCH_COMPUTATION, "enable_batch_computation")
564  },
565  {
566  N_USE_LINADD,
567  (&CSGInterface::cmd_set_svm_linadd_enabled),
568  USAGE_I(N_USE_LINADD, "enable_linadd")
569  },
570  {
572  (&CSGInterface::cmd_set_svm_bias_enabled),
573  USAGE_I(N_SVM_USE_BIAS, "enable_bias")
574  },
575  {
577  (&CSGInterface::cmd_set_mkl_interleaved_enabled),
578  USAGE_I(N_MKL_USE_INTERLEAVED_OPTIMIZATION, "enable_interleaved_optimization")
579  },
580  {
581  N_KRR_TAU,
582  (&CSGInterface::cmd_set_krr_tau),
583  USAGE_I(N_KRR_TAU, "tau")
584  },
585 
586 
587  { "Preprocessors", NULL, NULL },
588  {
590  (&CSGInterface::cmd_add_preproc),
591  USAGE_I(N_ADD_PREPROC, "preproc[, preproc-specific parameters]")
592  },
593  {
595  (&CSGInterface::cmd_del_preproc),
597  },
598  {
600  (&CSGInterface::cmd_attach_preproc),
602  },
603  {
605  (&CSGInterface::cmd_clean_preproc),
607  },
608 
609  { "Converters", NULL, NULL },
610  {
612  (&CSGInterface::cmd_set_converter),
614  },
615  {
616  N_EMBED,
617  (&CSGInterface::cmd_embed),
618  USAGE_IO(N_EMBED,"target dim","embedding")
619  },
620 
621 
622  { "HMM", NULL, NULL },
623  {
624  N_NEW_HMM,
625  (&CSGInterface::cmd_new_hmm),
626  USAGE_I(N_NEW_HMM, "N" USAGE_COMMA "M")
627  },
628  {
629  N_LOAD_HMM,
630  (&CSGInterface::cmd_load_hmm),
631  USAGE_I(N_LOAD_HMM, "filename")
632  },
633  {
634  N_SAVE_HMM,
635  (&CSGInterface::cmd_save_hmm),
636  USAGE_I(N_SAVE_HMM, "filename[" USAGE_COMMA "save_binary]")
637  },
638  {
639  N_GET_HMM,
640  (&CSGInterface::cmd_get_hmm),
642  },
643  {
644  N_APPEND_HMM,
645  (&CSGInterface::cmd_append_hmm),
647  },
648  {
650  (&CSGInterface::cmd_append_model),
651  USAGE_I(N_APPEND_MODEL, USAGE_STR "filename" USAGE_STR "[" USAGE_COMMA "base1" USAGE_COMMA "base2]")
652  },
653  {
654  N_SET_HMM,
655  (&CSGInterface::cmd_set_hmm),
657  },
658  {
659  N_SET_HMM_AS,
660  (&CSGInterface::cmd_set_hmm_as),
661  USAGE_I(N_SET_HMM_AS, "POS|NEG|TEST")
662  },
663  {
664  N_CHOP,
665  (&CSGInterface::cmd_set_chop),
666  USAGE_I(N_CHOP, "chop")
667  },
668  {
669  N_PSEUDO,
670  (&CSGInterface::cmd_set_pseudo),
671  USAGE_I(N_PSEUDO, "pseudo")
672  },
673  {
675  (&CSGInterface::cmd_load_definitions),
676  USAGE_I(N_LOAD_DEFINITIONS, "filename" USAGE_COMMA "init")
677  },
678  {
680  (&CSGInterface::cmd_hmm_classify),
681  USAGE_O(N_HMM_CLASSIFY, "result")
682  },
683  {
685  (&CSGInterface::cmd_one_class_linear_hmm_classify),
687  },
688  {
690  (&CSGInterface::cmd_one_class_hmm_classify),
692  },
693  {
695  (&CSGInterface::cmd_one_class_hmm_classify_example),
696  USAGE_IO(N_ONE_CLASS_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
697  },
698  {
700  (&CSGInterface::cmd_hmm_classify_example),
701  USAGE_IO(N_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
702  },
703  {
704  N_OUTPUT_HMM,
705  (&CSGInterface::cmd_output_hmm),
707  },
708  {
710  (&CSGInterface::cmd_output_hmm_defined),
712  },
713  {
715  (&CSGInterface::cmd_hmm_likelihood),
716  USAGE_O(N_HMM_LIKELIHOOD, "likelihood")
717  },
718  {
719  N_LIKELIHOOD,
720  (&CSGInterface::cmd_likelihood),
722  },
723  {
725  (&CSGInterface::cmd_save_likelihood),
726  USAGE_I(N_SAVE_LIKELIHOOD, "filename[" USAGE_COMMA "save_binary]")
727  },
728  {
730  (&CSGInterface::cmd_get_viterbi_path),
731  USAGE_IO(N_GET_VITERBI_PATH, "dim", "path" USAGE_COMMA "likelihood")
732  },
733  {
735  (&CSGInterface::cmd_viterbi_train_defined),
737  },
738  {
740  (&CSGInterface::cmd_viterbi_train),
742  },
743  {
745  (&CSGInterface::cmd_baum_welch_train),
747  },
748  {
750  (&CSGInterface::cmd_baum_welch_train_defined),
752  },
753  {
755  (&CSGInterface::cmd_baum_welch_trans_train),
757  },
758  {
760  (&CSGInterface::cmd_linear_train),
762  },
763  {
764  N_SAVE_PATH,
765  (&CSGInterface::cmd_save_path),
766  USAGE_I(N_SAVE_PATH, "filename[" USAGE_COMMA "save_binary]")
767  },
768  {
770  (&CSGInterface::cmd_convergence_criteria),
771  USAGE_I(N_CONVERGENCE_CRITERIA, "num_iterations" USAGE_COMMA "epsilon")
772  },
773  {
774  N_NORMALIZE,
775  (&CSGInterface::cmd_normalize),
776  USAGE_I(N_NORMALIZE, "[keep_dead_states]")
777  },
778  {
779  N_ADD_STATES,
780  (&CSGInterface::cmd_add_states),
781  USAGE_I(N_ADD_STATES, "states" USAGE_COMMA "value")
782  },
783  {
785  (&CSGInterface::cmd_permutation_entropy),
786  USAGE_I(N_PERMUTATION_ENTROPY, "width" USAGE_COMMA "seqnum")
787  },
788  {
790  (&CSGInterface::cmd_relative_entropy),
791  USAGE_O(N_RELATIVE_ENTROPY, "result")
792  },
793  {
794  N_ENTROPY,
795  (&CSGInterface::cmd_entropy),
796  USAGE_O(N_ENTROPY, "result")
797  },
798  {
799  (char*) N_SET_FEATURE_MATRIX,
800  (&CSGInterface::cmd_set_feature_matrix),
801  (char*) USAGE_I(N_SET_FEATURE_MATRIX, "features")
802  },
803  {
805  (&CSGInterface::cmd_set_feature_matrix_sparse),
806  (char*) USAGE_I(N_SET_FEATURE_MATRIX_SPARSE, "sp1" USAGE_COMMA "sp2" )
807  },
808  {
810  (&CSGInterface::cmd_new_plugin_estimator),
811  USAGE_I(N_NEW_PLUGIN_ESTIMATOR, "pos_pseudo" USAGE_COMMA "neg_pseudo")
812  },
813  {
815  (&CSGInterface::cmd_train_estimator),
817  },
818  {
820  (&CSGInterface::cmd_plugin_estimate_classify_example),
821  USAGE_IO(N_PLUGIN_ESTIMATE_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
822  },
823  {
825  (&CSGInterface::cmd_plugin_estimate_classify),
827  },
828  {
830  (&CSGInterface::cmd_set_plugin_estimate),
831  USAGE_I(N_SET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
832  },
833  {
835  (&CSGInterface::cmd_get_plugin_estimate),
836  USAGE_O(N_GET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
837  },
838  { "Signals", NULL, NULL },
839  {
841  (&CSGInterface::cmd_signals_set_model),
843  },
844  {
846  (&CSGInterface::cmd_signals_set_positions),
847  USAGE_I(N_SIGNALS_SET_POSITIONS, "positions")
848  },
849  {
851  (&CSGInterface::cmd_signals_set_labels),
852  USAGE_I(N_SIGNALS_SET_LABELS, "labels")
853  },
854  {
856  (&CSGInterface::cmd_signals_set_split),
857  USAGE_I(N_SIGNALS_SET_SPLIT, "split")
858  },
859  {
861  (&CSGInterface::cmd_signals_set_train_mask),
863  },
864  {
866  (&CSGInterface::cmd_signals_add_feature),
867  USAGE_I(N_SIGNALS_ADD_FEATURE, "feature")
868  },
869  {
871  (&CSGInterface::cmd_signals_add_kernel),
872  USAGE_I(N_SIGNALS_ADD_KERNEL, "kernelparam")
873  },
874  {
876  (&CSGInterface::cmd_signals_run),
877  USAGE_I(N_SIGNALS_RUN, "arg1")
878  },
879  { "Structure", NULL, NULL },
880  {
881  N_BEST_PATH,
882  (&CSGInterface::cmd_best_path),
883  USAGE_I(N_BEST_PATH, "from" USAGE_COMMA "to")
884  },
885  {
887  (&CSGInterface::cmd_best_path_2struct),
889  USAGE_COMMA "q"
890  USAGE_COMMA "cmd_trans"
891  USAGE_COMMA "seq"
892  USAGE_COMMA "pos"
893  USAGE_COMMA "genestr"
894  USAGE_COMMA "penalties"
895  USAGE_COMMA "penalty_info"
896  USAGE_COMMA "nbest"
897  USAGE_COMMA "content_weights"
898  USAGE_COMMA "segment_sum_weights",
899  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
900  },
901  {
902  (char*) N_SET_PLIF_STRUCT,
903  (&CSGInterface::cmd_set_plif_struct),
904  (char*) USAGE_I(N_SET_PLIF_STRUCT, "id"
905  USAGE_COMMA "name"
906  USAGE_COMMA "limits"
907  USAGE_COMMA "penalties"
908  USAGE_COMMA "transform"
909  USAGE_COMMA "min_value"
910  USAGE_COMMA "max_value"
911  USAGE_COMMA "use_cache"
912  USAGE_COMMA "use_svm")
913  },
914  {
915  (char*) N_GET_PLIF_STRUCT,
916  (&CSGInterface::cmd_get_plif_struct),
917  (char*) USAGE_O(N_GET_PLIF_STRUCT, "id"
918  USAGE_COMMA "name"
919  USAGE_COMMA "limits"
920  USAGE_COMMA "penalties"
921  USAGE_COMMA "transform"
922  USAGE_COMMA "min_value"
923  USAGE_COMMA "max_value"
924  USAGE_COMMA "use_cache"
925  USAGE_COMMA "use_svm")
926  },
927  {
928  (char*) N_PRECOMPUTE_SUBKERNELS,
929  (&CSGInterface::cmd_precompute_subkernels),
931  },
932  {
934  (&CSGInterface::cmd_precompute_content_svms),
935  (char*) USAGE_I(N_PRECOMPUTE_CONTENT_SVMS, "sequence"
936  USAGE_COMMA "position_list"
937  USAGE_COMMA "weights")
938  },
939  {
940  (char*) N_GET_LIN_FEAT,
941  (&CSGInterface::cmd_get_lin_feat),
942  (char*) USAGE_O(N_GET_LIN_FEAT, "lin_feat")
943  },
944  {
945  (char*) N_SET_LIN_FEAT,
946  (&CSGInterface::cmd_set_lin_feat),
947  (char*) USAGE_I(N_SET_LIN_FEAT, "lin_feat")
948  },
949  {
950  (char*) N_INIT_DYN_PROG,
951  (&CSGInterface::cmd_init_dyn_prog),
952  (char*) USAGE_I(N_INIT_DYN_PROG, "num_svms")
953  },
954  {
955  (char*) N_CLEAN_UP_DYN_PROG,
956  (&CSGInterface::cmd_clean_up_dyn_prog),
957  (char*) USAGE(N_CLEAN_UP_DYN_PROG)
958  },
959  {
960  (char*) N_INIT_INTRON_LIST,
961  (&CSGInterface::cmd_init_intron_list),
962  (char*) USAGE_I(N_INIT_INTRON_LIST, "start_positions"
963  USAGE_COMMA "end_positions"
964  USAGE_COMMA "quality")
965  },
966  {
968  (&CSGInterface::cmd_precompute_tiling_features),
969  (char*) USAGE_I(N_PRECOMPUTE_TILING_FEATURES, "intensities"
970  USAGE_COMMA "probe_pos"
971  USAGE_COMMA "tiling_plif_ids")
972  },
973  {
975  (&CSGInterface::cmd_long_transition_settings),
976  (char*) USAGE_I(N_LONG_TRANSITION_SETTINGS, "use_long_transitions"
977  USAGE_COMMA "threshold"
978  USAGE_COMMA "max_len")
979  },
980 
981  {
982  (char*) N_SET_MODEL,
983  (&CSGInterface::cmd_set_model),
984  (char*) USAGE_I(N_SET_MODEL, "content_weights"
985  USAGE_COMMA "transition_pointers"
986  USAGE_COMMA "use_orf"
987  USAGE_COMMA "mod_words")
988  },
989 
990  {
991  (char*) N_BEST_PATH_TRANS,
992  (&CSGInterface::cmd_best_path_trans),
994  USAGE_COMMA "q"
995  USAGE_COMMA "nbest"
996  USAGE_COMMA "seq_path"
997  USAGE_COMMA "a_trans"
998  USAGE_COMMA "segment_loss",
999  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
1000  },
// NOTE(review): "segmend_ids_mask" in the usage text below is presumably a
// typo for "segment_ids_mask" — confirm before changing the user-visible string.
1001  {
1003  (&CSGInterface::cmd_best_path_trans_deriv),
1005  USAGE_COMMA "my_path"
1006  USAGE_COMMA "my_pos"
1007  USAGE_COMMA "p"
1008  USAGE_COMMA "q"
1009  USAGE_COMMA "cmd_trans"
1010  USAGE_COMMA "seq"
1011  USAGE_COMMA "pos"
1012  USAGE_COMMA "genestr"
1013  USAGE_COMMA "penalties"
1014  USAGE_COMMA "state_signals"
1015  USAGE_COMMA "penalty_info"
1016  USAGE_COMMA "dict_weights"
1017  USAGE_COMMA "mod_words ["
1018  USAGE_COMMA "segment_loss"
1019  USAGE_COMMA "segmend_ids_mask]", "p_deriv"
1020  USAGE_COMMA "q_deriv"
1021  USAGE_COMMA "cmd_deriv"
1022  USAGE_COMMA "penalties_deriv"
1023  USAGE_COMMA "my_scores"
1024  USAGE_COMMA "my_loss")
1025  },
1026 
1027  { "POIM", NULL, NULL },
1028  {
1030  (&CSGInterface::cmd_compute_POIM_WD),
1031  USAGE_IO(N_COMPUTE_POIM_WD, "max_order" USAGE_COMMA "distribution", "W")
1032  },
1033  {
1035  (&CSGInterface::cmd_get_SPEC_consensus),
1037  },
1038  {
1040  (&CSGInterface::cmd_get_SPEC_scoring),
1041  USAGE_IO(N_GET_SPEC_SCORING, "max_order", "W")
1042  },
1043  {
1045  (&CSGInterface::cmd_get_WD_consensus),
1047  },
1048  {
1050  (&CSGInterface::cmd_get_WD_scoring),
1051  USAGE_IO(N_GET_WD_SCORING, "max_order", "W")
1052  },
1053 
1054 
1055  { "Utility", NULL, NULL },
1056  {
1057  N_CRC,
1058  (&CSGInterface::cmd_crc),
1059  USAGE_IO(N_CRC, "string", "crc32")
1060  },
1061  {
1062  N_SYSTEM,
1063  (&CSGInterface::cmd_system),
1064  USAGE_I(N_SYSTEM, "system_command")
1065  },
1066  {
1067  N_EXIT,
1068  (&CSGInterface::cmd_exit),
1069  USAGE(N_EXIT)
1070  },
1071  {
1072  N_QUIT,
1073  (&CSGInterface::cmd_exit),
1074  USAGE(N_QUIT)
1075  },
1076  {
1077  N_EXEC,
1078  (&CSGInterface::cmd_exec),
1079  USAGE_I(N_EXEC, "filename")
1080  },
1081  {
1082  N_SET_OUTPUT,
1083  (&CSGInterface::cmd_set_output),
1084  USAGE_I(N_SET_OUTPUT, USAGE_STR "STDERR|STDOUT|filename" USAGE_STR)
1085  },
1086  {
1088  (&CSGInterface::cmd_set_threshold),
1089  USAGE_I(N_SET_THRESHOLD, "threshold")
1090  },
1091  {
1092  N_INIT_RANDOM,
1093  (&CSGInterface::cmd_init_random),
1094  USAGE_I(N_INIT_RANDOM, "value_to_initialize_RNG_with")
1095  },
1096  {
1097  N_THREADS,
1098  (&CSGInterface::cmd_set_num_threads),
1099  USAGE_I(N_THREADS, "num_threads")
1100  },
1101  {
1103  (&CSGInterface::cmd_translate_string),
1105  "string, order, start", "translation")
1106  },
1107  {
1108  N_CLEAR,
1109  (&CSGInterface::cmd_clear),
1110  USAGE(N_CLEAR)
1111  },
1112  {
1113  N_TIC,
1114  (&CSGInterface::cmd_tic),
1115  USAGE(N_TIC)
1116  },
1117  {
1118  N_TOC,
1119  (&CSGInterface::cmd_toc),
1120  USAGE(N_TOC)
1121  },
1122  {
1123  N_PRINT,
1124  (&CSGInterface::cmd_print),
1125  USAGE_I(N_PRINT, "msg")
1126  },
1127  {
1128  N_ECHO,
1129  (&CSGInterface::cmd_echo),
1130  USAGE_I(N_ECHO, "level")
1131  },
1132  {
1133  N_LOGLEVEL,
1134  (&CSGInterface::cmd_loglevel),
1135  USAGE_I(N_LOGLEVEL, USAGE_STR "ALL|DEBUG|INFO|NOTICE|WARN|ERROR|CRITICAL|ALERT|EMERGENCY" USAGE_STR)
1136  },
1137  {
1139  (&CSGInterface::cmd_syntax_highlight),
1141  },
1142  {
1143  N_PROGRESS,
1144  (&CSGInterface::cmd_progress),
1146  },
1147  {
1148  N_GET_VERSION,
1149  (&CSGInterface::cmd_get_version),
1150  USAGE_O(N_GET_VERSION, "version")
1151  },
1152  {
1153  N_HELP,
1154  (&CSGInterface::cmd_help),
1155  USAGE(N_HELP)
1156  },
1157  {
1158  N_WHOS,
1159  (&CSGInterface::cmd_whos),
1160  USAGE(N_WHOS)
1161  },
1162  {
1164  (&CSGInterface::cmd_send_command),
1165  NULL
1166  },
1167  {
1168  N_RUN_PYTHON,
1169  (&CSGInterface::cmd_run_python),
1171  "'Var1', Var1, 'Var2', Var2,..., python_function", "results")
1172  },
1173  {
1174  N_RUN_OCTAVE,
1175  (&CSGInterface::cmd_run_octave),
1177  "'Var1', Var1, 'Var2', Var2,..., octave_function", "results")
1178  },
1179  {
1180  N_RUN_R,
1181  (&CSGInterface::cmd_run_r),
1182  USAGE_IO(N_RUN_R,
1183  "'Var1', Var1, 'Var2', Var2,..., r_function", "results")
1184  },
1185  {NULL, NULL, NULL} /* Sentinel */
1186 };
1187 
1188 
// Constructs the interface object.
//
// The member-initializer list allocates one helper object per ui_* member,
// handing each of them `this`. The ui_signals initializer is commented out.
// When print_copyright is true a version banner and some start-up information
// are printed. The constructor always finishes by calling reset().
//
// NOTE(review): the SG_PRINT call that starts on listing line 1208 is not
// terminated in this listing (line 1209, which should carry its arguments and
// closing parenthesis, is absent) — restore it from the original source.
1189 CSGInterface::CSGInterface(bool print_copyright)
1190 : CSGObject(),
1191  ui_classifier(new CGUIClassifier(this)),
1192  ui_distance(new CGUIDistance(this)),
1193  ui_features(new CGUIFeatures(this)),
1194  ui_hmm(new CGUIHMM(this)),
1195  ui_kernel(new CGUIKernel(this)),
1196  ui_labels(new CGUILabels(this)),
1197  ui_math(new CGUIMath(this)),
1198  ui_pluginestimate(new CGUIPluginEstimate(this)),
1199  ui_preproc(new CGUIPreprocessor(this)),
1200  ui_time(new CGUITime(this)),
1201  ui_structure(new CGUIStructure(this)),
1202  ui_converter(new CGUIConverter(this))/*,
1203 / ui_signals(new CGUISignals(this))*/
1204 {
1205  if (print_copyright)
1206  {
1207  version->print_version();
1208  SG_PRINT("( seeding random number generator with %u (seed size %d))\n",
// With USE_LOGCACHE defined the log-table dimensions and its size in MB are
// reported; otherwise only the log range is printed.
1210 #ifdef USE_LOGCACHE
1211  SG_PRINT( "initializing log-table (size=%i*%i*%i=%2.1fMB) ... ) ",
1212  CMath::get_log_range(),CMath::get_log_accuracy(),sizeof(float64_t),
1213  CMath::get_log_range()*CMath::get_log_accuracy()*sizeof(float64_t)/(1024.0*1024.0));
1214 #else
1215  SG_PRINT("determined range for x in log(1+exp(-x)) is:%d )\n", CMath::get_log_range());
1216 #endif
1217  }
1218 
// Put counters, the legacy string pointer, file_out and echo into their
// initial state (see reset()).
1219  reset();
1220 }
1221 
// Destroys the interface object: deletes every ui_* helper that the
// constructor allocates with new, then closes file_out if it is non-NULL.
// The ui_signals delete is commented out, matching the commented-out
// ui_signals initializer in the constructor.
1222 CSGInterface::~CSGInterface()
1223 {
1224  delete ui_classifier;
1225  delete ui_hmm;
1226  delete ui_pluginestimate;
1227  delete ui_kernel;
1228  delete ui_preproc;
1229  delete ui_features;
1230  delete ui_labels;
1231  delete ui_math;
1232  delete ui_structure;
1233  //delete ui_signals;
1234  delete ui_time;
1235  delete ui_distance;
1236  delete ui_converter;
1237 
// file_out is NULL unless an output file has been opened (reset() sets it
// to NULL), so only close it when set.
1238  if (file_out)
1239  fclose(file_out);
1240 }
1241 
// Restores the per-call bookkeeping to its initial state: zeroes the
// left-/right-hand-side argument cursors and counts, clears the legacy string
// pointer and the output file pointer, and switches echo on.
//
// NOTE(review): file_out is overwritten with NULL without being closed here.
// The constructor calls this before file_out holds a valid value, so closing
// it here would not be safe; if reset() can also run while a file is open,
// the handle would be dropped — confirm against callers.
1242 void CSGInterface::reset()
1243 {
1244  m_lhs_counter=0;
1245  m_rhs_counter=0;
1246  m_nlhs=0;
1247  m_nrhs=0;
1248  m_legacy_strptr=NULL;
1249  file_out=NULL;
1250  echo=true;
1251 }
1252 
1253 void CSGInterface::translate_arg(CSGInterface* source, CSGInterface* target)
1254 {
1255  switch (source->get_argument_type())
1256  {
1257  case SCALAR_INT:
1258  target->set_int(source->get_int());
1259  break;
1260  case SCALAR_REAL:
1261  target->set_real(source->get_real());
1262  break;
1263  case SCALAR_BOOL:
1264  target->set_bool(source->get_bool());
1265  break;
1266  case VECTOR_BOOL:
1267  {
1268  bool* v=NULL;
1269  int32_t len=0;
1270  source->get_vector(v, len);
1271  target->set_vector(v, len);
1272  SG_FREE(v);
1273  break;
1274  }
1275  case VECTOR_BYTE:
1276  {
1277  uint8_t* v=NULL;
1278  int32_t len=0;
1279  source->get_vector(v, len);
1280  target->set_vector(v, len);
1281  SG_FREE(v);
1282  break;
1283  }
1284  case VECTOR_CHAR:
1285  {
1286  char* v=NULL;
1287  int32_t len=0;
1288  source->get_vector(v, len);
1289  target->set_vector(v, len);
1290  SG_FREE(v);
1291  break;
1292  }
1293  case VECTOR_INT:
1294  {
1295  int32_t* v=NULL;
1296  int32_t len=0;
1297  source->get_vector(v, len);
1298  target->set_vector(v, len);
1299  SG_FREE(v);
1300  break;
1301  }
1302  case VECTOR_REAL:
1303  {
1304  float64_t* v=NULL;
1305  int32_t len=0;
1306  source->get_vector(v, len);
1307  target->set_vector(v, len);
1308  SG_FREE(v);
1309  break;
1310  }
1311  case VECTOR_SHORTREAL:
1312  {
1313  float32_t* v=NULL;
1314  int32_t len=0;
1315  source->get_vector(v, len);
1316  target->set_vector(v, len);
1317  SG_FREE(v);
1318  break;
1319  }
1320  case VECTOR_SHORT:
1321  {
1322  int16_t* v=NULL;
1323  int32_t len=0;
1324  source->get_vector(v, len);
1325  target->set_vector(v, len);
1326  SG_FREE(v);
1327  break;
1328  }
1329  case VECTOR_WORD:
1330  {
1331  uint16_t* v=NULL;
1332  int32_t len=0;
1333  source->get_vector(v, len);
1334  target->set_vector(v, len);
1335  SG_FREE(v);
1336  break;
1337  }
1338 
1339  case STRING_BYTE:
1340  {
1341  int32_t num_str=0;
1342  int32_t max_str_len=0;
1343  SGString<uint8_t>* strs=NULL;
1344  source->get_string_list(strs, num_str, max_str_len);
1345  target->set_string_list(strs, num_str);
1346  SG_FREE(strs);
1347  break;
1348  }
1349  case STRING_CHAR:
1350  {
1351  int32_t num_str=0;
1352  int32_t max_str_len=0;
1353  SGString<char>* strs;
1354  source->get_string_list(strs, num_str,max_str_len);
1355  target->set_string_list(strs, num_str);
1356  SG_FREE(strs);
1357  break;
1358  }
1359  case STRING_INT:
1360  {
1361  int32_t num_str=0;
1362  int32_t max_str_len=0;
1363  SGString<int32_t>* strs;
1364  source->get_string_list(strs, num_str,max_str_len);
1365  target->set_string_list(strs, num_str);
1366  SG_FREE(strs);
1367  break;
1368  }
1369  case STRING_SHORT:
1370  {
1371  int32_t num_str=0;
1372  int32_t max_str_len=0;
1373  SGString<int16_t>* strs=NULL;
1374  source->get_string_list(strs, num_str, max_str_len);
1375  target->set_string_list(strs, num_str);
1376  SG_FREE(strs);
1377  break;
1378  }
1379  case STRING_WORD:
1380  {
1381  int32_t num_str=0;
1382  int32_t max_str_len=0;
1383  SGString<uint16_t>* strs=NULL;
1384  source->get_string_list(strs, num_str, max_str_len);
1385  target->set_string_list(strs, num_str);
1386  SG_FREE(strs);
1387  break;
1388  }
1389  case DENSE_INT:
1390  {
1391  int32_t num_feat=0;
1392  int32_t num_vec=0;
1393  int32_t* fmatrix=NULL;
1394  source->get_matrix(fmatrix, num_feat, num_vec);
1395  target->set_matrix(fmatrix, num_feat, num_vec);
1396  SG_FREE(fmatrix);
1397  break;
1398  }
1399  case DENSE_REAL:
1400  {
1401  int32_t num_feat=0;
1402  int32_t num_vec=0;
1403  float64_t* fmatrix=NULL;
1404  source->get_matrix(fmatrix, num_feat, num_vec);
1405  target->set_matrix(fmatrix, num_feat, num_vec);
1406  SG_FREE(fmatrix);
1407  break;
1408  }
1409  case DENSE_SHORT:
1410  {
1411  int32_t num_feat=0;
1412  int32_t num_vec=0;
1413  int16_t* fmatrix=NULL;
1414  source->get_matrix(fmatrix, num_feat, num_vec);
1415  target->set_matrix(fmatrix, num_feat, num_vec);
1416  SG_FREE(fmatrix);
1417  break;
1418  }
1419  case DENSE_SHORTREAL:
1420  {
1421  int32_t num_feat=0;
1422  int32_t num_vec=0;
1423  float32_t* fmatrix=NULL;
1424  source->get_matrix(fmatrix, num_feat, num_vec);
1425  target->set_matrix(fmatrix, num_feat, num_vec);
1426  SG_FREE(fmatrix);
1427  break;
1428  }
1429  case DENSE_WORD:
1430  {
1431  int32_t num_feat=0;
1432  int32_t num_vec=0;
1433  uint16_t* fmatrix=NULL;
1434  source->get_matrix(fmatrix, num_feat, num_vec);
1435  target->set_matrix(fmatrix, num_feat, num_vec);
1436  SG_FREE(fmatrix);
1437  break;
1438  }
1439  /*
1440  case NDARRAY_BYTE:
1441  {
1442  uint8_t* a=NULL;
1443  int32_t* dims=NULL;
1444  int32_t num_dims=0;
1445  source->get_ndarray(a, dims, num_dims);
1446  target->set_ndarray(a, dims, num_dims);
1447  SG_FREE(a);
1448  SG_FREE(dims);
1449  break;
1450  }
1451  case NDARRAY_CHAR:
1452  {
1453  char* a=NULL;
1454  int32_t* dims=NULL;
1455  int32_t num_dims=0;
1456  source->get_ndarray(a, dims, num_dims);
1457  target->set_ndarray(a, dims, num_dims);
1458  SG_FREE(a);
1459  SG_FREE(dims);
1460  break;
1461  }
1462  case NDARRAY_INT:
1463  {
1464  int32_t* a=NULL;
1465  int32_t* dims=NULL;
1466  int32_t num_dims=0;
1467  source->get_ndarray(a, dims, num_dims);
1468  target->set_ndarray(a, dims, num_dims);
1469  SG_FREE(a);
1470  SG_FREE(dims);
1471  break;
1472  }
1473  case NDARRAY_REAL:
1474  {
1475  float64_t* a=NULL;
1476  int32_t* dims=NULL;
1477  int32_t num_dims=0;
1478  source->get_ndarray(a, dims, num_dims);
1479  target->set_ndarray(a, dims, num_dims);
1480  SG_FREE(a);
1481  SG_FREE(dims);
1482  break;
1483  }
1484  case NDARRAY_SHORTREAL:
1485  {
1486  float32_t* a=NULL;
1487  int32_t* dims=NULL;
1488  int32_t num_dims=0;
1489  source->get_ndarray(a, dims, num_dims);
1490  target->set_ndarray(a, dims, num_dims);
1491  SG_FREE(a);
1492  SG_FREE(dims);
1493  break;
1494  }
1495  case NDARRAY_SHORT:
1496  {
1497  int16_t* a=NULL;
1498  int32_t* dims=NULL;
1499  int32_t num_dims=0;
1500  source->get_ndarray(a, dims, num_dims);
1501  target->set_ndarray(a, dims, num_dims);
1502  SG_FREE(a);
1503  SG_FREE(dims);
1504  break;
1505  }
1506  case NDARRAY_WORD:
1507  {
1508  uint16_t* a=NULL;
1509  int32_t* dims=NULL;
1510  int32_t num_dims=0;
1511  source->get_ndarray(a, dims, num_dims);
1512  target->set_ndarray(a, dims, num_dims);
1513  SG_FREE(a);
1514  SG_FREE(dims);
1515  break;
1516  }*/
1517  case SPARSE_REAL:
1518  {
1519  int32_t num_feat=0;
1520  int32_t num_vec=0;
1521  SGSparseVector<float64_t>* fmatrix=NULL;
1522  source->get_sparse_matrix(fmatrix, num_feat, num_vec);
1523  int64_t nnz=0;
1524  for (int32_t i=0; i<num_vec; i++)
1525  nnz+=fmatrix[i].num_feat_entries;
1526  target->set_sparse_matrix(fmatrix, num_feat, num_vec, nnz);
1527  SG_FREE(fmatrix);
1528  break;
1529  }
1530 
1531  default:
1532  SG_ERROR("unknown return type");
1533  break;
1534  }
1535 }
1536 
1538 // commands
1540 
1541 /* Features */
1542 
1543 bool CSGInterface::cmd_load_features()
1544 {
1545  if (m_nrhs<8 || !create_return_values(0))
1546  return false;
1547 
1548  int32_t len=0;
1549  char* filename=get_str_from_str_or_direct(len);
1550  char* fclass=get_str_from_str_or_direct(len);
1551  char* type=get_str_from_str_or_direct(len);
1552  char* target=get_str_from_str_or_direct(len);
1553  int32_t size=get_int_from_int_or_str();
1554  int32_t comp_features=get_int_from_int_or_str();
1555 
1556  bool success=ui_features->load(
1557  filename, fclass, type, target, size, comp_features);
1558 
1559  SG_FREE(filename);
1560  SG_FREE(fclass);
1561  SG_FREE(type);
1562  SG_FREE(target);
1563  return success;
1564 }
1565 
1566 bool CSGInterface::cmd_save_features()
1567 {
1568  if (m_nrhs<5 || !create_return_values(0))
1569  return false;
1570 
1571  int32_t len=0;
1572  char* filename=get_str_from_str_or_direct(len);
1573  char* type=get_str_from_str_or_direct(len);
1574  char* target=get_str_from_str_or_direct(len);
1575 
1576  bool success=ui_features->save(filename, type, target);
1577 
1578  SG_FREE(filename);
1579  SG_FREE(type);
1580  SG_FREE(target);
1581  return success;
1582 }
1583 
1584 bool CSGInterface::cmd_clean_features()
1585 {
1586  if (m_nrhs<2 || !create_return_values(0))
1587  return false;
1588 
1589  int32_t len=0;
1590  char* target=get_str_from_str_or_direct(len);
1591 
1592  bool success=ui_features->clean(target);
1593 
1594  SG_FREE(target);
1595  return success;
1596 }
1597 
// Command handler: hand the current TRAIN or TEST feature object back to the
// calling interface.
//
// Reads one string argument naming the target, fetches the matching feature
// object from ui_features and, depending on its feature class and feature
// type, passes the data to set_matrix(), set_sparse_matrix() or
// set_string_list().
//
// Returns false when m_nrhs is not exactly 2 or create_return_values(1)
// fails; returns true after the switch below has run.
1598 bool CSGInterface::cmd_get_features()
1599 {
1600  if (m_nrhs!=2 || !create_return_values(1))
1601  return false;
1602 
1603  int32_t tlen=0;
1604  char* target=get_string(tlen);
1605  CFeatures* feat=NULL;
1606 
1607  if (strmatch(target, "TRAIN"))
1608  feat=ui_features->get_train_features();
1609  else if (strmatch(target, "TEST"))
1610  feat=ui_features->get_test_features();
1611  else
1612  {
// target is released before the error is reported; the unconditional
// SG_FREE(target) after this block implies SG_ERROR is not expected to return.
1613  SG_FREE(target);
1614  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
1615  }
1616  SG_FREE(target);
1617 
1618  ASSERT(feat);
1619 
// Outer dispatch on the feature class, inner dispatch on the element type.
1620  switch (feat->get_feature_class())
1621  {
1622  case C_DENSE:
1623  {
1624  int32_t num_feat=0;
1625  int32_t num_vec=0;
1626 
1627  switch (feat->get_feature_type())
1628  {
1629  case F_BYTE:
1630  {
1631  uint8_t* fmatrix=((CDenseFeatures<uint8_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1632  set_matrix(fmatrix, num_feat, num_vec);
1633  break;
1634  }
1635 
1636  case F_CHAR:
1637  {
1638  char* fmatrix=((CDenseFeatures<char> *) feat)->get_feature_matrix(num_feat, num_vec);
1639  set_matrix(fmatrix, num_feat, num_vec);
1640  break;
1641  }
1642 
1643  case F_DREAL:
1644  {
1645  float64_t* fmatrix=((CDenseFeatures<float64_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1646  set_matrix(fmatrix, num_feat, num_vec);
1647  break;
1648  }
1649 
1650  case F_INT:
1651  {
1652  int32_t* fmatrix=((CDenseFeatures<int32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1653  set_matrix(fmatrix, num_feat, num_vec);
1654  break;
1655  }
1656 
1657  case F_SHORT:
1658  {
1659  int16_t* fmatrix=((CDenseFeatures<int16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1660  set_matrix(fmatrix, num_feat, num_vec);
1661  break;
1662  }
1663 
1664  case F_SHORTREAL:
1665  {
1666  float32_t* fmatrix=((CDenseFeatures<float32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1667  set_matrix(fmatrix, num_feat, num_vec);
1668  break;
1669  }
1670 
1671  case F_WORD:
1672  {
1673  uint16_t* fmatrix=((CDenseFeatures<uint16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1674  set_matrix(fmatrix, num_feat, num_vec);
1675  break;
1676  }
1677 
// NOTE(review): no statement follows this `default:` label in this listing
// (the embedded line number skips one) - a statement appears to be missing;
// confirm against the real source.
1678  default:
1680  }
1681  break;
1682  }
1683 
1684  case C_SPARSE:
1685  {
1686  switch (feat->get_feature_type())
1687  {
1688  case F_DREAL:
1689  {
1690  int64_t nnz=((CSparseFeatures<float64_t>*) feat)->
1691  get_num_nonzero_entries();
1692  SGSparseMatrix<float64_t> fmatrix = ((CSparseFeatures<float64_t>*) feat)->get_sparse_feature_matrix();
// NOTE(review): nnz is declared int64_t but is printed with %d - confirm the
// format specifier is adequate on all supported platforms.
1693  SG_INFO("sparse matrix has %d feats, %d vecs and %d nnz elemements\n", fmatrix.num_features, fmatrix.num_vectors, nnz);
1694 
1695  set_sparse_matrix(fmatrix.sparse_matrix, fmatrix.num_features, fmatrix.num_vectors, nnz);
1696  break;
1697  }
1698 
// NOTE(review): statement after `default:` appears to be missing here as well.
1699  default:
1701  }
1702  break;
1703  }
1704 
1705  case C_STRING:
1706  {
1707  int32_t num_str=0;
1708  int32_t max_str_len=0;
1709  switch (feat->get_feature_type())
1710  {
1711  case F_BYTE:
1712  {
1713  SGString<uint8_t>* fmatrix=((CStringFeatures<uint8_t>*) feat)->get_features(num_str, max_str_len);
1714  set_string_list(fmatrix, num_str);
1715  break;
1716  }
1717 
1718  case F_CHAR:
1719  {
1720  SGString<char>* fmatrix=((CStringFeatures<char>*) feat)->get_features(num_str, max_str_len);
1721  set_string_list(fmatrix, num_str);
1722  break;
1723  }
1724 
1725  case F_WORD:
1726  {
1727  SGString<uint16_t>* fmatrix=((CStringFeatures<uint16_t>*) feat)->get_features(num_str, max_str_len);
1728  set_string_list(fmatrix, num_str);
1729  break;
1730  }
1731 
// NOTE(review): statement after `default:` appears to be missing here as well.
1732  default:
1734  }
1735  break;
1736  }
1737 
// These feature classes are all treated as CDotFeatures and exported through
// their computed dot feature matrix.
1738  case C_WD:
1739  case C_WEIGHTEDSPEC:
1740  case C_SPEC:
1741  case C_COMBINED_DOT:
1742  case C_POLY:
1743  {
1744 
1745  SGMatrix<float64_t> fmatrix = ((CDotFeatures*) feat)->get_computed_dot_feature_matrix();
// NOTE(review): num_cols is passed in the argument position where the dense
// branch passes num_feat, and num_rows where it passes num_vec - confirm
// this ordering is intended.
1746  set_matrix(fmatrix.matrix, fmatrix.num_cols, fmatrix.num_rows);
1747  break;
1748  }
1749 
// NOTE(review): statement after `default:` appears to be missing here as well.
1750  default:
1752  }
1753 
1754  return true;
1755 }
1756 
1757 bool CSGInterface::cmd_add_features()
1758 {
1759  if (m_nrhs<3 || !create_return_values(0))
1760  return false;
1761 
1762  return do_set_features(true, false);
1763 }
1764 
1765 bool CSGInterface::cmd_add_multiple_features()
1766 {
1767  if ((m_nrhs!=4 && m_nrhs<5) || !create_return_values(0))
1768  return false;
1769 
1770  int32_t repetitions=get_int();
1771 
1772  ASSERT(repetitions>=1);
1773 
1774  return do_set_features(true, false, repetitions);
1775 }
1776 
1777 bool CSGInterface::cmd_add_dotfeatures()
1778 {
1779  if (m_nrhs<3 || !create_return_values(0))
1780  return false;
1781 
1782  return do_set_features(true, true);
1783 }
1784 
1785 bool CSGInterface::cmd_set_features()
1786 {
1787  if (m_nrhs<3 || !create_return_values(0))
1788  return false;
1789 
1790  return do_set_features(false, false);
1791 }
1792 
1793 bool CSGInterface::do_set_features(bool add, bool check_dot, int32_t repetitions)
1794 {
1795  int32_t tlen=0;
1796  char* target=get_string(tlen);
1797  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
1798  {
1799  SG_FREE(target);
1800  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
1801  }
1802 
1803  CFeatures* feat=NULL;
1804  int32_t num_feat=0;
1805  int32_t num_vec=0;
1806 
1807  switch (get_argument_type())
1808  {
1809  case SPARSE_REAL:
1810  {
1811  SGSparseVector<float64_t>* fmatrix=NULL;
1812  get_sparse_matrix(fmatrix, num_feat, num_vec);
1813 
1814  feat=new CSparseFeatures<float64_t>(fmatrix, num_feat, num_vec);
1815  break;
1816  }
1817 
1818  case DENSE_REAL:
1819  {
1820  float64_t* fmatrix=NULL;
1821  get_matrix(fmatrix, num_feat, num_vec);
1822 
1823  feat=new CDenseFeatures<float64_t>(0);
1824  ((CDenseFeatures<float64_t>*) feat)->
1825  set_feature_matrix(SGMatrix<float64_t>(fmatrix, num_feat, num_vec));
1826 
1827  if (m_nrhs==6)
1828  feat = create_custom_real_features((CDenseFeatures<float64_t>*) feat);
1829 
1830  break;
1831  }
1832 
1833  case DENSE_INT:
1834  {
1835  int32_t* fmatrix=NULL;
1836  get_matrix(fmatrix, num_feat, num_vec);
1837 
1838  feat=new CDenseFeatures<int32_t>(0);
1839  ((CDenseFeatures<int32_t>*) feat)->
1840  set_feature_matrix(SGMatrix<int32_t>(fmatrix, num_feat, num_vec));
1841  break;
1842  }
1843 
1844  case DENSE_SHORT:
1845  {
1846  int16_t* fmatrix=NULL;
1847  get_matrix(fmatrix, num_feat, num_vec);
1848 
1849  feat=new CDenseFeatures<int16_t>(0);
1850  ((CDenseFeatures<int16_t>*) feat)->
1851  set_feature_matrix(SGMatrix<int16_t>(fmatrix, num_feat, num_vec));
1852  break;
1853  }
1854 
1855  case DENSE_WORD:
1856  {
1857  uint16_t* fmatrix=NULL;
1858  get_matrix(fmatrix, num_feat, num_vec);
1859 
1860  feat=new CDenseFeatures<uint16_t>(0);
1861  ((CDenseFeatures<uint16_t>*) feat)->
1862  set_feature_matrix(SGMatrix<uint16_t>(fmatrix, num_feat, num_vec));
1863  break;
1864  }
1865 
1866  case DENSE_SHORTREAL:
1867  {
1868  float32_t* fmatrix=NULL;
1869  get_matrix(fmatrix, num_feat, num_vec);
1870 
1871  feat=new CDenseFeatures<float32_t>(0);
1872  ((CDenseFeatures<float32_t>*) feat)->
1873  set_feature_matrix(SGMatrix<float32_t>(fmatrix, num_feat, num_vec));
1874  break;
1875  }
1876 
1877  case STRING_CHAR:
1878  {
1879  if (m_nrhs<4)
1880  SG_ERROR("Please specify alphabet!\n");
1881 
1882  int32_t num_str=0;
1883  int32_t max_str_len=0;
1884  SGString<char>* fmatrix=NULL;
1885  get_string_list(fmatrix, num_str, max_str_len);
1886 
1887  int32_t alphabet_len=0;
1888  char* alphabet_str=get_string(alphabet_len);
1889  ASSERT(alphabet_str);
1890 
1891  if (strmatch(alphabet_str, "DNABINFILE"))
1892  {
1893  SG_FREE(alphabet_str);
1894 
1895  ASSERT(fmatrix[0].string);
1896  feat=new CStringFeatures<uint8_t>(DNA);
1897 
1898  try
1899  {
1900  ((CStringFeatures<uint8_t>*) feat)->load_ascii_file(fmatrix[0].string);
1901  }
1902  catch (...)
1903  {
1904  SG_UNREF(feat);
1905  SG_ERROR("Couldn't load DNA features from file.\n");
1906  }
1907  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1908  break;
1909  }
1910  else
1911  {
1912  bool convert_to_word=false;
1913  bool convert_to_ulong=false;
1914  CAlphabet* alphabet=NULL;
1915  if (strmatch(alphabet_str, "DNAWORD"))
1916  {
1917  alphabet=new CAlphabet(DNA);
1918  convert_to_word=true;
1919  }
1920  else if (strmatch(alphabet_str, "DNAULONG"))
1921  {
1922  alphabet=new CAlphabet(DNA);
1923  convert_to_ulong=true;
1924  }
1925  else
1926  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1927 
1928  SG_REF(alphabet);
1929  SG_FREE(alphabet_str);
1930 
1931  feat=new CStringFeatures<char>(alphabet);
1932 
1933  if (!((CStringFeatures<char>*) feat)->set_features(fmatrix, num_str, max_str_len))
1934  {
1935  SG_UNREF(alphabet);
1936  SG_UNREF(feat);
1937  SG_ERROR("Couldnt set byte string features.\n");
1938  }
1939 
1940  SG_UNREF(alphabet);
1941 
1942  if (convert_to_word || convert_to_ulong)
1943  convert_to_bitembedding(feat, convert_to_word, convert_to_ulong);
1944  }
1945 
1946  obtain_from_single_string(feat);
1947  break;
1948  }
1949 
1950  case STRING_BYTE:
1951  {
1952  if (m_nrhs<4)
1953  SG_ERROR("Please specify alphabet!\n");
1954 
1955  int32_t num_str=0;
1956  int32_t max_str_len=0;
1957  SGString<uint8_t>* fmatrix=NULL;
1958  get_string_list(fmatrix, num_str, max_str_len);
1959 
1960  int32_t alphabet_len=0;
1961  char* alphabet_str=get_string(alphabet_len);
1962  ASSERT(alphabet_str);
1963  CAlphabet* alphabet=NULL;
1964  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1965  SG_FREE(alphabet_str);
1966 
1967  feat=new CStringFeatures<uint8_t>(alphabet);
1968  if (!((CStringFeatures<uint8_t>*) feat)->set_features(fmatrix, num_str, max_str_len))
1969  {
1970  SG_UNREF(alphabet);
1971  SG_UNREF(feat);
1972  SG_ERROR("Couldnt set byte string features.\n");
1973  }
1974  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1975  break;
1976  }
1977 
1978  default:
1979  SG_ERROR("Wrong argument type %d.\n", get_argument_type());
1980  }
1981 
1982  if (check_dot && !feat->has_property(FP_DOT))
1983  {
1984  SG_UNREF(feat);
1985  SG_ERROR("Feature type not supported by DOT Features\n");
1986  }
1987 
1988  if (strmatch(target, "TRAIN"))
1989  {
1990  if (!add)
1991  ui_features->set_train_features(feat);
1992  else if (check_dot)
1993  {
1994  for (int32_t i=0; i<repetitions; i++)
1995  ui_features->add_train_dotfeatures((CDotFeatures*) feat);
1996  }
1997  else
1998  {
1999  for (int32_t i=0; i<repetitions; i++)
2000  ui_features->add_train_features(feat);
2001  }
2002  }
2003  else
2004  {
2005  if (!add)
2006  ui_features->set_test_features(feat);
2007  else if (check_dot)
2008  {
2009  for (int32_t i=0; i<repetitions; i++)
2010  ui_features->add_test_dotfeatures((CDotFeatures*) feat);
2011  }
2012  else
2013  {
2014  for (int32_t i=0; i<repetitions; i++)
2015  ui_features->add_test_features(feat);
2016  }
2017  }
2018 
2019  SG_FREE(target);
2020 
2021  return true;
2022 }
2023 
2024 bool CSGInterface::cmd_set_reference_features()
2025 {
2026  if (m_nrhs<3 || !create_return_values(0))
2027  return false;
2028 
2029  int32_t len=0;
2030  char* target=get_str_from_str_or_direct(len);
2031 
2032  bool success=ui_features->set_reference_features(target);
2033 
2034  SG_FREE(target);
2035  return success;
2036 }
2037 
2038 bool CSGInterface::cmd_del_last_features()
2039 {
2040  if (m_nrhs<2 || !create_return_values(0))
2041  return false;
2042 
2043  int32_t len=0;
2044  char* target=get_str_from_str_or_direct(len);
2045  bool success=ui_features->del_last_feature_obj(target);
2046 
2047  SG_FREE(target);
2048  return success;
2049 }
2050 
// Command handler: convert the features of a target from one
// (class, type) combination to another.
//
// Arguments are read in this order: target, from_class, from_type, to_class,
// to_type, followed by optional conversion-specific parameters (gap cost,
// order/start/gap/reverse flag, or a normalisation flag). The nested
// strmatch() chain below selects the ui_features->convert_*() routine; the
// result is stored back via ui_features->set_convert_features().
//
// Returns false when m_nrhs is below 5 or create_return_values(0) fails;
// otherwise returns whether a conversion result was produced.
//
// NOTE(review): several `else` branches below have no statement in this
// listing (the embedded line numbers skip one after each of them). The
// statements appear to be missing; confirm against the real source before
// editing the control flow.
2051 bool CSGInterface::cmd_convert()
2052 {
2053  if (m_nrhs<5 || !create_return_values(0))
2054  return false;
2055 
2056  int32_t len=0;
2057  char* target=get_str_from_str_or_direct(len);
2058  CFeatures* features=ui_features->get_convert_features(target);
2059  if (!features)
2060  {
// NOTE(review): target is freed here and then passed to SG_ERROR as the %s
// argument on the next line - this reads freed memory. The message should be
// formatted before the buffer is released.
2061  SG_FREE(target);
2062  SG_ERROR("No \"%s\" features available.\n", target);
2063  }
2064 
2065  char* from_class=get_str_from_str_or_direct(len);
2066  char* from_type=get_str_from_str_or_direct(len);
2067  char* to_class=get_str_from_str_or_direct(len);
2068  char* to_type=get_str_from_str_or_direct(len);
2069 
2070  CFeatures* result=NULL;
// --- source class SIMPLE ---
2071  if (strmatch(from_class, "SIMPLE"))
2072  {
2073  if (strmatch(from_type, "REAL"))
2074  {
2075  if (strmatch(to_class, "SPARSE") &&
2076  strmatch(to_type, "REAL"))
2077  {
2078  result=ui_features->convert_simple_real_to_sparse_real(
2079  ((CDenseFeatures<float64_t>*) features));
2080  }
2081  else
2083  } // from_type REAL
2084 
2085  else if (strmatch(from_type, "CHAR"))
2086  {
2087  if (strmatch(to_class, "STRING") &&
2088  strmatch(to_type, "CHAR"))
2089  {
2090  result=ui_features->convert_simple_char_to_string_char(
2091  ((CDenseFeatures<char>*) features));
2092  }
2093  else if (strmatch(to_class, "SIMPLE"))
2094  {
// The ALIGN conversion is only taken with exactly eight arguments; the
// extra one is the gap cost.
2095  if (strmatch(to_type, "ALIGN") && m_nrhs==8)
2096  {
2097  float64_t gap_cost=get_real_from_real_or_str();
2098  result=ui_features->convert_simple_char_to_simple_align(
2099  (CDenseFeatures<char>*) features, gap_cost);
2100  }
2101  else
2103  }
2104  else
2106  } // from_type CHAR
2107 
2108  else if (strmatch(from_type, "WORD"))
2109  {
2110  if (strmatch(to_class, "SIMPLE") &&
2111  strmatch(to_type, "SALZBERG"))
2112  {
2113  result=ui_features->convert_simple_word_to_simple_salzberg(
2114  (CDenseFeatures<uint16_t>*) features);
2115  }
2116  else
2118  } // from_type WORD
2119 
2120  else
2122  } // from_class SIMPLE
2123 
// --- source class SPARSE ---
2124  else if (strmatch(from_class, "SPARSE"))
2125  {
2126  if (strmatch(from_type, "REAL"))
2127  {
2128  if (strmatch(to_class, "SIMPLE") &&
2129  strmatch(to_type, "REAL"))
2130  {
2131  result=ui_features->convert_sparse_real_to_simple_real(
2132  (CSparseFeatures<float64_t>*) features);
2133  }
2134  else
2136  } // from_type REAL
2137  else
2139  } // from_class SPARSE
2140 
// --- source class STRING ---
2141  else if (strmatch(from_class, "STRING"))
2142  {
2143  if (strmatch(from_type, "CHAR"))
2144  {
2145  if (strmatch(to_class, "STRING"))
2146  {
// Defaults used when the optional trailing arguments are absent.
2147  int32_t order=1;
2148  int32_t start=0;
2149  int32_t gap=0;
2150  char rev='f';
2151 
// Optional arguments are positional: each one is read only if m_nrhs says
// it (and all preceding ones) were supplied.
2152  if (m_nrhs>6)
2153  {
2154  order=get_int_from_int_or_str();
2155 
2156  if (m_nrhs>7)
2157  {
2158  start=get_int_from_int_or_str();
2159 
2160  if (m_nrhs>8)
2161  {
2162  gap=get_int_from_int_or_str();
2163 
2164  if (m_nrhs>9)
2165  {
2166  char* rev_str=get_str_from_str_or_direct(len);
// Only the first character of the string is used as the flag.
2167  if (rev_str)
2168  rev=rev_str[0];
2169 
2170  SG_FREE(rev_str);
2171  }
2172  }
2173  }
2174  }
2175 
2176  if (strmatch(to_type, "BYTE"))
2177  {
2178  result=ui_features->convert_string_char_to_string_generic<char,uint8_t>(
2179  (CStringFeatures<char>*) features, order, start,
2180  gap, rev);
2181  }
2182  else if (strmatch(to_type, "WORD"))
2183  {
2184  result=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2185  (CStringFeatures<char>*) features, order, start,
2186  gap, rev);
2187  }
2188  else if (strmatch(to_type, "ULONG"))
2189  {
2190  result=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2191  (CStringFeatures<char>*) features, order, start,
2192  gap, rev);
2193  }
2194  else
2196  }
2197  else
2199  } // from_type CHAR
2200 
2201  else if (strmatch(from_type, "BYTE"))
2202  {
2203  if (strmatch(to_class, "STRING"))
2204  {
// Same optional-argument handling as in the CHAR branch above.
2205  int32_t order=1;
2206  int32_t start=0;
2207  int32_t gap=0;
2208  char rev='f';
2209 
2210  if (m_nrhs>6)
2211  {
2212  order=get_int_from_int_or_str();
2213 
2214  if (m_nrhs>7)
2215  {
2216  start=get_int_from_int_or_str();
2217 
2218  if (m_nrhs>8)
2219  {
2220  gap=get_int_from_int_or_str();
2221 
2222  if (m_nrhs>9)
2223  {
2224  char* rev_str=get_str_from_str_or_direct(len);
2225  if (rev_str)
2226  rev=rev_str[0];
2227 
2228  SG_FREE(rev_str);
2229  }
2230  }
2231  }
2232  }
2233 
2234  if (strmatch(to_type, "WORD"))
2235  {
2236  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint16_t>(
2237  (CStringFeatures<uint8_t>*) features, order, start,
2238  gap, rev);
2239  }
2240  else if (strmatch(to_type, "ULONG"))
2241  {
2242  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint64_t>(
2243  (CStringFeatures<uint8_t>*) features, order, start,
2244  gap, rev);
2245  }
2246  else
2248  }
2249  else
2251  } // from_type uint8_t
2252 
2253  else if (strmatch(from_type, "WORD"))
2254  {
2255  if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "TOP"))
2256  {
2257  result=ui_features->convert_string_word_to_simple_top(
2258  (CStringFeatures<uint16_t>*) features);
2259  }
// The SPEC/WORD conversion requires exactly seven arguments; the extra one
// is the use_norm flag.
2260  else if (strmatch(to_class, "SPEC") && strmatch(to_type, "WORD") && m_nrhs==7)
2261  {
2262  bool use_norm=get_bool();
2263  result=ui_features->convert_string_byte_to_spec_word((CStringFeatures<uint16_t>*) features, use_norm);
2264 
2265  }
2266  else
2268  } // from_type WORD
2269 
// Reached for any other from_type when the destination is SIMPLE/FK; the
// features are cast to uint16_t string features.
2270  else if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "FK"))
2271  {
2272  result=ui_features->convert_string_word_to_simple_fk(
2273  (CStringFeatures<uint16_t>*) features);
2274  } // to_type FK
2275 
2276  else
2278 
2279  } // from_class STRING
2280 
// NOTE(review): if SG_ERROR does not return (as the free-then-error pattern
// used elsewhere in this file suggests), the five argument strings freed
// below are leaked on the failure path.
2281  if (result && ui_features->set_convert_features(result, target))
2282  SG_INFO("Conversion was successful.\n");
2283  else
2284  SG_ERROR("Conversion failed.\n");
2285 
2286  SG_FREE(target);
2287  SG_FREE(from_class);
2288  SG_FREE(from_type);
2289  SG_FREE(to_class);
2290  SG_FREE(to_type);
2291  return (result!=NULL);
2292 }
2293 
2294 void CSGInterface::convert_to_bitembedding(CFeatures* &features, bool convert_to_word, bool convert_to_ulong)
2295 {
2296  int32_t order=1;
2297  int32_t start=0;
2298  int32_t gap=0;
2299  char rev='f';
2300 
2301  if (m_nrhs<5)
2302  return;
2303 
2304  order=get_int();
2305  // remove arg, for parameters to come
2306  m_nrhs--;
2307 
2308  if (convert_to_word)
2309  {
2310  SG_INFO("Converting into word-bitembedding\n");
2311  features=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2312  (CStringFeatures<char>*) features, order, start, gap, rev);
2313  }
2314 
2315  if (convert_to_ulong)
2316  {
2317  SG_INFO("Converting into ulong-bitembedding\n");
2318  features=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2319  (CStringFeatures<char>*) features, order, start, gap, rev);
2320  }
2321 }
2322 
2323 void CSGInterface::obtain_from_single_string(CFeatures* features)
2324 {
2325  if (m_nrhs<5)
2326  return;
2327 
2328  int32_t len=0;
2329  char* str=get_string(len);
2330  ASSERT(str);
2331 
2332  if (strmatch(str, "from_position_list"))
2333  {
2334  obtain_from_position_list(features);
2335  }
2336  else if (strmatch(str, "slide_window"))
2337  {
2338  obtain_by_sliding_window(features);
2339  }
2340  else
2341  SG_SERROR("Unknown conversion\n");
2342 }
2343 
2344 bool CSGInterface::obtain_from_position_list(CFeatures* features)
2345 {
2346  int32_t winsize=get_int();
2347 
2348  int32_t* shifts=NULL;
2349  int32_t num_shift=0;
2350  get_vector(shifts, num_shift);
2351 
2352  int32_t skip=0;
2353  if (m_nrhs==8)
2354  skip=get_int();
2355 
2356  SG_DEBUG("winsize: %d num_shifts: %d skip: %d\n", winsize, num_shift, skip);
2357 
2358  CDynamicArray<int32_t> positions(num_shift+1);
2359 
2360  for (int32_t i=0; i<num_shift; i++)
2361  positions.set_element(shifts[i], i);
2362 
2363  if (features->get_feature_class()!=C_STRING)
2364  SG_ERROR("No string features.\n");
2365 
2366  bool success=false;
2367  switch (features->get_feature_type())
2368  {
2369  case F_CHAR:
2370  {
2371  success=(((CStringFeatures<char>*) features)->
2372  obtain_by_position_list(winsize, &positions, skip)>0);
2373  break;
2374  }
2375  case F_BYTE:
2376  {
2377  success=(((CStringFeatures<uint8_t>*) features)->
2378  obtain_by_position_list(winsize, &positions, skip)>0);
2379  break;
2380  }
2381  case F_WORD:
2382  {
2383  success=(((CStringFeatures<uint16_t>*) features)->
2384  obtain_by_position_list(winsize, &positions, skip)>0);
2385  break;
2386  }
2387  case F_ULONG:
2388  {
2389  success=(((CStringFeatures<uint64_t>*) features)->
2390  obtain_by_position_list(winsize, &positions, skip)>0);
2391  break;
2392  }
2393  default:
2394  SG_ERROR("Unsupported string features type.\n");
2395  }
2396 
2397  return success;
2398 }
2399 
2400 bool CSGInterface::obtain_by_sliding_window(CFeatures* features)
2401 {
2402  int32_t winsize=get_int();
2403  int32_t shift=get_int();
2404  int32_t skip=0;
2405 
2406  if (m_nrhs==8)
2407  skip=get_int();
2408 
2409  bool success=false;
2410 
2411  ASSERT(features);
2412  ASSERT(((CFeatures*) features)->get_feature_class()==C_STRING);
2413 
2414  switch (features->get_feature_type())
2415  {
2416  case F_CHAR:
2417  return ( ((CStringFeatures<char>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2418  case F_BYTE:
2419  return ( ((CStringFeatures<uint8_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2420  case F_WORD:
2421  return ( ((CStringFeatures<uint16_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2422  case F_ULONG:
2423  return ( ((CStringFeatures<uint64_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2424  default:
2425  SG_SERROR("Unsupported string features type.\n");
2426  return false;
2427  }
2428 
2429  return success;
2430 }
2431 
2432 bool CSGInterface::cmd_reshape()
2433 {
2434  if (m_nrhs<4 || !create_return_values(0))
2435  return false;
2436 
2437  int32_t len=0;
2438  char* target=get_str_from_str_or_direct(len);
2439  int32_t num_feat=get_int_from_int_or_str();
2440  int32_t num_vec=get_int_from_int_or_str();
2441 
2442  bool success=ui_features->reshape(target, num_feat, num_vec);
2443 
2444  SG_FREE(target);
2445  return success;
2446 }
2447 
2448 bool CSGInterface::cmd_load_labels()
2449 {
2450  if (m_nrhs<4 || !create_return_values(0))
2451  return false;
2452 
2453  int32_t len=0;
2454  char* filename=get_str_from_str_or_direct(len);
2455  char* target=get_str_from_str_or_direct(len);
2456 
2457  bool success=ui_labels->load(filename, target);
2458 
2459  SG_FREE(filename);
2460  SG_FREE(target);
2461  return success;
2462 }
2463 
2464 bool CSGInterface::cmd_set_labels()
2465 {
2466  if (m_nrhs!=3 || !create_return_values(0))
2467  return false;
2468 
2469  int32_t tlen=0;
2470  char* target=get_string(tlen);
2471  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
2472  {
2473  SG_FREE(target);
2474  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
2475  }
2476 
2477  float64_t* lab=NULL;
2478  int32_t len=0;
2479  get_vector(lab, len);
2480 
2481  CLabels* labels=ui_labels->infer_labels(lab, len);
2482 
2483  SG_INFO("num labels: %d\n", labels->get_num_labels());
2484 
2485  if (strmatch(target, "TRAIN"))
2486  ui_labels->set_train_labels(labels);
2487  else if (strmatch(target, "TEST"))
2488  ui_labels->set_test_labels(labels);
2489  else
2490  {
2491  SG_FREE(target);
2492  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
2493  }
2494  SG_FREE(target);
2495 
2496  return true;
2497 }
2498 
2499 bool CSGInterface::cmd_get_labels()
2500 {
2501  if (m_nrhs!=2 || !create_return_values(1))
2502  return false;
2503 
2504  int32_t tlen=0;
2505  char* target=get_string(tlen);
2506  CLabels* labels=NULL;
2507 
2508  if (strmatch(target, "TRAIN"))
2509  labels=ui_labels->get_train_labels();
2510  else if (strmatch(target, "TEST"))
2511  labels=ui_labels->get_test_labels();
2512  else
2513  {
2514  SG_FREE(target);
2515  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
2516  }
2517  SG_FREE(target);
2518 
2519  if (!labels)
2520  SG_ERROR("No labels.\n");
2521 
2522  //FIXME
2523  SGVector<float64_t> lab=((CBinaryLabels*) labels)->get_labels();
2524 
2525  set_vector(lab.vector, lab.vlen);
2526  return true;
2527 }
2528 
2529 
2532 bool CSGInterface::cmd_set_kernel_normalization()
2533 {
2534  if (m_nrhs<2 || !create_return_values(0))
2535  return false;
2536 
2537  int32_t len=0;
2538  char* normalization=get_string(len);
2539 
2540  float64_t c=0;
2541  float64_t r=0;
2542 
2543  if (m_nrhs>=3)
2544  c=get_real();
2545  if (m_nrhs>=4)
2546  r=get_real();
2547 
2548  bool success=ui_kernel->set_normalization(normalization, c, r);
2549 
2550  SG_FREE(normalization);
2551  return success;
2552 }
2553 
2554 bool CSGInterface::cmd_set_kernel()
2555 {
2556  if (m_nrhs<2 || !create_return_values(0))
2557  return false;
2558 
2559  SG_DEBUG("SGInterface: set_kernel\n");
2560  CKernel* kernel=create_kernel();
2561  return ui_kernel->set_kernel(kernel);
2562 }
2563 
2564 bool CSGInterface::cmd_add_kernel()
2565 {
2566  if (m_nrhs<3 || !create_return_values(0))
2567  return false;
2568 
2569  float64_t weight=get_real_from_real_or_str();
2570  // adjust m_nrhs to play well with checks in create_kernel
2571  m_nrhs--;
2572  CKernel* kernel=create_kernel();
2573 
2574  SG_DEBUG("SGInterface: add_kernel\n");
2575  return ui_kernel->add_kernel(kernel, weight);
2576 }
2577 
2578 bool CSGInterface::cmd_del_last_kernel()
2579 {
2580  if (m_nrhs<1 || !create_return_values(0))
2581  return false;
2582 
2583  return ui_kernel->del_last_kernel();
2584 }
2585 
2586 CKernel* CSGInterface::create_kernel()
2587 {
2588  CKernel* kernel=NULL;
2589  int32_t len=0;
2590  char* type=get_str_from_str_or_direct(len);
2591 
2592  SG_DEBUG("set_kernel with type: %s\n", type);
2593 
2594  if (strmatch(type, "COMBINED"))
2595  {
2596  if (m_nrhs<3)
2597  return NULL;
2598 
2599  int32_t size=get_int_from_int_or_str();
2600  bool append_subkernel_weights=false;
2601  if (m_nrhs>3)
2602  append_subkernel_weights=get_bool_from_bool_or_str();
2603 
2604  kernel=ui_kernel->create_combined(size, append_subkernel_weights);
2605  }
2606  else if (strmatch(type, "DISTANCE"))
2607  {
2608  if (m_nrhs<3)
2609  return NULL;
2610 
2611  int32_t size=get_int_from_int_or_str();
2612  float64_t width=1;
2613  if (m_nrhs>3)
2614  width=get_real_from_real_or_str();
2615 
2616  kernel=ui_kernel->create_distance(size, width);
2617  }
2618  else if (strmatch(type, "WAVELET"))
2619  {
2620 
2621  if (m_nrhs<4)
2622  return NULL;
2623 
2624  char* dtype=get_str_from_str_or_direct(len);
2625  if (strmatch(dtype, "REAL"))
2626  {
2627  int32_t size=get_int_from_int_or_str();
2628  float64_t Wdilation=5.0;
2629  float64_t Wtranslation=2.0;
2630 
2631  if (m_nrhs>4)
2632  {
2633  Wdilation=get_real_from_real_or_str();
2634 
2635  if (m_nrhs>5)
2636  Wtranslation=get_real_from_real_or_str();
2637  }
2638 
2639  kernel=ui_kernel->create_sigmoid(size, Wdilation, Wtranslation);
2640  }
2641 
2642  SG_FREE(dtype);
2643  }
2644  else if (strmatch(type, "LINEAR"))
2645  {
2646  if (m_nrhs<4)
2647  return NULL;
2648  if (m_nrhs>5)
2649  return NULL;
2650 
2651  char* dtype=get_str_from_str_or_direct(len);
2652  int32_t size=get_int_from_int_or_str();
2653  float64_t scale=-1;
2654  if (m_nrhs==5)
2655  scale=get_real_from_real_or_str();
2656 
2657  if (strmatch(dtype, "BYTE"))
2658  kernel=ui_kernel->create_linearbyte(size, scale);
2659  else if (strmatch(dtype, "WORD"))
2660  kernel=ui_kernel->create_linearword(size, scale);
2661  else if (strmatch(dtype, "CHAR"))
2662  kernel=ui_kernel->create_linearstring(size, scale);
2663  else if (strmatch(dtype, "REAL"))
2664  kernel=ui_kernel->create_linear(size, scale);
2665  else if (strmatch(dtype, "SPARSEREAL"))
2666  kernel=ui_kernel->create_sparselinear(size, scale);
2667 
2668  SG_FREE(dtype);
2669  }
2670  else if (strmatch(type, "HISTOGRAM"))
2671  {
2672  if (m_nrhs<4)
2673  return NULL;
2674 
2675  char* dtype=get_str_from_str_or_direct(len);
2676  if (strmatch(dtype, "WORD"))
2677  {
2678  int32_t size=get_int_from_int_or_str();
2679  kernel=ui_kernel->create_histogramword(size);
2680  }
2681 
2682  SG_FREE(dtype);
2683  }
2684  else if (strmatch(type, "SALZBERG"))
2685  {
2686  if (m_nrhs<4)
2687  return NULL;
2688 
2689  char* dtype=get_str_from_str_or_direct(len);
2690  if (strmatch(dtype, "WORD"))
2691  {
2692  int32_t size=get_int_from_int_or_str();
2693  kernel=ui_kernel->create_salzbergword(size);
2694  }
2695 
2696  SG_FREE(dtype);
2697  }
2698  else if (strmatch(type, "POLYMATCH"))
2699  {
2700  if (m_nrhs<4)
2701  return NULL;
2702 
2703  char* dtype=get_str_from_str_or_direct(len);
2704  int32_t size=get_int_from_int_or_str();
2705  int32_t degree=3;
2706  bool inhomogene=false;
2707  bool normalize=true;
2708 
2709  if (m_nrhs>4)
2710  {
2711  degree=get_int_from_int_or_str();
2712  if (m_nrhs>5)
2713  {
2714  inhomogene=get_bool_from_bool_or_str();
2715  if (m_nrhs>6)
2716  normalize=get_bool_from_bool_or_str();
2717  }
2718  }
2719 
2720  if (strmatch(dtype, "CHAR"))
2721  {
2722  kernel=ui_kernel->create_polymatchstring(
2723  size, degree, inhomogene, normalize);
2724  }
2725  else if (strmatch(dtype, "WORD"))
2726  {
2727  kernel=ui_kernel->create_polymatchwordstring(
2728  size, degree, inhomogene, normalize);
2729  }
2730 
2731  SG_FREE(dtype);
2732  }
2733  else if (strmatch(type, "MATCH"))
2734  {
2735  if (m_nrhs<4)
2736  return NULL;
2737 
2738  char* dtype=get_str_from_str_or_direct(len);
2739  if (strmatch(dtype, "WORD"))
2740  {
2741  int32_t size=get_int_from_int_or_str();
2742  int32_t d=3;
2743  bool normalize=true;
2744 
2745  if (m_nrhs>4)
2746  d=get_int_from_int_or_str();
2747  if (m_nrhs>5)
2748  normalize=get_bool_from_bool_or_str();
2749 
2750  kernel=ui_kernel->create_matchwordstring(size, d, normalize);
2751  }
2752 
2753  SG_FREE(dtype);
2754  }
2755  else if (strmatch(type, "WEIGHTEDCOMMSTRING") || strmatch(type, "COMMSTRING"))
2756  {
2757  char* dtype=get_str_from_str_or_direct(len);
2758  int32_t size=get_int_from_int_or_str();
2759  bool use_sign=false;
2760  char* norm_str=NULL;
2761 
2762  if (m_nrhs>4)
2763  {
2764  use_sign=get_bool_from_bool_or_str();
2765 
2766  if (m_nrhs>5)
2767  norm_str=get_str_from_str_or_direct(len);
2768  }
2769 
2770  if (strmatch(dtype, "WORD"))
2771  {
2772  if (strmatch(type, "WEIGHTEDCOMMSTRING"))
2773  {
2774  kernel=ui_kernel->create_commstring(
2775  size, use_sign, norm_str, K_WEIGHTEDCOMMWORDSTRING);
2776  }
2777  else if (strmatch(type, "COMMSTRING"))
2778  {
2779  kernel=ui_kernel->create_commstring(
2780  size, use_sign, norm_str, K_COMMWORDSTRING);
2781  }
2782  }
2783  else if (strmatch(dtype, "ULONG"))
2784  {
2785  kernel=ui_kernel->create_commstring(
2786  size, use_sign, norm_str, K_COMMULONGSTRING);
2787  }
2788 
2789  SG_FREE(dtype);
2790  SG_FREE(norm_str);
2791  }
2792  else if (strmatch(type, "CHI2"))
2793  {
2794  if (m_nrhs<4)
2795  return NULL;
2796 
2797  char* dtype=get_str_from_str_or_direct(len);
2798  if (strmatch(dtype, "REAL"))
2799  {
2800  int32_t size=get_int_from_int_or_str();
2801  float64_t width=1;
2802 
2803  if (m_nrhs>4)
2804  width=get_real_from_real_or_str();
2805 
2806  kernel=ui_kernel->create_chi2(size, width);
2807  }
2808 
2809  SG_FREE(dtype);
2810  }
2811  else if (strmatch(type, "FIXEDDEGREE"))
2812  {
2813  if (m_nrhs<4)
2814  return NULL;
2815 
2816  char* dtype=get_str_from_str_or_direct(len);
2817  if (strmatch(dtype, "CHAR"))
2818  {
2819  int32_t size=get_int_from_int_or_str();
2820  int32_t d=3;
2821  if (m_nrhs>4)
2822  d=get_int_from_int_or_str();
2823 
2824  kernel=ui_kernel->create_fixeddegreestring(size, d);
2825  }
2826 
2827  SG_FREE(dtype);
2828  }
2829  else if (strmatch(type, "LOCALALIGNMENT"))
2830  {
2831  if (m_nrhs<4)
2832  return NULL;
2833 
2834  char* dtype=get_str_from_str_or_direct(len);
2835  if (strmatch(dtype, "CHAR"))
2836  {
2837  int32_t size=get_int_from_int_or_str();
2838 
2839  kernel=ui_kernel->create_localalignmentstring(size);
2840  }
2841 
2842  SG_FREE(dtype);
2843  }
2844  else if (strmatch(type, "OLIGO"))
2845  {
2846  if (m_nrhs<6)
2847  return NULL;
2848 
2849  char* dtype=get_str_from_str_or_direct(len);
2850  if (strmatch(dtype, "CHAR"))
2851  {
2852  int32_t size=get_int_from_int_or_str();
2853  int32_t k=get_int_from_int_or_str();
2854  float64_t w=get_real_from_real_or_str();
2855 
2856  kernel=ui_kernel->create_oligo(size, k, w);
2857  }
2858 
2859  SG_FREE(dtype);
2860  }
2861  else if (strmatch(type, "WEIGHTEDDEGREEPOS2") ||
2862  strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2863  {
2864  if (m_nrhs<7)
2865  return NULL;
2866 
2867  char* dtype=get_str_from_str_or_direct(len);
2868  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2869  {
2870  int32_t size=get_int_from_int_or_str();
2871  int32_t order=get_int_from_int_or_str();
2872  int32_t max_mismatch=get_int_from_int_or_str();
2873  int32_t length=get_int_from_int_or_str();
2874  int32_t* shifts=NULL;
2875  int32_t l=0;
2876  get_vector_from_int_vector_or_str(shifts, l);
2877 
2878  ASSERT(l==length);
2879 
2880  bool use_normalization=true;
2881  if (strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2882  use_normalization=false;
2883 
2884  kernel=ui_kernel->create_weighteddegreepositionstring2(
2885  size, order, max_mismatch, shifts, length,
2886  use_normalization);
2887 
2888  SG_FREE(shifts);
2889  }
2890 
2891  SG_FREE(dtype);
2892  }
2893  else if (strmatch(type, "WEIGHTEDDEGREEPOS3"))
2894  {
2895  if (m_nrhs<7)
2896  return NULL;
2897 
2898  char* dtype=get_str_from_str_or_direct(len);
2899  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2900  {
2901  int32_t size=get_int_from_int_or_str();
2902  int32_t order=get_int_from_int_or_str();
2903  int32_t max_mismatch=get_int_from_int_or_str();
2904  int32_t length=get_int_from_int_or_str();
2905  int32_t mkl_stepsize=get_int_from_int_or_str();
2906  int32_t* shifts=NULL;
2907  int32_t l=0;
2908  get_vector_from_int_vector_or_str(shifts, l);
2909  ASSERT(l==length);
2910 
2911  float64_t* position_weights=NULL;
2912  if (m_nrhs>9+length)
2913  {
2914  get_vector_from_real_vector_or_str(
2915  position_weights, length);
2916  }
2917 
2918  kernel=ui_kernel->create_weighteddegreepositionstring3(
2919  size, order, max_mismatch, shifts, length,
2920  mkl_stepsize, position_weights);
2921 
2922  SG_FREE(position_weights);
2923  SG_FREE(shifts);
2924  }
2925 
2926  SG_FREE(dtype);
2927  }
2928  else if (strmatch(type, "WEIGHTEDDEGREEPOS"))
2929  {
2930  if (m_nrhs<4)
2931  return NULL;
2932 
2933  char* dtype=get_str_from_str_or_direct(len);
2934  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2935  {
2936  int32_t size=get_int_from_int_or_str();
2937  int32_t order=3;
2938  int32_t max_mismatch=0;
2939  int32_t length=0;
2940  int32_t center=0;
2941  float64_t step=1;
2942 
2943  if (m_nrhs>4)
2944  {
2945  order=get_int_from_int_or_str();
2946 
2947  if (m_nrhs>5)
2948  {
2949  max_mismatch=get_int_from_int_or_str();
2950 
2951  if (m_nrhs>6)
2952  {
2953  length=get_int_from_int_or_str();
2954 
2955  if (m_nrhs>7)
2956  {
2957  center=get_int_from_int_or_str();
2958 
2959  if (m_nrhs>8)
2960  step=get_real_from_real_or_str();
2961  }
2962  }
2963  }
2964  }
2965 
2966  kernel=ui_kernel->create_weighteddegreepositionstring(
2967  size, order, max_mismatch, length, center, step);
2968  }
2969 
2970  SG_FREE(dtype);
2971  }
2972  else if (strmatch(type, "WEIGHTEDDEGREE"))
2973  {
2974  if (m_nrhs<4)
2975  return NULL;
2976 
2977  char* dtype=get_str_from_str_or_direct(len);
2978  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2979  {
2980  int32_t size=get_int_from_int_or_str();
2981  int32_t order=3;
2982  int32_t max_mismatch=0;
2983  bool use_normalization=true;
2984  int32_t mkl_stepsize=1;
2985  bool block_computation=true;
2986  int32_t single_degree=-1;
2987 
2988  if (m_nrhs>4)
2989  {
2990  order=get_int_from_int_or_str();
2991 
2992  if (m_nrhs>5)
2993  {
2994  max_mismatch=get_int_from_int_or_str();
2995 
2996  if (m_nrhs>6)
2997  {
2998  use_normalization=get_bool_from_bool_or_str();
2999 
3000  if (m_nrhs>7)
3001  {
3002  mkl_stepsize=get_int_from_int_or_str();
3003 
3004  if (m_nrhs>8)
3005  {
3006  block_computation=get_int_from_int_or_str();
3007 
3008  if (m_nrhs>9)
3009  single_degree=get_int_from_int_or_str();
3010  }
3011  }
3012  }
3013  }
3014  }
3015 
3016  kernel=ui_kernel->create_weighteddegreestring(
3017  size, order, max_mismatch, use_normalization,
3018  mkl_stepsize, block_computation, single_degree);
3019  }
3020 
3021  SG_FREE(dtype);
3022  }
3023  else if (strmatch(type, "WEIGHTEDDEGREERBF"))
3024  {
3025  if (m_nrhs<5)
3026  return NULL;
3027 
3028  char* dtype=get_str_from_str_or_direct(len);
3029  int32_t size=get_int_from_int_or_str();
3030  int32_t nof_properties=get_int_from_int_or_str();
3031  int32_t degree=1;
3032  float64_t width=1;
3033  if (m_nrhs>5)
3034  {
3035  degree=get_int_from_int_or_str();
3036  if (m_nrhs>6)
3037  {
3038  width=get_real_from_real_or_str();
3039  }
3040 
3041  }
3042  //if (strmatch(dtype, "REAL"))
3043 
3044  kernel=ui_kernel->create_weighteddegreerbf(size, degree, nof_properties, width);
3045 
3046  SG_FREE(dtype);
3047 
3048  }
3049  else if (strmatch(type, "SPECTRUMMISMATCHRBF"))
3050  {
3051  if (m_nrhs<7)
3052  return NULL;
3053 
3054  char* dtype=get_str_from_str_or_direct(len);
3055  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
3056  {
3057  int32_t size=get_int_from_int_or_str();
3058  int32_t degree=get_int_from_int_or_str();
3059  int32_t max_mismatch=get_int_from_int_or_str();
3060  float64_t width=get_real_from_real_or_str();
3061  float64_t* AA_matrix = NULL;
3062 
3063  //int32_t length=128*128;
3064  //get_vector_from_real_vector_or_str(AA_matrix, length);
3065  float64_t* helper_matrix=NULL;
3066  int32_t N=0;
3067  int32_t M=0;
3068  get_matrix(helper_matrix, N, M);
3069 
3070  if (N == 128 && M == 128)
3071  {
3072  AA_matrix=SG_MALLOC(float64_t, N*M);
3073  memcpy(AA_matrix, helper_matrix, N*M*sizeof(float64_t)) ;
3074  kernel=ui_kernel->create_spectrummismatchrbf(size, AA_matrix, max_mismatch, degree, width);
3075  }
3076  else
3077  {
3078  SG_ERROR("Matrix size %d %d\n", N, M);
3079  }
3080  }
3081  SG_FREE(dtype);
3082 
3083  }
3084 
3085  else if (strmatch(type, "SLIK") || strmatch(type, "LIK"))
3086  {
3087  if (m_nrhs<4)
3088  return NULL;
3089 
3090  char* dtype=get_str_from_str_or_direct(len);
3091  if (strmatch(dtype, "CHAR"))
3092  {
3093  int32_t size=get_int_from_int_or_str();
3094  int32_t length=3;
3095  int32_t inner_degree=3;
3096  int32_t outer_degree=1;
3097 
3098  if (m_nrhs>4)
3099  {
3100  length=get_int_from_int_or_str();
3101 
3102  if (m_nrhs>5)
3103  {
3104  inner_degree=get_int_from_int_or_str();
3105 
3106  if (m_nrhs>6)
3107  outer_degree=get_int_from_int_or_str();
3108  }
3109  }
3110 
3111  if (strmatch(type, "SLIK"))
3112  {
3113  kernel=ui_kernel->create_localityimprovedstring(
3114  size, length, inner_degree, outer_degree,
3116  }
3117  else
3118  {
3119  kernel=ui_kernel->create_localityimprovedstring(
3120  size, length, inner_degree, outer_degree,
3122  }
3123  }
3124 
3125  SG_FREE(dtype);
3126  }
3127  else if (strmatch(type, "POLY"))
3128  {
3129  if (m_nrhs<4)
3130  return NULL;
3131 
3132  char* dtype=get_str_from_str_or_direct(len);
3133  int32_t size=get_int_from_int_or_str();
3134  int32_t degree=2;
3135  bool inhomogene=false;
3136  bool normalize=true;
3137 
3138  if (m_nrhs>4)
3139  {
3140  degree=get_int_from_int_or_str();
3141 
3142  if (m_nrhs>5)
3143  {
3144  inhomogene=get_bool_from_bool_or_str();
3145 
3146  if (m_nrhs>6)
3147  normalize=get_bool_from_bool_or_str();
3148  }
3149  }
3150 
3151  if (strmatch(dtype, "REAL"))
3152  {
3153  kernel=ui_kernel->create_poly(
3154  size, degree, inhomogene, normalize);
3155  }
3156  else if (strmatch(dtype, "SPARSEREAL"))
3157  {
3158  kernel=ui_kernel->create_sparsepoly(
3159  size, degree, inhomogene, normalize);
3160  }
3161 
3162  SG_FREE(dtype);
3163  }
3164  else if (strmatch(type, "SIGMOID"))
3165  {
3166  if (m_nrhs<4)
3167  return NULL;
3168 
3169  char* dtype=get_str_from_str_or_direct(len);
3170  if (strmatch(dtype, "REAL"))
3171  {
3172  int32_t size=get_int_from_int_or_str();
3173  float64_t gamma=0.01;
3174  float64_t coef0=0;
3175 
3176  if (m_nrhs>4)
3177  {
3178  gamma=get_real_from_real_or_str();
3179 
3180  if (m_nrhs>5)
3181  coef0=get_real_from_real_or_str();
3182  }
3183 
3184  kernel=ui_kernel->create_sigmoid(size, gamma, coef0);
3185  }
3186 
3187  SG_FREE(dtype);
3188  }
3189  else if (strmatch(type, "GAUSSIAN")) // RBF
3190  {
3191  if (m_nrhs<4)
3192  return NULL;
3193 
3194  char* dtype=get_str_from_str_or_direct(len);
3195  int32_t size=get_int_from_int_or_str();
3196  float64_t width=1;
3197  if (m_nrhs>4)
3198  width=get_real_from_real_or_str();
3199 
3200  if (strmatch(dtype, "REAL"))
3201  kernel=ui_kernel->create_gaussian(size, width);
3202  else if (strmatch(dtype, "SPARSEREAL"))
3203  kernel=ui_kernel->create_sparsegaussian(size, width);
3204 
3205  SG_FREE(dtype);
3206  }
3207  else if (strmatch(type, "GAUSSIANSHIFT")) // RBF
3208  {
3209  if (m_nrhs<7)
3210  return NULL;
3211 
3212  char* dtype=get_str_from_str_or_direct(len);
3213  if (strmatch(dtype, "REAL"))
3214  {
3215  int32_t size=get_int_from_int_or_str();
3216  float64_t width=get_real_from_real_or_str();
3217  int32_t max_shift=get_int_from_int_or_str();
3218  int32_t shift_step=get_int_from_int_or_str();
3219 
3220  kernel=ui_kernel->create_gaussianshift(
3221  size, width, max_shift, shift_step);
3222  }
3223 
3224  SG_FREE(dtype);
3225  }
3226  else if (strmatch(type, "CUSTOM"))
3227  {
3228  if (m_nrhs!=4 || !create_return_values(0))
3229  return NULL;
3230 
3231  float64_t* kmatrix=NULL;
3232  int32_t num_feat=0;
3233  int32_t num_vec=0;
3234  get_matrix(kmatrix, num_feat, num_vec);
3235 
3236  int32_t tlen=0;
3237  char* ktype=get_string(tlen);
3238 
3239  if (!strmatch(ktype, "DIAG") &&
3240  !strmatch(ktype, "FULL") &&
3241  !strmatch(ktype, "FULL2DIAG"))
3242  {
3243  SG_FREE(ktype);
3244  SG_ERROR("Undefined type, not DIAG, FULL or FULL2DIAG.\n");
3245  }
3246 
3247  bool source_is_diag=false;
3248  bool dest_is_diag=false;
3249 
3250  if (strmatch(ktype, "FULL2DIAG"))
3251  dest_is_diag=true;
3252  else if (strmatch(ktype, "DIAG"))
3253  {
3254  source_is_diag=true;
3255  dest_is_diag=true;
3256  }
3257 
3258  kernel=ui_kernel->create_custom(kmatrix, num_feat, num_vec,
3259  source_is_diag, dest_is_diag);
3260  }
3261  else if (strmatch(type, "CONST"))
3262  {
3263  if (m_nrhs<4)
3264  return NULL;
3265 
3266  char* dtype=get_str_from_str_or_direct(len);
3267  if (strmatch(dtype, "REAL"))
3268  {
3269  int32_t size=get_int_from_int_or_str();
3270  float64_t c=1;
3271  if (m_nrhs>4)
3272  c=get_real_from_real_or_str();
3273 
3274  kernel=ui_kernel->create_const(size, c);
3275  }
3276 
3277  SG_FREE(dtype);
3278  }
3279  else if (strmatch(type, "DIAG"))
3280  {
3281  if (m_nrhs<4)
3282  return NULL;
3283 
3284  char* dtype=get_str_from_str_or_direct(len);
3285  if (strmatch(dtype, "REAL"))
3286  {
3287  int32_t size=get_int_from_int_or_str();
3288  float64_t diag=1;
3289  if (m_nrhs>4)
3290  diag=get_real_from_real_or_str();
3291 
3292  kernel=ui_kernel->create_diag(size, diag);
3293  }
3294 
3295  SG_FREE(dtype);
3296  }
3297 
3298  else if (strmatch(type, "TPPK"))
3299  {
3300  if (m_nrhs!=5)
3301  return NULL;
3302 
3303  char* dtype=get_str_from_str_or_direct(len);
3304  if (strmatch(dtype, "INT"))
3305  {
3306  int32_t size=get_int_from_int_or_str();
3307  float64_t* km=NULL;
3308  int32_t rows=0;
3309  int32_t cols=0;
3310  get_matrix(km, rows, cols);
3311  kernel=ui_kernel->create_tppk(size, km, rows, cols);
3312  }
3313 
3314  SG_FREE(dtype);
3315  }
3316  else
3318 
3319  SG_FREE(type);
3320  SG_DEBUG("created kernel: %p\n", kernel);
3321  return kernel;
3322 }
3323 
3324 
// Optionally wraps byte-string features in a derived feature object selected
// by a feature-class string read from the interface arguments.
//
// When m_nrhs<=4 the input pointer is returned untouched. Otherwise a class
// string is read with get_string() and handled as follows:
//   "WD"    - requires m_nrhs==7; reads order and from_order and returns a new
//             CWDFeatures built on top of the input features.
//   "WSPEC" - requires m_nrhs==8; reads order, start and normalize, converts
//             the input through `sf`, unrefs the input and returns a new
//             CImplicitWeightedSpecFeatures built on `sf`.
//   other   - the input pointer is returned unchanged; no error is raised.
//
// NOTE(review): the embedded listing numbers jump from 3358 to 3360, so the
// statement that declares/creates `sf` is not present in this listing.
// Restore it from the upstream source before building.
3325 CFeatures* CSGInterface::create_custom_string_features(CStringFeatures<uint8_t>* orig_feat)
3326 {
3327  CFeatures* feat=orig_feat;
3328 
3329  if (m_nrhs>4)
3330  {
3331  int32_t start=-1;
3332  int32_t order=0;
3333  int32_t from_order=0;
3334  bool normalize=true;
3335 
3336  int32_t feature_class_len=0;
3337  char* feature_class_str=get_string(feature_class_len);
3338  ASSERT(feature_class_str);
      // `alphabet` is allocated in both branches below and only released at
      // the end of this scope; no other use is visible in this listing.
3339  CAlphabet* alphabet=NULL;
3340  if (strmatch(feature_class_str, "WD"))
3341  {
3342  if (m_nrhs!=7)
3343  SG_ERROR("Please specify alphabet, WD, order, from_order\n");
3344 
3345  alphabet=new CAlphabet(RAWDNA);
3346  order=get_int();
3347  from_order=get_int();
3348  feat = new CWDFeatures((CStringFeatures<uint8_t>*) feat, order, from_order);
3349  }
3350  else if (strmatch(feature_class_str, "WSPEC"))
3351  {
3352  if (m_nrhs!=8)
3353  SG_ERROR("Please specify alphabet, order, WSPEC, start, normalize\n");
3354 
3355  alphabet=new CAlphabet(RAWDNA);
      // Arguments are consumed in the order: order, start, normalize.
3356  order=get_int();
3357  start=get_int();
3358  normalize=get_bool();
      // (listing line 3359, which introduces `sf`, is missing here)
3360  sf->obtain_from_char_features((CStringFeatures<uint8_t>*) feat, start, order, 0, normalize);
3361  sf->add_preprocessor(new CSortWordString());
3362  sf->apply_preprocessor();
      // The original features are released once `sf` has been derived.
3363  SG_UNREF(feat);
3364  feat = new CImplicitWeightedSpecFeatures(sf, normalize);
3365  }
3366  SG_FREE(feature_class_str);
3367 
3368  SG_UNREF(alphabet);
3369  }
3370 
3371  return feat;
3372 }
3373 
3374 CFeatures* CSGInterface::create_custom_real_features(CDenseFeatures<float64_t>* orig_feat)
3375 {
3376  CFeatures* feat=orig_feat;
3377 
3378  if (m_nrhs==6)
3379  {
3380  int32_t degree=0;
3381  int32_t feature_class_len=0;
3382  bool normalize;
3383  char* feature_class_str=get_string(feature_class_len);
3384  ASSERT(feature_class_str);
3385  if (strmatch(feature_class_str, "POLY"))
3386  {
3387  //if (m_nrhs!=7)
3388  // SG_ERROR("Please specify POLY, degree\n");
3389 
3390  degree=get_int();
3391  normalize = get_bool();
3392  feat = new CPolyFeatures((CDenseFeatures<float64_t>*) feat, degree, normalize);
3393 
3394  }
3395  else
3396  SG_ERROR("Unknown feature class: %s\n", feature_class_str);
3397 
3398  SG_FREE(feature_class_str);
3399  }
3400 
3401  return feat;
3402 }
3403 
// Command handler kept only for backwards compatibility: it invokes the
// SG_DEPRECATED macro and reports success without touching any kernel state.
// NOTE(review): SG_DEPRECATED is defined elsewhere; presumably it emits a
// deprecation notice - confirm against its definition.
3404 bool CSGInterface::cmd_init_kernel()
3405 {
3406  SG_DEPRECATED;
3407  return true;
3408 }
3409 
3410 bool CSGInterface::cmd_clean_kernel()
3411 {
3412  if (m_nrhs<1 || !create_return_values(0))
3413  return false;
3414 
3415  return ui_kernel->clean_kernel();
3416 }
3417 
3418 bool CSGInterface::cmd_save_kernel()
3419 {
3420  if (m_nrhs<2 || !create_return_values(0))
3421  return false;
3422 
3423  int32_t len=0;
3424  char* filename=get_str_from_str_or_direct(len);
3425 
3426  bool success=ui_kernel->save_kernel(filename);
3427 
3428  SG_FREE(filename);
3429  return success;
3430 }
3431 
// Command handler that returns the kernel matrix to the caller.
//
// Accepts at most two interface arguments and produces one return value.
// When a second argument is given it is read as a target string and passed
// to ui_kernel->init_kernel(); otherwise init_kernel() receives NULL.
// Returns the result of init_kernel(); the matrix is only emitted when that
// call succeeded.
//
// NOTE(review): the embedded listing numbers jump from 3449 to 3451, so the
// statement that produces `km` is not present in this listing.
3432 bool CSGInterface::cmd_get_kernel_matrix()
3433 {
3434  if (m_nrhs>2 || !create_return_values(1))
3435  return false;
3436 
3437  int32_t len=0;
3438  char* target=NULL;
3439 
3440  if (m_nrhs==2)
3441  target=get_string(len);
3442  bool success=ui_kernel->init_kernel(target);
3443 
3444  if (success)
3445  {
3446  CKernel* kernel=ui_kernel->get_kernel();
3447  if (!kernel || !kernel->has_features())
3448  SG_ERROR("No kernel defined or not initialized.\n");
3449 
      // (listing line 3450, which defines `km`, is missing here)
3451  set_matrix(km.matrix, km.num_rows, km.num_cols);
3452  }
3453 
3454  SG_FREE(target);
3455 
3456  return success;
3457 }
3458 
// Command handler that sets (or deletes) position weights on a weighted
// degree style kernel.
//
// Accepts two or three interface arguments and produces no return values.
// The weights are read as a dim x len matrix. If the current kernel is a
// combined kernel, its last sub-kernel is used and must be of type
// K_WEIGHTEDDEGREE or K_WEIGHTEDDEGREEPOS.
//
// NOTE(review): the kernel type is validated only inside the combined
// branch; a stand-alone kernel of any other type reaches the else-branch
// below unchecked.
// NOTE(review): the embedded listing numbers skip 3487 and 3498-3499, so the
// statements introducing `k` in both branches are not present in this
// listing.
3459 bool CSGInterface::cmd_set_WD_position_weights()
3460 {
3461  if (m_nrhs<2 || m_nrhs>3 || !create_return_values(0))
3462  return false;
3463 
3464  CKernel* kernel=ui_kernel->get_kernel();
3465  if (!kernel)
3466  SG_ERROR("No kernel.\n");
3467 
3468  if (kernel->get_kernel_type()==K_COMBINED)
3469  {
3470  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3471  if (!kernel)
3472  SG_ERROR("No last kernel.\n");
3473 
3474  EKernelType ktype=kernel->get_kernel_type();
3475  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3476  SG_ERROR("Unsupported kernel.\n");
3477  }
3478 
3479  bool success=false;
3480  float64_t* weights=NULL;
3481  int32_t dim=0;
3482  int32_t len=0;
3483  get_matrix(weights, dim, len);
3484 
3485  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3486  {
      // (listing line 3487, the left-hand side of this declaration, is missing)
3488  (CWeightedDegreeStringKernel*) kernel;
3489 
      // Only a single row is accepted unless the matrix has zero columns.
3490  if (dim!=1 && len>0)
3491  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n");
3492 
3493  ui_kernel->init_kernel("TRAIN");
3494  success=k->set_position_weights(weights, len);
3495  }
3496  else
3497  {
      // (listing lines 3498-3499, which introduce `k` for this branch, are missing)
3500  char* target=NULL;
3501  bool is_train=true;
3502 
      // Optional third argument selects TRAIN (default) or TEST.
3503  if (m_nrhs==3)
3504  {
3505  int32_t tlen=0;
3506  target=get_string(tlen);
3507  if (!target)
3508  {
3509  SG_FREE(weights);
3510  SG_ERROR("Couldn't find second argument to method.\n");
3511  }
3512 
3513  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
3514  {
3515  SG_FREE(target);
3516  SG_ERROR("Second argument none of TRAIN or TEST.\n");
3517  }
3518 
3519  if (strmatch(target, "TEST"))
3520  is_train=false;
3521  }
3522 
3523  if (dim!=1 && len>0)
3524  {
3525  SG_FREE(target);
3526  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n");
3527  }
3528 
      // An empty (0 x 0) matrix requests deletion of the stored weights.
      // NOTE(review): create_return_values(3) is used as the lhs/rhs switch
      // in both cases below; why 3 is the tested value is not visible here.
3529  if (dim==0 && len==0)
3530  {
3531  if (create_return_values(3))
3532  {
3533  if (is_train)
3534  success=k->delete_position_weights_lhs();
3535  else
3536  success=k->delete_position_weights_rhs();
3537  }
3538  else
3539  success=k->delete_position_weights();
3540  }
3541  else
3542  {
3543  if (create_return_values(3))
3544  {
3545  if (is_train)
3546  success=k->set_position_weights_lhs(weights, dim, len);
3547  else
3548  success=k->set_position_weights_rhs(weights, dim, len);
3549  }
3550  else
3551  {
3552  ui_kernel->init_kernel("TRAIN");
3553  k->set_position_weights(SGVector<float64_t>(weights, len));
3554  success=true;
3555  }
3556  }
3557 
3558  SG_FREE(target);
3559  }
3560 
3561  return success;
3562 }
3563 
3564 bool CSGInterface::cmd_get_subkernel_weights()
3565 {
3566  if (m_nrhs!=1 || !create_return_values(1))
3567  return false;
3568 
3569  CKernel *kernel=ui_kernel->get_kernel();
3570  if (!kernel)
3571  SG_ERROR("Invalid kernel.\n");
3572 
3573  EKernelType ktype=kernel->get_kernel_type();
3574  const float64_t* weights=NULL;
3575 
3576  if (ktype==K_COMBINED)
3577  {
3578  int32_t num_weights=-1;
3579  weights=((CCombinedKernel *) kernel)->get_subkernel_weights(num_weights);
3580 
3581  // matrices of shape 1 x num_weight are returned
3582  set_matrix(weights, 1, num_weights);
3583  return true;
3584  }
3585 
3586  int32_t degree=-1;
3587  int32_t length=-1;
3588 
3589  if (ktype==K_WEIGHTEDDEGREE)
3590  {
3591  weights=((CWeightedDegreeStringKernel *) kernel)->
3592  get_degree_weights(degree, length);
3593  }
3594  else if (ktype==K_WEIGHTEDDEGREEPOS)
3595  {
3596  weights=((CWeightedDegreePositionStringKernel *) kernel)->
3597  get_degree_weights(degree, length);
3598  }
3599  else
3600  SG_ERROR("Setting subkernel weights not supported on this kernel.\n");
3601 
3602  if (length==0)
3603  length=1;
3604 
3605  set_matrix(weights, degree, length);
3606  return true;
3607 }
3608 
// Command handler that sets the sub-kernel weights of the current kernel.
//
// Needs exactly two interface arguments and produces no return values. The
// weights are read as a dim x len matrix and handled by kernel type:
//   K_WEIGHTEDDEGREE / K_WEIGHTEDDEGREEPOS - dim must equal the kernel's
//       degree and len must be >=1; len==1 is passed on as 0.
//   any other kernel - the matrix must be 1 x get_num_subkernels().
//
// NOTE(review): the embedded listing numbers skip 3627 and 3640-3641, so the
// statements introducing `k` in the two weighted-degree branches are not
// present in this listing.
// NOTE(review): both "got (%d x %d)" messages are fed (len, degree) rather
// than the dimensions of the matrix that was actually received - confirm
// whether (dim, len) was intended.
3609 bool CSGInterface::cmd_set_subkernel_weights()
3610 {
3611  if (m_nrhs!=2 || !create_return_values(0))
3612  return false;
3613 
3614  CKernel* kernel=ui_kernel->get_kernel();
3615  if (!kernel)
3616  SG_ERROR("No kernel.\n");
3617 
3618  bool success=false;
3619  float64_t* weights=NULL;
3620  int32_t dim=0;
3621  int32_t len=0;
3622  get_matrix(weights, dim, len);
3623 
3624  EKernelType ktype=kernel->get_kernel_type();
3625  if (ktype==K_WEIGHTEDDEGREE)
3626  {
      // (listing line 3627, the left-hand side of this declaration, is missing)
3628  (CWeightedDegreeStringKernel*) kernel;
3629  int32_t degree=k->get_degree();
3630  if (dim!=degree || len<1)
3631  SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree);
3632 
      // A single column is forwarded with a column count of 0.
3633  if (len==1)
3634  len=0;
3635 
3636  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3637  }
3638  else if (ktype==K_WEIGHTEDDEGREEPOS)
3639  {
      // (listing lines 3640-3641, which introduce `k` for this branch, are missing)
3642  int32_t degree=k->get_degree();
3643  if (dim!=degree || len<1)
3644  SG_ERROR("WDPos: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree);
3645 
3646  if (len==1)
3647  len=0;
3648 
3649  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3650  }
3651  else // all other kernels
3652  {
3653  int32_t num_subkernels=kernel->get_num_subkernels();
3654  if (dim!=1 || len!=num_subkernels)
3655  SG_ERROR("All: Dimension mismatch (should be 1 x num_subkernels)\n");
3656 
3657  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3658  success=true;
3659  }
3660 
3661  return success;
3662 }
3663 
// Command handler that sets the weights of one sub-kernel of a combined
// kernel, selected by index.
//
// Needs exactly three interface arguments (after the command name: a dim x len
// weight matrix, then the integer sub-kernel index) and produces no return
// values. The current kernel must be of type K_COMBINED. The selected
// sub-kernel is then handled by its own type:
//   K_WEIGHTEDDEGREE / K_WEIGHTEDDEGREEPOS - dim must equal the kernel's
//       degree and len must be >=1; len==1 is passed on as 0.
//   any other kernel - the matrix must be 1 x get_num_subkernels().
//
// NOTE(review): the embedded listing numbers skip 3691 and 3704-3705, so the
// statements introducing `k` in the two weighted-degree branches are not
// present in this listing.
3664 bool CSGInterface::cmd_set_subkernel_weights_combined()
3665 {
3666  if (m_nrhs!=3 || !create_return_values(0))
3667  return false;
3668 
3669  CKernel* kernel=ui_kernel->get_kernel();
3670  if (!kernel)
3671  SG_ERROR("No kernel.\n");
3672  if (kernel->get_kernel_type()!=K_COMBINED)
3673  SG_ERROR("Only works for combined kernels.\n");
3674 
3675  bool success=false;
3676  float64_t* weights=NULL;
3677  int32_t dim=0;
3678  int32_t len=0;
3679  get_matrix(weights, dim, len);
3680 
3681  int32_t idx=get_int();
3682  SG_DEBUG("using kernel_idx=%i\n", idx);
3683 
      // From here on `kernel` refers to the selected sub-kernel.
3684  kernel=((CCombinedKernel*) kernel)->get_kernel(idx);
3685  if (!kernel)
3686  SG_ERROR("No subkernel at idx %d.\n", idx);
3687 
3688  EKernelType ktype=kernel->get_kernel_type();
3689  if (ktype==K_WEIGHTEDDEGREE)
3690  {
      // (listing line 3691, the left-hand side of this declaration, is missing)
3692  (CWeightedDegreeStringKernel*) kernel;
3693  int32_t degree=k->get_degree();
3694  if (dim!=degree || len<1)
3695  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3696 
      // A single column is forwarded with a column count of 0.
3697  if (len==1)
3698  len=0;
3699 
3700  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3701  }
3702  else if (ktype==K_WEIGHTEDDEGREEPOS)
3703  {
      // (listing lines 3704-3705, which introduce `k` for this branch, are missing)
3706  int32_t degree=k->get_degree();
3707  if (dim!=degree || len<1)
3708  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3709 
3710  if (len==1)
3711  len=0;
3712 
3713  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3714  }
3715  else // all other kernels
3716  {
3717  int32_t num_subkernels=kernel->get_num_subkernels();
3718  if (dim!=1 || len!=num_subkernels)
3719  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n");
3720 
3721  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3722  success=true;
3723  }
3724 
3725  return success;
3726 }
3727 
3728 bool CSGInterface::cmd_get_dotfeature_weights_combined()
3729 {
3730  if (m_nrhs!=2 || !create_return_values(1))
3731  return false;
3732 
3733  int32_t tlen=0;
3734  char* target=get_string(tlen);
3735  CFeatures* features=NULL;
3736 
3737  if (strmatch(target, "TRAIN"))
3738  features=ui_features->get_train_features();
3739  else if (strmatch(target, "TEST"))
3740  features=ui_features->get_test_features();
3741  else
3742  {
3743  SG_FREE(target);
3744  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
3745  }
3746  SG_FREE(target);
3747 
3748  if (!features)
3749  SG_ERROR("No features.\n");
3750  if (features->get_feature_class()!=C_COMBINED_DOT)
3751  SG_ERROR("Only works for combined dot features.\n");
3752 
3753  SGVector<float64_t> weights = ((CCombinedDotFeatures*) features)->get_subfeature_weights();
3754  set_vector(weights.vector, weights.vlen);
3755 
3756  return true;
3757 }
3758 
3759 bool CSGInterface::cmd_set_dotfeature_weights_combined()
3760 {
3761  if (m_nrhs!=3 || !create_return_values(0))
3762  return false;
3763 
3764  int32_t tlen=0;
3765  char* target=get_string(tlen);
3766  CFeatures* features=NULL;
3767 
3768  if (strmatch(target, "TRAIN"))
3769  features=ui_features->get_train_features();
3770  else if (strmatch(target, "TEST"))
3771  features=ui_features->get_test_features();
3772  else
3773  {
3774  SG_FREE(target);
3775  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
3776  }
3777  SG_FREE(target);
3778 
3779  if (!features)
3780  SG_ERROR("No features.\n");
3781  if (features->get_feature_class()!=C_COMBINED_DOT)
3782  SG_ERROR("Only works for combined dot features.\n");
3783 
3784  float64_t* weights=NULL;
3785  int32_t dim=0;
3786  int32_t len=0;
3787  get_matrix(weights, dim, len);
3788 
3789  ((CCombinedDotFeatures*) features)->set_subfeature_weights(SGVector<float64_t>(weights, len));
3790 
3791  return true;
3792 }
3793 
// Command handler that sets the weights of the last sub-kernel of a combined
// kernel.
//
// Needs exactly two interface arguments (the second one being a dim x len
// weight matrix) and produces no return values. The current kernel must be
// of type K_COMBINED. Its last sub-kernel is then handled by its own type:
//   K_WEIGHTEDDEGREE / K_WEIGHTEDDEGREEPOS - dim must equal the kernel's
//       degree and len must be >=1; len==1 is passed on as 0.
//   any other kernel - the matrix must be 1 x get_num_subkernels().
//
// NOTE(review): the embedded listing numbers skip 3818 and 3829-3830, so the
// statements introducing `k` in the two weighted-degree branches are not
// present in this listing.
3794 bool CSGInterface::cmd_set_last_subkernel_weights()
3795 {
3796  if (m_nrhs!=2 || !create_return_values(0))
3797  return false;
3798 
3799  CKernel* kernel=ui_kernel->get_kernel();
3800  if (!kernel)
3801  SG_ERROR("No kernel.\n");
3802  if (kernel->get_kernel_type()!=K_COMBINED)
3803  SG_ERROR("Only works for Combined kernels.\n");
3804 
      // From here on `kernel` refers to the last sub-kernel.
3805  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3806  if (!kernel)
3807  SG_ERROR("No last kernel.\n");
3808 
3809  bool success=false;
3810  float64_t* weights=NULL;
3811  int32_t dim=0;
3812  int32_t len=0;
3813  get_matrix(weights, dim, len);
3814 
3815  EKernelType ktype=kernel->get_kernel_type();
3816  if (ktype==K_WEIGHTEDDEGREE)
3817  {
      // (listing line 3818, which introduces `k` for this branch, is missing)
3819  if (dim!=k->get_degree() || len<1)
3820  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3821 
      // A single column is forwarded with a column count of 0.
3822  if (len==1)
3823  len=0;
3824 
3825  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3826  }
3827  else if (ktype==K_WEIGHTEDDEGREEPOS)
3828  {
      // (listing lines 3829-3830, which introduce `k` for this branch, are missing)
3831  if (dim!=k->get_degree() || len<1)
3832  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3833 
3834  if (len==1)
3835  len=0;
3836 
3837  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3838  }
3839  else // all other kernels
3840  {
3841  int32_t num_subkernels=kernel->get_num_subkernels();
3842  if (dim!=1 || len!=num_subkernels)
3843  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n");
3844 
3845  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3846  success=true;
3847  }
3848 
3849  return success;
3850 }
3851 
3852 bool CSGInterface::cmd_get_WD_position_weights()
3853 {
3854  if (m_nrhs!=1 || !create_return_values(1))
3855  return false;
3856 
3857  CKernel* kernel=ui_kernel->get_kernel();
3858  if (!kernel)
3859  SG_ERROR("No kernel.\n");
3860 
3861  if (kernel->get_kernel_type()==K_COMBINED)
3862  {
3863  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3864  if (!kernel)
3865  SG_ERROR("Couldn't find last kernel.\n");
3866 
3867  EKernelType ktype=kernel->get_kernel_type();
3868  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3869  SG_ERROR("Wrong subkernel type.\n");
3870  }
3871 
3872  int32_t len=0;
3873  const float64_t* position_weights;
3874 
3875  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3876  position_weights=((CWeightedDegreeStringKernel*) kernel)->get_position_weights(len);
3877  else
3878  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->get_position_weights(len);
3879 
3880  if (position_weights==NULL)
3881  set_vector(position_weights, 0);
3882  else
3883  set_vector(position_weights, len);
3884 
3885  return true;
3886 }
3887 
3888 bool CSGInterface::cmd_get_last_subkernel_weights()
3889 {
3890  if (m_nrhs!=1 || !create_return_values(1))
3891  return false;
3892 
3893  CKernel* kernel=ui_kernel->get_kernel();
3894  EKernelType ktype=kernel->get_kernel_type();
3895  if (!kernel)
3896  SG_ERROR("No kernel.\n");
3897  if (ktype!=K_COMBINED)
3898  SG_ERROR("Only works for Combined kernels.\n");
3899 
3900  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3901  if (!kernel)
3902  SG_ERROR("Couldn't find last kernel.\n");
3903 
3904  int32_t degree=0;
3905  int32_t len=0;
3906 
3907  if (ktype==K_COMBINED)
3908  {
3909  int32_t num_weights=0;
3910  const float64_t* weights=
3911  ((CCombinedKernel*) kernel)->get_subkernel_weights(num_weights);
3912 
3913  set_vector(weights, num_weights);
3914  return true;
3915  }
3916 
3917  float64_t* weights=NULL;
3918  if (ktype==K_WEIGHTEDDEGREE)
3919  weights=((CWeightedDegreeStringKernel*) kernel)->
3920  get_degree_weights(degree, len);
3921  else if (ktype==K_WEIGHTEDDEGREEPOS)
3922  weights=((CWeightedDegreePositionStringKernel*) kernel)->
3923  get_degree_weights(degree, len);
3924  else
3925  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n");
3926 
3927  if (len==0)
3928  len=1;
3929 
3930  set_matrix(weights, degree, len);
3931 
3932  return true;
3933 }
3934 
// Command handler that evaluates a weighted degree (position) kernel per
// sub-kernel and returns the results as a matrix.
//
// Needs exactly one interface argument and produces one return value. The
// current kernel must be of type K_WEIGHTEDDEGREE or K_WEIGHTEDDEGREEPOS,
// must have right-hand-side features, and must report is_tree_initialized().
// The returned matrix has degree*len rows (len==0 is treated as 1) and one
// column per right-hand-side vector; column i is filled by
// compute_by_tree(i, ...).
//
// NOTE(review): the embedded listing numbers skip 3954, 3961-3962, 3982 and
// 3988-3989, so the statements introducing `k` in each of the four branches
// are not present in this listing.
3935 bool CSGInterface::cmd_compute_by_subkernels()
3936 {
3937  if (m_nrhs!=1 || !create_return_values(1))
3938  return false;
3939 
3940  CKernel* kernel=ui_kernel->get_kernel();
3941  if (!kernel)
3942  SG_ERROR("No kernel.\n");
3943  if (!kernel->get_rhs())
3944  SG_ERROR("No rhs.\n");
3945 
3946  int32_t num_vec=kernel->get_rhs()->get_num_vectors();
3947  int32_t degree=0;
3948  int32_t len=0;
3949  EKernelType ktype=kernel->get_kernel_type();
3950 
3951  // it would be nice to have a common base class for the WD kernels
3952  if (ktype==K_WEIGHTEDDEGREE)
3953  {
      // (listing line 3954, which introduces `k` for this branch, is missing)
      // Called here only to obtain degree and len; the returned pointer is unused.
3955  k->get_degree_weights(degree, len);
3956  if (!k->is_tree_initialized())
3957  SG_ERROR("Kernel optimization not initialized.\n");
3958  }
3959  else if (ktype==K_WEIGHTEDDEGREEPOS)
3960  {
      // (listing lines 3961-3962, which introduce `k` for this branch, are missing)
3963  k->get_degree_weights(degree, len);
3964  if (!k->is_tree_initialized())
3965  SG_ERROR("Kernel optimization not initialized.\n");
3966  }
3967  else
3968  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n");
3969 
3970  if (len==0)
3971  len=1;
3972 
      // One block of num_feat values per right-hand-side vector, zeroed up front.
3973  int32_t num_feat=degree*len;
3974  int32_t num=num_feat*num_vec;
3975  float64_t* result=SG_MALLOC(float64_t, num);
3976 
3977  for (int32_t i=0; i<num; i++)
3978  result[i]=0;
3979 
3980  if (ktype==K_WEIGHTEDDEGREE)
3981  {
      // (listing line 3982, which introduces `k` for this branch, is missing)
3983  for (int32_t i=0; i<num_vec; i++)
3984  k->compute_by_tree(i, &result[i*num_feat]);
3985  }
3986  else
3987  {
      // (listing lines 3988-3989, which introduce `k` for this branch, are missing)
3990  for (int32_t i=0; i<num_vec; i++)
3991  k->compute_by_tree(i, &result[i*num_feat]);
3992  }
3993 
3994  set_matrix(result, num_feat, num_vec);
3995  SG_FREE(result);
3996 
3997  return true;
3998 }
3999 
4000 bool CSGInterface::cmd_init_kernel_optimization()
4001 {
4002  if (m_nrhs<1 || !create_return_values(0))
4003  return false;
4004 
4005  return ui_kernel->init_kernel_optimization();
4006 }
4007 
4008 bool CSGInterface::cmd_get_kernel_optimization()
4009 {
4010  if (m_nrhs<1 || !create_return_values(1))
4011  return false;
4012 
4013  CKernel* kernel=ui_kernel->get_kernel();
4014  if (!kernel)
4015  SG_ERROR("No kernel defined.\n");
4016 
4017  switch (kernel->get_kernel_type())
4018  {
4019  case K_WEIGHTEDDEGREEPOS:
4020  {
4021  if (m_nrhs!=2)
4022  SG_ERROR("parameter missing\n");
4023 
4024  int32_t max_order=get_int();
4025  if ((max_order<1) || (max_order>12))
4026  {
4027  SG_WARNING( "max_order out of range 1..12 (%d). setting to 1\n", max_order);
4028  max_order=1;
4029  }
4030 
4032  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4033  if (!svm)
4034  SG_ERROR("No SVM defined.\n");
4035 
4036  int32_t num_suppvec=svm->get_num_support_vectors();
4037  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4038  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4039  int32_t num_feat=0;
4040  int32_t num_sym=0;
4041 
4042  for (int32_t i=0; i<num_suppvec; i++)
4043  {
4044  sv_idx[i]=svm->get_support_vector(i);
4045  sv_weight[i]=svm->get_alpha(i);
4046  }
4047 
4048  float64_t* position_weights=k->extract_w(max_order, num_feat,
4049  num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4050  SG_FREE(sv_idx);
4051  SG_FREE(sv_weight);
4052 
4053  set_matrix(position_weights, num_sym, num_feat);
4054  SG_FREE(position_weights);
4055 
4056  return true;
4057  }
4058 
4059  case K_COMMWORDSTRING:
4061  {
4063  int32_t len=0;
4064  float64_t* weights;
4065  k->get_dictionary(len, weights);
4066 
4067  set_vector(weights, len);
4068  return true;
4069  }
4070  case K_LINEAR:
4071  {
4072  CLinearKernel* k=(CLinearKernel*) kernel;
4073  int32_t len=0;
4074  const float64_t* weights=k->get_normal(len);
4075 
4076  set_vector(weights, len);
4077  return true;
4078  }
4079  default:
4080  SG_ERROR("Unsupported kernel %s.\n", kernel->get_name());
4081  }
4082 
4083  return true;
4084 }
4085 
4086 bool CSGInterface::cmd_delete_kernel_optimization()
4087 {
4088  if (m_nrhs<1 || !create_return_values(0))
4089  return false;
4090 
4091  return ui_kernel->delete_kernel_optimization();
4092 }
4093 
4094 bool CSGInterface::cmd_use_diagonal_speedup()
4095 {
4096  if (m_nrhs<2 || !create_return_values(0))
4097  return false;
4098 
4099  bool speedup=get_bool();
4100 
4101  CKernel* kernel=ui_kernel->get_kernel();
4102  if (!kernel)
4103  SG_ERROR("No kernel defined.\n");
4104 
4105  if (kernel->get_kernel_type()==K_COMBINED)
4106  {
4107  SG_DEBUG("Identified combined kernel.\n");
4108  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
4109  if (!kernel)
4110  SG_ERROR("No last kernel defined.\n");
4111  }
4112 
4113  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4114  SG_ERROR("Currently only commwordstring kernel supports diagonal speedup\n");
4115 
4116  ((CCommWordStringKernel*) kernel)->set_use_dict_diagonal_optimization(speedup);
4117 
4118  SG_INFO("Diagonal speedup %s.\n", speedup ? "enabled" : "disabled");
4119 
4120  return true;
4121 }
4122 
4123 bool CSGInterface::cmd_set_kernel_optimization_type()
4124 {
4125  if (m_nrhs<2 || !create_return_values(0))
4126  return false;
4127 
4128  int32_t len=0;
4129  char* opt_type=get_str_from_str_or_direct(len);
4130 
4131  bool success=ui_kernel->set_optimization_type(opt_type);
4132 
4133  SG_FREE(opt_type);
4134  return success;
4135 }
4136 
4137 bool CSGInterface::cmd_set_solver()
4138 {
4139  if (m_nrhs<2 || !create_return_values(0))
4140  return false;
4141 
4142  int32_t len=0;
4143  char* solver=get_str_from_str_or_direct(len);
4144 
4145  bool success=ui_classifier->set_solver(solver);
4146 
4147  SG_FREE(solver);
4148  return success;
4149 }
4150 
4151 bool CSGInterface::cmd_set_constraint_generator()
4152 {
4153  if (m_nrhs<2 || !create_return_values(0))
4154  return false;
4155 
4156  int32_t len=0;
4157  char* cg=get_str_from_str_or_direct(len);
4158 
4159  bool success=ui_classifier->set_constraint_generator(cg);
4160 
4161  SG_FREE(cg);
4162  return success;
4163 }
4164 
4165 bool CSGInterface::cmd_set_prior_probs()
4166 {
4167  if (m_nrhs<3 || !create_return_values(0))
4168  return false;
4169 
4170  CSalzbergWordStringKernel* kernel=
4171  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4172  if (kernel->get_kernel_type()!=K_SALZBERG)
4173  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n");
4174 
4175  float64_t pos_probs=get_real_from_real_or_str();
4176  float64_t neg_probs=get_real_from_real_or_str();
4177 
4178  kernel->set_prior_probs(pos_probs, neg_probs);
4179 
4180  return true;
4181 }
4182 
4183 bool CSGInterface::cmd_set_prior_probs_from_labels()
4184 {
4185  if (m_nrhs<2 || !create_return_values(0))
4186  return false;
4187 
4188  CSalzbergWordStringKernel* kernel=
4189  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4190  if (kernel->get_kernel_type()!=K_SALZBERG)
4191  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n");
4192 
4193  float64_t* lab=NULL;
4194  int32_t len=0;
4195  get_vector(lab, len);
4196 
4197  CBinaryLabels* labels=new CBinaryLabels(len);
4198  for (int32_t i=0; i<len; i++)
4199  {
4200  if (!labels->set_label(i, lab[i]))
4201  SG_ERROR("Couldn't set label %d (of %d): %f.\n", i, len, lab[i]);
4202  }
4203  SG_FREE(lab);
4204 
4205  kernel->set_prior_probs_from_labels(labels);
4206 
4207  SG_UNREF(labels);
4208  return true;
4209 }
4210 
4211 #ifdef USE_SVMLIGHT
4212 bool CSGInterface::cmd_resize_kernel_cache()
4213 {
4214  if (m_nrhs<2 || !create_return_values(0))
4215  return false;
4216 
4217  int32_t size=get_int_from_int_or_str();
4218  return ui_kernel->resize_kernel_cache(size);
4219 }
4220 #endif //USE_SVMLIGHT
4221 
4222 
4225 bool CSGInterface::cmd_set_distance()
4226 {
4227  if (m_nrhs<3 || !create_return_values(0))
4228  return false;
4229 
4230  CDistance* distance=NULL;
4231  int32_t len=0;
4232  char* type=get_str_from_str_or_direct(len);
4233  char* dtype=get_str_from_str_or_direct(len);
4234 
4235  if (strmatch(type, "MINKOWSKI") && m_nrhs==4)
4236  {
4237  float64_t k=get_real_from_real_or_str();
4238  distance=ui_distance->create_minkowski(k);
4239  }
4240  else if (strmatch(type, "MANHATTAN"))
4241  {
4242  if (strmatch(dtype, "REAL"))
4243  distance=ui_distance->create_generic(D_MANHATTAN);
4244  else if (strmatch(dtype, "WORD"))
4245  distance=ui_distance->create_generic(D_MANHATTANWORD);
4246  }
4247  else if (strmatch(type, "HAMMING") && strmatch(dtype, "WORD"))
4248  {
4249  bool use_sign=false;
4250  if (m_nrhs==4)
4251  use_sign=get_bool_from_bool_or_str(); // optional
4252 
4253  distance=ui_distance->create_hammingword(use_sign);
4254  }
4255  else if (strmatch(type, "CANBERRA"))
4256  {
4257  if (strmatch(dtype, "REAL"))
4258  distance=ui_distance->create_generic(D_CANBERRA);
4259  else if (strmatch(dtype, "WORD"))
4260  distance=ui_distance->create_generic(D_CANBERRAWORD);
4261  }
4262  else if (strmatch(type, "CHEBYSHEW") && strmatch(dtype, "REAL"))
4263  {
4264  distance=ui_distance->create_generic(D_CHEBYSHEW);
4265  }
4266  else if (strmatch(type, "GEODESIC") && strmatch(dtype, "REAL"))
4267  {
4268  distance=ui_distance->create_generic(D_GEODESIC);
4269  }
4270  else if (strmatch(type, "JENSEN") && strmatch(dtype, "REAL"))
4271  {
4272  distance=ui_distance->create_generic(D_JENSEN);
4273  }
4274  else if (strmatch(type, "CHISQUARE") && strmatch(dtype, "REAL"))
4275  {
4276  distance=ui_distance->create_generic(D_CHISQUARE);
4277  }
4278  else if (strmatch(type, "TANIMOTO") && strmatch(dtype, "REAL"))
4279  {
4280  distance=ui_distance->create_generic(D_TANIMOTO);
4281  }
4282  else if (strmatch(type, "COSINE") && strmatch(dtype, "REAL"))
4283  {
4284  distance=ui_distance->create_generic(D_COSINE);
4285  }
4286  else if (strmatch(type, "BRAYCURTIS") && strmatch(dtype, "REAL"))
4287  {
4288  distance=ui_distance->create_generic(D_BRAYCURTIS);
4289  }
4290  else if (strmatch(type, "EUCLIDEAN"))
4291  {
4292  if (strmatch(dtype, "REAL"))
4293  distance=ui_distance->create_generic(D_EUCLIDEAN);
4294  else if (strmatch(dtype, "SPARSEREAL"))
4295  distance=ui_distance->create_generic(D_SPARSEEUCLIDEAN);
4296  }
4297  else
4299 
4300  SG_FREE(type);
4301  SG_FREE(dtype);
4302  return ui_distance->set_distance(distance);
4303 }
4304 
4305 bool CSGInterface::cmd_init_distance()
4306 {
4307  SG_DEPRECATED;
4308  return true;
4309 }
4310 
4311 bool CSGInterface::cmd_get_distance_matrix()
4312 {
4313  if (m_nrhs!=2 || !create_return_values(1))
4314  return false;
4315 
4316  int32_t len=0;
4317  char* target=get_string(len);
4318 
4319  bool success=ui_distance->init_distance(target);
4320 
4321  if (success)
4322  {
4323  CDistance* distance=ui_distance->get_distance();
4324  if (!distance || !distance->has_features())
4325  SG_ERROR("No distance defined or not initialized.\n");
4326 
4327  int32_t num_vec_lhs=0;
4328  int32_t num_vec_rhs=0;
4329  float64_t* dmatrix=NULL;
4330  dmatrix=distance->get_distance_matrix_real(num_vec_lhs, num_vec_rhs, dmatrix);
4331 
4332  set_matrix(dmatrix, num_vec_lhs, num_vec_rhs);
4333  SG_FREE(dmatrix);
4334  }
4335 
4336  return success;
4337 }
4338 
4339 
4340 /* POIM */
4341 
4342 bool CSGInterface::cmd_get_SPEC_consensus()
4343 {
4344  if (m_nrhs!=1 || !create_return_values(1))
4345  return false;
4346 
4347  CKernel* kernel=ui_kernel->get_kernel();
4348  if (!kernel)
4349  SG_ERROR("No kernel.\n");
4350  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4351  SG_ERROR("Only works for CommWordString kernels.\n");
4352 
4353  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4354  ASSERT(svm);
4355  int32_t num_suppvec=svm->get_num_support_vectors();
4356  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4357  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4358  int32_t num_feat=0;
4359 
4360  for (int32_t i=0; i<num_suppvec; i++)
4361  {
4362  sv_idx[i]=svm->get_support_vector(i);
4363  sv_weight[i]=svm->get_alpha(i);
4364  }
4365 
4366  char* consensus=((CCommWordStringKernel*) kernel)->compute_consensus(
4367  num_feat, num_suppvec, sv_idx, sv_weight);
4368  SG_FREE(sv_idx);
4369  SG_FREE(sv_weight);
4370 
4371  set_vector(consensus, num_feat);
4372  SG_FREE(consensus);
4373 
4374  return true;
4375 }
4376 
4377 bool CSGInterface::cmd_get_SPEC_scoring()
4378 {
4379  if (m_nrhs!=2 || !create_return_values(1))
4380  return false;
4381 
4382  int32_t max_order=get_int();
4383  CKernel* kernel=ui_kernel->get_kernel();
4384  if (!kernel)
4385  SG_ERROR("No kernel.\n");
4386 
4387  EKernelType ktype=kernel->get_kernel_type();
4388  if (ktype!=K_COMMWORDSTRING && ktype!=K_WEIGHTEDCOMMWORDSTRING)
4389  SG_ERROR("Only works for (Weighted) CommWordString kernels.\n");
4390 
4391  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4392  ASSERT(svm);
4393  int32_t num_suppvec=svm->get_num_support_vectors();
4394  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4395  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4396  int32_t num_feat=0;
4397  int32_t num_sym=0;
4398 
4399  for (int32_t i=0; i<num_suppvec; i++)
4400  {
4401  sv_idx[i]=svm->get_support_vector(i);
4402  sv_weight[i]=svm->get_alpha(i);
4403  }
4404 
4405  if ((max_order<1) || (max_order>8))
4406  {
4407  SG_WARNING( "max_order out of range 1..8 (%d). setting to 1\n", max_order);
4408  max_order=1;
4409  }
4410 
4411  float64_t* position_weights=NULL;
4412  if (ktype==K_COMMWORDSTRING)
4413  position_weights=((CCommWordStringKernel*) kernel)->compute_scoring(
4414  max_order, num_feat, num_sym, NULL,
4415  num_suppvec, sv_idx, sv_weight);
4416  else
4417  position_weights=((CWeightedCommWordStringKernel*) kernel)->compute_scoring(
4418  max_order, num_feat, num_sym, NULL,
4419  num_suppvec, sv_idx, sv_weight);
4420  SG_FREE(sv_idx);
4421  SG_FREE(sv_weight);
4422 
4423  set_matrix(position_weights, num_sym, num_feat);
4424  SG_FREE(position_weights);
4425 
4426  return true;
4427 }
4428 
4429 bool CSGInterface::cmd_get_WD_consensus()
4430 {
4431  if (m_nrhs!=1 || !create_return_values(1))
4432  return false;
4433 
4434  CKernel* kernel=ui_kernel->get_kernel();
4435  if (!kernel)
4436  SG_ERROR("No kernel.\n");
4437  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4438  SG_ERROR("Only works for Weighted Degree Position kernels.\n");
4439 
4440  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4441  ASSERT(svm);
4442  int32_t num_suppvec=svm->get_num_support_vectors();
4443  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4444  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4445  int32_t num_feat=0;
4446 
4447  for (int32_t i=0; i<num_suppvec; i++)
4448  {
4449  sv_idx[i]=svm->get_support_vector(i);
4450  sv_weight[i]=svm->get_alpha(i);
4451  }
4452 
4453  char* consensus=((CWeightedDegreePositionStringKernel*) kernel)->compute_consensus(
4454  num_feat, num_suppvec, sv_idx, sv_weight);
4455  SG_FREE(sv_idx);
4456  SG_FREE(sv_weight);
4457 
4458  set_vector(consensus, num_feat);
4459  SG_FREE(consensus);
4460 
4461  return true;
4462 }
4463 
4464 bool CSGInterface::cmd_compute_POIM_WD()
4465 {
4466  if (m_nrhs!=3 || !create_return_values(1))
4467  return false;
4468 
4469  int32_t max_order=get_int();
4470  float64_t* distribution=NULL;
4471  int32_t num_dfeat=0;
4472  int32_t num_dvec=0;
4473  get_matrix(distribution, num_dfeat, num_dvec);
4474 
4475  if (!distribution)
4476  SG_ERROR("Wrong distribution.\n");
4477 
4478  CKernel* kernel=ui_kernel->get_kernel();
4479  if (!kernel)
4480  SG_ERROR("No Kernel.\n");
4481  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4482  SG_ERROR("Only works for Weighted Degree Position kernels.\n");
4483 
4484  int32_t seqlen=0;
4485  int32_t num_sym=0;
4487  (((CWeightedDegreePositionStringKernel*) kernel)->get_lhs());
4488  ASSERT(sfeat);
4489  seqlen=sfeat->get_max_vector_length();
4490  num_sym=(int32_t) sfeat->get_num_symbols();
4491 
4492  if (num_dvec!=seqlen || num_dfeat!=num_sym)
4493  {
4494  SG_ERROR("distribution should have (seqlen x num_sym) elements"
4495  "(seqlen: %d vs. %d symbols: %d vs. %d)\n", seqlen,
4496  num_dvec, num_sym, num_dfeat);
4497  }
4498 
4499  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4500  ASSERT(svm);
4501  int32_t num_suppvec=svm->get_num_support_vectors();
4502  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4503  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4504 
4505  for (int32_t i=0; i<num_suppvec; i++)
4506  {
4507  sv_idx[i]=svm->get_support_vector(i);
4508  sv_weight[i]=svm->get_alpha(i);
4509  }
4510 
4511  /*
4512  if ((max_order < 1) || (max_order > 12))
4513  {
4514  SG_WARNING( "max_order out of range 1..12 (%d). setting to 1.\n", max_order);
4515  max_order=1;
4516  }
4517  */
4518 
4519  float64_t* position_weights;
4520  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->compute_POIM(
4521  max_order, seqlen, num_sym, NULL,
4522  num_suppvec, sv_idx, sv_weight, distribution);
4523  SG_FREE(sv_idx);
4524  SG_FREE(sv_weight);
4525 
4526  set_matrix(position_weights, num_sym, seqlen);
4527  SG_FREE(position_weights);
4528 
4529  return true;
4530  }
4531 
4532  bool CSGInterface::cmd_get_WD_scoring()
4533  {
4534  if (m_nrhs!=2 || !create_return_values(1))
4535  return false;
4536 
4537  int32_t max_order=get_int();
4538 
4539  CKernel* kernel=ui_kernel->get_kernel();
4540  if (!kernel)
4541  SG_ERROR("No kernel.\n");
4542  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4543  SG_ERROR("Only works for Weighted Degree Position kernels.\n");
4544 
4545  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4546  ASSERT(svm);
4547  int32_t num_suppvec=svm->get_num_support_vectors();
4548  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4549  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4550  int32_t num_feat=0;
4551  int32_t num_sym=0;
4552 
4553  for (int32_t i=0; i<num_suppvec; i++)
4554  {
4555  sv_idx[i]=svm->get_support_vector(i);
4556  sv_weight[i]=svm->get_alpha(i);
4557  }
4558 
4559  if ((max_order<1) || (max_order>12))
4560  {
4561  SG_WARNING("max_order out of range 1..12 (%d). setting to 1\n", max_order);
4562  max_order=1;
4563  }
4564 
4565  float64_t* position_weights=
4566  ((CWeightedDegreePositionStringKernel*) kernel)->compute_scoring(
4567  max_order, num_feat, num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4568  SG_FREE(sv_idx);
4569  SG_FREE(sv_weight);
4570 
4571  set_matrix(position_weights, num_sym, num_feat);
4572  SG_FREE(position_weights);
4573 
4574  return true;
4575 }
4576 
4577 
4578 /* Classifier */
4579 
4580 bool CSGInterface::cmd_classify()
4581 {
4582  if (m_nrhs!=1 || !create_return_values(1))
4583  return false;
4584 
4585  if (!ui_kernel->get_kernel() ||
4586  !ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM)
4587  {
4588  CFeatures* feat=ui_features->get_test_features();
4589  if (!feat)
4590  SG_ERROR("No features found.\n");
4591  }
4592 
4593  CLabels* labels=ui_classifier->classify();
4594  if (!labels)
4595  SG_ERROR("Classify failed\n");
4596 
4597  int32_t num_vec=labels->get_num_labels();
4598  float64_t* result=SG_MALLOC(float64_t, num_vec);
4599  for (int32_t i=0; i<num_vec; i++)
4600  result[i]=((CRegressionLabels*) labels)->get_label(i);
4601  SG_UNREF(labels);
4602 
4603  set_vector(result, num_vec);
4604  SG_FREE(result);
4605 
4606  return true;
4607 }
4608 
4609 bool CSGInterface::cmd_classify_example()
4610 {
4611  if (m_nrhs!=2 || !create_return_values(1))
4612  return false;
4613 
4614  int32_t idx=get_int();
4615  float64_t result=0;
4616 
4617  if (!ui_classifier->classify_example(idx, result))
4618  SG_ERROR("Classify_example failed.\n");
4619 
4620  set_real(result);
4621 
4622  return true;
4623 }
4624 
4625 bool CSGInterface::cmd_get_classifier()
4626 {
4627  if (m_nrhs<1 || m_nrhs>2 || !create_return_values(2))
4628  return false;
4629 
4630  int32_t idx=-1;
4631  if (m_nrhs==2)
4632  idx=get_int();
4633 
4634  float64_t* bias=NULL;
4635  float64_t* weights=NULL;
4636  int32_t rows=0;
4637  int32_t cols=0;
4638  int32_t brows=0;
4639  int32_t bcols=0;
4640 
4641  if (!ui_classifier->get_trained_classifier(
4642  weights, rows, cols, bias, brows, bcols, idx))
4643  return false;
4644 
4645  //SG_PRINT("brows %d, bcols %d\n", brows, bcols);
4646  //CMath::display_matrix(bias, brows, bcols);
4647  set_matrix(bias, brows, bcols);
4648  SG_FREE(bias);
4649 
4650  //SG_PRINT("rows %d, cols %d\n", rows, cols);
4651  //CMath::display_matrix(weights, rows, cols);
4652  set_matrix(weights, rows, cols);
4653  SG_FREE(weights);
4654 
4655  return true;
4656 }
4657 
4658 bool CSGInterface::cmd_new_classifier()
4659 {
4660  if (m_nrhs<2 || !create_return_values(0))
4661  return false;
4662 
4663  int32_t len=0;
4664  char* name=get_str_from_str_or_direct(len);
4665  int32_t d=6;
4666  int32_t from_d=40;
4667 
4668  if (m_nrhs>2)
4669  {
4670  d=get_int_from_int_or_str();
4671 
4672  if (m_nrhs>3)
4673  from_d=get_int_from_int_or_str();
4674  }
4675 
4676  bool success=ui_classifier->new_classifier(name, d, from_d);
4677 
4678  SG_FREE(name);
4679  return success;
4680 }
4681 
4682 bool CSGInterface::cmd_save_classifier()
4683 {
4684  if (m_nrhs<2 || !create_return_values(0))
4685  return false;
4686 
4687  int32_t len=0;
4688  char* filename=get_str_from_str_or_direct(len);
4689 
4690  bool success=ui_classifier->save(filename);
4691 
4692  SG_FREE(filename);
4693  return success;
4694 }
4695 
4696 bool CSGInterface::cmd_load_classifier()
4697 {
4698  if (m_nrhs<3 || !create_return_values(0))
4699  return false;
4700 
4701  int32_t len=0;
4702  char* filename=get_str_from_str_or_direct(len);
4703  char* type=get_str_from_str_or_direct(len);
4704 
4705  bool success=ui_classifier->load(filename, type);
4706 
4707  SG_FREE(filename);
4708  SG_FREE(type);
4709  return success;
4710 }
4711 
4712 
4713 bool CSGInterface::cmd_get_num_svms()
4714 {
4715  if (m_nrhs!=1 || !create_return_values(1))
4716  return false;
4717 
4718  set_int(ui_classifier->get_num_svms());
4719 
4720  return true;
4721 }
4722 
4723 
4724 bool CSGInterface::cmd_get_svm()
4725 {
4726  return cmd_get_classifier();
4727 }
4728 
4729 bool CSGInterface::cmd_set_svm()
4730 {
4731  if (m_nrhs!=3 || !create_return_values(0))
4732  return false;
4733 
4734  float64_t bias=get_real();
4735 
4736  float64_t* alphas=NULL;
4737  int32_t num_feat_alphas=0;
4738  int32_t num_vec_alphas=0;
4739  get_matrix(alphas, num_feat_alphas, num_vec_alphas);
4740 
4741  if (!alphas)
4742  SG_ERROR("No proper alphas given.\n");
4743  if (num_vec_alphas!=2)
4744  SG_ERROR("Not 2 vectors in alphas.\n");
4745 
4746  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4747  if (!svm)
4748  SG_ERROR("No SVM object available.\n");
4749 
4750  svm->create_new_model(num_feat_alphas);
4751  svm->set_bias(bias);
4752 
4753  int32_t num_support_vectors=svm->get_num_support_vectors();
4754  for (int32_t i=0; i<num_support_vectors; i++)
4755  {
4756  svm->set_alpha(i, alphas[i]);
4757  svm->set_support_vector(i, (int32_t) alphas[i+num_support_vectors]);
4758  }
4759  SG_FREE(alphas);
4760 
4761  return true;
4762 }
4763 
4764 bool CSGInterface::cmd_set_linear_classifier()
4765 {
4766  if (m_nrhs!=3 || !create_return_values(0))
4767  return false;
4768 
4769  float64_t bias=get_real();
4770 
4771  float64_t* w=NULL;
4772  int32_t len=0;
4773  get_vector(w, len);
4774 
4775  if (!len)
4776  SG_ERROR("No proper weight vector given.\n");
4777 
4778  CLinearMachine* c=(CLinearMachine*) ui_classifier->get_classifier();
4779  if (!c)
4780  SG_ERROR("No Linear Classifier object available.\n");
4781 
4782  c->set_w(SGVector<float64_t>(w, len));
4783  c->set_bias(bias);
4784  return true;
4785 }
4786 
4787 bool CSGInterface::cmd_get_svm_objective()
4788 {
4789  if (m_nrhs!=1 || !create_return_values(1))
4790  return false;
4791 
4792  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4793  if (!svm)
4794  SG_ERROR("No SVM set.\n");
4795 
4796  set_real(svm->get_objective());
4797 
4798  return true;
4799 }
4800 
4801 bool CSGInterface::cmd_compute_svm_primal_objective()
4802 {
4803  return do_compute_objective(SVM_PRIMAL);
4804 }
4805 
4806 bool CSGInterface::cmd_compute_svm_dual_objective()
4807 {
4808  return do_compute_objective(SVM_DUAL);
4809 }
4810 
4811 bool CSGInterface::cmd_compute_mkl_dual_objective()
4812 {
4813  return do_compute_objective(MKL_DUAL);
4814 }
4815 
4816 bool CSGInterface::cmd_compute_relative_mkl_duality_gap()
4817 {
4818  return do_compute_objective(MKL_RELATIVE_DUALITY_GAP);
4819 }
4820 
4821 bool CSGInterface::cmd_compute_absolute_mkl_duality_gap()
4822 {
4823  return do_compute_objective(MKL_ABSOLUTE_DUALITY_GAP);
4824 }
4825 
4826 bool CSGInterface::do_compute_objective(E_WHICH_OBJ obj)
4827 {
4828  if (m_nrhs!=1 || !create_return_values(1))
4829  return false;
4830 
4831  float64_t result=23.5;
4832 
4833  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4834  if (!svm)
4835  SG_ERROR("No SVM set.\n");
4836 
4837  CLabels* trainlabels=NULL;
4838  trainlabels=ui_labels->get_train_labels();
4839 
4840  if (!trainlabels)
4841  SG_ERROR("No trainlabels available.\n");
4842 
4843  CKernel* kernel=ui_kernel->get_kernel();
4844  if (!kernel)
4845  SG_ERROR("No kernel available.\n");
4846 
4847  if (!ui_kernel->is_initialized() || !kernel->has_features())
4848  SG_ERROR("Kernel not initialized.\n");
4849 
4850  ((CKernelMachine*) svm)->set_labels(trainlabels);
4851  ((CKernelMachine*) svm)->set_kernel(kernel);
4852 
4853 
4854  switch (obj)
4855  {
4856  case SVM_PRIMAL:
4857  result=svm->compute_svm_primal_objective();
4858  break;
4859  case SVM_DUAL:
4860  result=svm->compute_svm_dual_objective();
4861  break;
4862  case MKL_PRIMAL:
4864  result=((CMKL*) svm)->compute_mkl_primal_objective();
4865  break;
4866  case MKL_DUAL:
4868  result=((CMKL*) svm)->compute_mkl_dual_objective();
4869  break;
4870  case MKL_RELATIVE_DUALITY_GAP:
4871  {
4873  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4874  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4875  result=(primal-dual)/dual;
4876  }
4877  break;
4878  case MKL_ABSOLUTE_DUALITY_GAP:
4879  {
4881  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4882  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4883  result=dual-primal;
4884  }
4885  break;
4886  default:
4887  SG_SERROR("Error calling do_compute_objective\n");
4888  return false;
4889  };
4890 
4891  set_real(result);
4892  return true;
4893 }
4894 
4895 bool CSGInterface::cmd_train_classifier()
4896 {
4897  if (m_nrhs<1 || !create_return_values(0))
4898  return false;
4899 
4900  CMachine* classifier=ui_classifier->get_classifier();
4901  if (!classifier)
4902  SG_ERROR("No classifier available.\n");
4903 
4904  EMachineType type=classifier->get_classifier_type();
4905  switch (type)
4906  {
4907  case CT_LIGHT:
4908  case CT_LIGHTONECLASS:
4909  case CT_LIBSVM:
4910  case CT_SCATTERSVM:
4911  case CT_MPD:
4912  case CT_GPBT:
4913  case CT_CPLEXSVM:
4914  case CT_GMNPSVM:
4915  case CT_GNPPSVM:
4916  case CT_KERNELPERCEPTRON:
4917  case CT_LIBSVR:
4918  case CT_LIBSVMMULTICLASS:
4919  case CT_LIBSVMONECLASS:
4920  case CT_SVRLIGHT:
4921  case CT_LARANK:
4922  return ui_classifier->train_svm();
4923  case CT_MKLMULTICLASS:
4924  return ui_classifier->train_mkl_multiclass();
4925  case CT_MKLCLASSIFICATION:
4926  case CT_MKLREGRESSION:
4927  case CT_MKLONECLASS:
4928  return ui_classifier->train_mkl();
4929 
4931  return ui_classifier->train_krr();
4932 
4933  case CT_KNN:
4934  {
4935  if (m_nrhs<2)
4936  return false;
4937 
4938  int32_t k=get_int_from_int_or_str();
4939 
4940  return ui_classifier->train_knn(k);
4941  }
4942 
4943  case CT_KMEANS:
4944  {
4945  if (m_nrhs<3)
4946  return false;
4947 
4948  int32_t k=get_int_from_int_or_str();
4949  int32_t max_iter=get_int_from_int_or_str();
4950 
4951  return ui_classifier->train_clustering(k, max_iter);
4952  }
4953 
4954  case CT_HIERARCHICAL:
4955  {
4956  if (m_nrhs<2)
4957  return false;
4958 
4959  int32_t merges=get_int_from_int_or_str();
4960 
4961  return ui_classifier->train_clustering(merges);
4962  }
4963 
4964  case CT_LDA:
4965  {
4966  float64_t gamma=0;
4967  if (m_nrhs==2)
4968  gamma=get_real_from_real_or_str();
4969 
4970  return ui_classifier->train_linear(gamma);
4971  }
4972 
4973  case CT_PERCEPTRON:
4974  case CT_SVMLIN:
4975  case CT_SVMPERF:
4976  case CT_SUBGRADIENTSVM:
4977  case CT_SVMOCAS:
4978  case CT_SVMSGD:
4979  case CT_LPM:
4980  case CT_LPBOOST:
4981  case CT_SUBGRADIENTLPM:
4982  case CT_LIBLINEAR:
4983  return ui_classifier->train_linear();
4984 
4985  case CT_WDSVMOCAS:
4986  return ui_classifier->train_wdocas();
4987 
4988  default:
4989  SG_ERROR("Unknown classifier type %d.\n", type);
4990  }
4991 
4992  return false;
4993 }
4994 
4995 bool CSGInterface::cmd_do_auc_maximization()
4996 {
4997  if (m_nrhs!=2 || !create_return_values(0))
4998  return false;
4999 
5000  bool do_auc=get_bool_from_bool_or_str();
5001 
5002  return ui_classifier->set_do_auc_maximization(do_auc);
5003 }
5004 
5005 bool CSGInterface::cmd_set_perceptron_parameters()
5006 {
5007  if (m_nrhs!=3 || !create_return_values(0))
5008  return false;
5009 
5010  float64_t lernrate=get_real_from_real_or_str();
5011  int32_t maxiter=get_int_from_int_or_str();
5012 
5013  return ui_classifier->set_perceptron_parameters(lernrate, maxiter);
5014 }
5015 
5016 bool CSGInterface::cmd_set_svm_qpsize()
5017 {
5018  if (m_nrhs!=2 || !create_return_values(0))
5019  return false;
5020 
5021  int32_t qpsize=get_int_from_int_or_str();
5022 
5023  return ui_classifier->set_svm_qpsize(qpsize);
5024 }
5025 
5026 bool CSGInterface::cmd_set_svm_max_qpsize()
5027 {
5028  if (m_nrhs!=2 || !create_return_values(0))
5029  return false;
5030 
5031  int32_t max_qpsize=get_int_from_int_or_str();
5032 
5033  return ui_classifier->set_svm_max_qpsize(max_qpsize);
5034 }
5035 
5036 bool CSGInterface::cmd_set_svm_bufsize()
5037 {
5038  if (m_nrhs!=2 || !create_return_values(0))
5039  return false;
5040 
5041  int32_t bufsize=get_int_from_int_or_str();
5042 
5043  return ui_classifier->set_svm_bufsize(bufsize);
5044 }
5045 
5046 bool CSGInterface::cmd_set_svm_C()
5047 {
5048  if (m_nrhs<2 || !create_return_values(0))
5049  return false;
5050 
5051  float64_t C1=get_real_from_real_or_str();
5052  float64_t C2=C1;
5053 
5054  if (m_nrhs==3)
5055  C2=get_real_from_real_or_str();
5056 
5057  return ui_classifier->set_svm_C(C1, C2);
5058 }
5059 
5060 bool CSGInterface::cmd_set_svm_epsilon()
5061 {
5062  if (m_nrhs!=2 || !create_return_values(0))
5063  return false;
5064 
5065  float64_t epsilon=get_real_from_real_or_str();
5066 
5067  return ui_classifier->set_svm_epsilon(epsilon);
5068 }
5069 
5070 bool CSGInterface::cmd_set_svr_tube_epsilon()
5071 {
5072  if (m_nrhs!=2 || !create_return_values(0))
5073  return false;
5074 
5075  float64_t tube_epsilon=get_real_from_real_or_str();
5076 
5077  return ui_classifier->set_svr_tube_epsilon(tube_epsilon);
5078 }
5079 
5080 bool CSGInterface::cmd_set_svm_nu()
5081 {
5082  if (m_nrhs!=2 || !create_return_values(0))
5083  return false;
5084 
5085  float64_t nu=get_real_from_real_or_str();
5086 
5087  return ui_classifier->set_svm_nu(nu);
5088 }
5089 
5090 bool CSGInterface::cmd_set_svm_mkl_parameters()
5091 {
5092  if (m_nrhs<3 || m_nrhs>4 || !create_return_values(0))
5093  return false;
5094 
5095  float64_t weight_epsilon=get_real_from_real_or_str();
5096  float64_t C_mkl=get_real_from_real_or_str();
5097  float64_t mkl_norm=1.0;
5098 
5099  if (m_nrhs==4)
5100  mkl_norm=get_real_from_real_or_str();
5101 
5102  return ui_classifier->set_svm_mkl_parameters(weight_epsilon, C_mkl, mkl_norm);
5103 }
5104 
5105 bool CSGInterface::cmd_set_elasticnet_lambda()
5106 {
5107  if (m_nrhs!=2 || !create_return_values(0))
5108  return false;
5109  float64_t lambda=get_real_from_real_or_str();
5110  return ui_classifier->set_elasticnet_lambda(lambda);
5111 }
5112 
5113 bool CSGInterface::cmd_set_mkl_block_norm()
5114 {
5115  if (m_nrhs!=2 || !create_return_values(0))
5116  return false;
5117  float64_t bnorm=get_real_from_real_or_str();
5118  return ui_classifier->set_mkl_block_norm(bnorm);
5119 }
5120 
5121 
5122 bool CSGInterface::cmd_set_max_train_time()
5123 {
5124  if (m_nrhs!=2 || !create_return_values(0))
5125  return false;
5126 
5127  float64_t max_train_time=get_real_from_real_or_str();
5128 
5129  return ui_classifier->set_max_train_time(max_train_time);
5130 }
5131 
5132 bool CSGInterface::cmd_set_svm_shrinking_enabled()
5133 {
5134  if (m_nrhs!=2 || !create_return_values(0))
5135  return false;
5136 
5137  bool shrinking_enabled=get_bool_from_bool_or_str();
5138 
5139  return ui_classifier->set_svm_shrinking_enabled(shrinking_enabled);
5140 }
5141 
5142 bool CSGInterface::cmd_set_svm_batch_computation_enabled()
5143 {
5144  if (m_nrhs!=2 || !create_return_values(0))
5145  return false;
5146 
5147  bool batch_computation_enabled=get_bool_from_bool_or_str();
5148 
5149  return ui_classifier->set_svm_batch_computation_enabled(
5150  batch_computation_enabled);
5151 }
5152 
5153 bool CSGInterface::cmd_set_svm_linadd_enabled()
5154 {
5155  if (m_nrhs!=2 || !create_return_values(0))
5156  return false;
5157 
5158  bool linadd_enabled=get_bool_from_bool_or_str();
5159 
5160  return ui_classifier->set_svm_linadd_enabled(linadd_enabled);
5161 }
5162 
5163 bool CSGInterface::cmd_set_svm_bias_enabled()
5164 {
5165  if (m_nrhs!=2 || !create_return_values(0))
5166  return false;
5167 
5168  bool bias_enabled=get_bool_from_bool_or_str();
5169 
5170  return ui_classifier->set_svm_bias_enabled(bias_enabled);
5171 }
5172 
5173 bool CSGInterface::cmd_set_mkl_interleaved_enabled()
5174 {
5175  if (m_nrhs!=2 || !create_return_values(0))
5176  return false;
5177 
5178  bool interleaved_enabled=get_bool_from_bool_or_str();
5179 
5180  return ui_classifier->set_mkl_interleaved_enabled(interleaved_enabled);
5181 }
5182 
5183 bool CSGInterface::cmd_set_krr_tau()
5184 {
5185  if (m_nrhs!=2 || !create_return_values(0))
5186  return false;
5187 
5188  float64_t tau=get_real_from_real_or_str();
5189 
5190  return ui_classifier->set_krr_tau(tau);
5191 }
5192 
5193 
5194 /* Preproc */
5195 
5196 bool CSGInterface::cmd_add_preproc()
5197 {
5198  if (m_nrhs<2 || !create_return_values(0))
5199  return false;
5200 
5201  int32_t len=0;
5202  char* type=get_str_from_str_or_direct(len);
5203  CPreprocessor* preproc=NULL;
5204 
5205  if (strmatch(type, "NORMONE"))
5206  preproc=ui_preproc->create_generic(P_NORMONE);
5207  else if (strmatch(type, "LOGPLUSONE"))
5208  preproc=ui_preproc->create_generic(P_LOGPLUSONE);
5209  else if (strmatch(type, "SORTWORDSTRING"))
5210  preproc=ui_preproc->create_generic(P_SORTWORDSTRING);
5211  else if (strmatch(type, "SORTULONGSTRING"))
5212  preproc=ui_preproc->create_generic(P_SORTULONGSTRING);
5213  else if (strmatch(type, "DECOMPRESSCHARSTRING"))
5214  preproc=ui_preproc->create_generic(P_DECOMPRESSCHARSTRING);
5215  else if (strmatch(type, "SORTWORD"))
5216  preproc=ui_preproc->create_generic(P_SORTWORD);
5217 
5218  else if (strmatch(type, "PRUNEVARSUBMEAN"))
5219  {
5220  bool divide_by_std=false;
5221  if (m_nrhs==3)
5222  divide_by_std=get_bool_from_bool_or_str();
5223 
5224  preproc=ui_preproc->create_prunevarsubmean(divide_by_std);
5225  }
5226 
5227 #ifdef HAVE_LAPACK
5228  else if (strmatch(type, "PCA") && m_nrhs==4)
5229  {
5230  bool do_whitening=get_bool_from_bool_or_str();
5231  float64_t threshold=get_real_from_real_or_str();
5232 
5233  preproc=ui_preproc->create_pca(do_whitening, threshold);
5234  }
5235 #endif
5236 
5237  else
5239 
5240  SG_FREE(type);
5241  return ui_preproc->add_preproc(preproc);
5242 }
5243 
5244 bool CSGInterface::cmd_del_preproc()
5245 {
5246  if (m_nrhs!=1 || !create_return_values(0))
5247  return false;
5248 
5249  return ui_preproc->del_preproc();
5250 }
5251 
5252 bool CSGInterface::cmd_attach_preproc()
5253 {
5254  if (m_nrhs<2 || !create_return_values(0))
5255  return false;
5256 
5257  int32_t len=0;
5258  char* target=get_str_from_str_or_direct(len);
5259 
5260  bool do_force=false;
5261  if (m_nrhs==3)
5262  do_force=get_bool_from_bool_or_str();
5263 
5264  bool success=ui_preproc->attach_preproc(target, do_force);
5265 
5266  SG_FREE(target);
5267  return success;
5268 }
5269 
5270 bool CSGInterface::cmd_clean_preproc()
5271 {
5272  if (m_nrhs!=1 || !create_return_values(0))
5273  return false;
5274 
5275  return ui_preproc->clean_preproc();
5276 }
5277 
5278 /* Converter */
5279 
5280 bool CSGInterface::cmd_set_converter()
5281 {
5282  int32_t len=0;
5283  char* type=get_str_from_str_or_direct(len);
5284 
5285  if (strmatch(type, "lle"))
5286  {
5287  int32_t k = get_int_from_int_or_str();
5288  ui_converter->create_locallylinearembedding(k);
5289  return true;
5290  }
5291  if (strmatch(type, "npe"))
5292  {
5293  int32_t k = get_int_from_int_or_str();
5294  ui_converter->create_neighborhoodpreservingembedding(k);
5295  return true;
5296  }
5297  if (strmatch(type, "ltsa"))
5298  {
5299  int32_t k = get_int_from_int_or_str();
5300  ui_converter->create_localtangentspacealignment(k);
5301  return true;
5302  }
5303  if (strmatch(type, "lltsa"))
5304  {
5305  int32_t k = get_int_from_int_or_str();
5306  ui_converter->create_linearlocaltangentspacealignment(k);
5307  return true;
5308  }
5309  if (strmatch(type, "hlle"))
5310  {
5311  int32_t k = get_int_from_int_or_str();
5312  ui_converter->create_hessianlocallylinearembedding(k);
5313  return true;
5314  }
5315  if (strmatch(type, "laplacian_eigenmaps"))
5316  {
5317  int32_t k = get_int_from_int_or_str();
5318  int32_t width = get_real_from_real_or_str();
5319  ui_converter->create_laplacianeigenmaps(k,width);
5320  return true;
5321  }
5322  if (strmatch(type, "lpp"))
5323  {
5324  int32_t k = get_int_from_int_or_str();
5325  int32_t width = get_real_from_real_or_str();
5326  ui_converter->create_localitypreservingprojections(k,width);
5327  return true;
5328  }
5329  if (strmatch(type, "diffusion_maps"))
5330  {
5331  int32_t t = get_int_from_int_or_str();
5332  int32_t width = get_real_from_real_or_str();
5333  ui_converter->create_diffusionmaps(t,width);
5334  return true;
5335  }
5336  if (strmatch(type, "isomap"))
5337  {
5338  int32_t k = get_int_from_int_or_str();
5339  ui_converter->create_isomap(k);
5340  return true;
5341  }
5342  if (strmatch(type, "mds"))
5343  {
5344  ui_converter->create_multidimensionalscaling();
5345  return true;
5346  }
5347  return false;
5348 }
5349 
5350 bool CSGInterface::cmd_embed()
5351 {
5352  int32_t target_dim = get_int_from_int_or_str();
5353  CDenseFeatures<float64_t>* embedding = ui_converter->embed(target_dim);
5354  SGMatrix<float64_t> embedding_matrix = embedding->get_feature_matrix();
5355  set_matrix(embedding_matrix.matrix,embedding_matrix.num_cols,embedding_matrix.num_rows);
5356  return true;
5357 }
5358 
5359 /* HMM */
5360 
5361 bool CSGInterface::cmd_new_plugin_estimator()
5362 {
5363  if (m_nrhs<2 || !create_return_values(0))
5364  return false;
5365 
5366  float64_t pos_pseudo=get_real_from_real_or_str();
5367  float64_t neg_pseudo=get_real_from_real_or_str();
5368 
5369  return ui_pluginestimate->new_estimator(pos_pseudo, neg_pseudo);
5370 }
5371 
5372 bool CSGInterface::cmd_train_estimator()
5373 {
5374  if (m_nrhs!=1 || !create_return_values(0))
5375  return false;
5376 
5377  return ui_pluginestimate->train();
5378 }
5379 
5380 bool CSGInterface::cmd_plugin_estimate_classify_example()
5381 {
5382  if (m_nrhs!=2 || !create_return_values(1))
5383  return false;
5384 
5385  int32_t idx=get_int();
5386  float64_t result=ui_pluginestimate->apply_one(idx);
5387 
5388  set_vector(&result, 1);
5389  return true;
5390 }
5391 
5392 bool CSGInterface::cmd_plugin_estimate_classify()
5393 {
5394  if (m_nrhs!=1 || !create_return_values(1))
5395  return false;
5396 
5397  CFeatures* feat=ui_features->get_test_features();
5398  if (!feat)
5399  SG_ERROR("No features found.\n");
5400 
5401  int32_t num_vec=feat->get_num_vectors();
5402  float64_t* result=SG_MALLOC(float64_t, num_vec);
5403  CLabels* labels=ui_pluginestimate->apply();
5404  for (int32_t i=0; i<num_vec; i++)
5405  result[i]=((CRegressionLabels*) labels)->get_label(i);
5406  SG_UNREF(labels);
5407 
5408  set_vector(result, num_vec);
5409  SG_FREE(result);
5410 
5411  return true;
5412 }
5413 
5414 bool CSGInterface::cmd_set_plugin_estimate()
5415 {
5416  if (m_nrhs!=3 || !create_return_values(0))
5417  return false;
5418 
5419  float64_t* emission_probs=NULL;
5420  int32_t num_probs=0;
5421  int32_t num_vec=0;
5422  get_matrix(emission_probs, num_probs, num_vec);
5423 
5424  if (num_vec!=2)
5425  SG_ERROR("Need at least 1 set of positive and 1 set of negative params.\n");
5426 
5427  float64_t* pos_params=emission_probs;
5428  float64_t* neg_params=&(emission_probs[num_probs]);
5429 
5430  float64_t* model_sizes=NULL;
5431  int32_t len=0;
5432  get_vector(model_sizes, len);
5433 
5434  int32_t seq_length=(int32_t) model_sizes[0];
5435  int32_t num_symbols=(int32_t) model_sizes[1];
5436  if (num_probs!=seq_length*num_symbols)
5437  SG_ERROR("Mismatch in number of emission probs and sequence length * number of symbols.\n");
5438 
5439  ui_pluginestimate->get_estimator()->set_model_params(
5440  pos_params, neg_params, seq_length, num_symbols);
5441 
5442  return true;
5443 }
5444 
5445 bool CSGInterface::cmd_get_plugin_estimate()
5446 {
5447  if (m_nrhs!=1 || !create_return_values(2))
5448  return false;
5449 
5450  float64_t* pos_params=NULL;
5451  float64_t* neg_params=NULL;
5452  int32_t num_params=0;
5453  int32_t seq_length=0;
5454  int32_t num_symbols=0;
5455 
5456  if (!ui_pluginestimate->get_estimator()->get_model_params(
5457  pos_params, neg_params, seq_length, num_symbols))
5458  return false;
5459 
5460  num_params=seq_length*num_symbols;
5461 
5462  float64_t* result=SG_MALLOC(float64_t, num_params*2);
5463  for (int32_t i=0; i<num_params; i++)
5464  result[i]=pos_params[i];
5465  for (int32_t i=0; i<num_params; i++)
5466  result[i+num_params]=neg_params[i];
5467 
5468  set_matrix(result, num_params, 2);
5469  SG_FREE(result);
5470 
5471  float64_t model_sizes[2];
5472  model_sizes[0]=(float64_t) seq_length;
5473  model_sizes[1]=(float64_t) num_symbols;
5474  set_vector(model_sizes, 2);
5475 
5476  return true;
5477 }
5478 
5479 bool CSGInterface::cmd_convergence_criteria()
5480 {
5481  if (m_nrhs<3 || !create_return_values(0))
5482  return false;
5483 
5484  int32_t num_iterations=get_int_from_int_or_str();
5485  float64_t epsilon=get_real_from_real_or_str();
5486 
5487  return ui_hmm->convergence_criteria(num_iterations, epsilon);
5488 }
5489 
5490 bool CSGInterface::cmd_normalize()
5491 {
5492  if (m_nrhs<2 || !create_return_values(0))
5493  return false;
5494 
5495  bool keep_dead_states=get_bool_from_bool_or_str();
5496 
5497  return ui_hmm->normalize(keep_dead_states);
5498 }
5499 
5500 bool CSGInterface::cmd_add_states()
5501 {
5502  if (m_nrhs<3 || !create_return_values(0))
5503  return false;
5504 
5505  int32_t num_states=get_int_from_int_or_str();
5506  float64_t value=get_real_from_real_or_str();
5507 
5508  return ui_hmm->add_states(num_states, value);
5509 }
5510 
5511 bool CSGInterface::cmd_permutation_entropy()
5512 {
5513  if (m_nrhs<3 || !create_return_values(0))
5514  return false;
5515 
5516  int32_t width=get_int_from_int_or_str();
5517  int32_t seq_num=get_int_from_int_or_str();
5518 
5519  return ui_hmm->permutation_entropy(width, seq_num);
5520 }
5521 
5522 bool CSGInterface::cmd_relative_entropy()
5523 {
5524  if (m_nrhs!=1 || !create_return_values(1))
5525  return false;
5526 
5527  float64_t* entropy=NULL;
5528  int32_t len=0;
5529  bool success=ui_hmm->relative_entropy(entropy, len);
5530  if (!success)
5531  return false;
5532 
5533  set_vector(entropy, len);
5534 
5535  SG_FREE(entropy);
5536  return true;
5537 }
5538 
5539 bool CSGInterface::cmd_entropy()
5540 {
5541  if (m_nrhs!=1 || !create_return_values(1))
5542  return false;
5543 
5544  float64_t* entropy=NULL;
5545  int32_t len=0;
5546  bool success=ui_hmm->entropy(entropy, len);
5547  if (!success)
5548  return false;
5549 
5550  set_vector(entropy, len);
5551 
5552  SG_FREE(entropy);
5553  return true;
5554 }
5555 
5556 bool CSGInterface::cmd_hmm_classify()
5557 {
5558  return do_hmm_classify(false, false);
5559 }
5560 
5561 bool CSGInterface::cmd_one_class_hmm_classify()
5562 {
5563  return do_hmm_classify(false, true);
5564 }
5565 
5566 bool CSGInterface::cmd_one_class_linear_hmm_classify()
5567 {
5568  return do_hmm_classify(true, true);
5569 }
5570 
5571 bool CSGInterface::do_hmm_classify(bool linear, bool one_class)
5572 {
5573  if (m_nrhs>1 || !create_return_values(1))
5574  return false;
5575 
5576  CFeatures* feat=ui_features->get_test_features();
5577  if (!feat)
5578  return false;
5579 
5580  int32_t num_vec=feat->get_num_vectors();
5581  CRegressionLabels* labels=NULL;
5582 
5583  if (linear) // must be one_class as well
5584  {
5585  labels=ui_hmm->linear_one_class_classify();
5586  }
5587  else
5588  {
5589  if (one_class)
5590  labels=ui_hmm->one_class_classify();
5591  else
5592  labels=ui_hmm->classify();
5593  }
5594  if (!labels)
5595  return false;
5596 
5597  float64_t* result=SG_MALLOC(float64_t, num_vec);
5598  for (int32_t i=0; i<num_vec; i++)
5599  result[i]=labels->get_label(i);
5600  SG_UNREF(labels);
5601 
5602  set_vector(result, num_vec);
5603  SG_FREE(result);
5604 
5605  return true;
5606 }
5607 
5608 bool CSGInterface::cmd_one_class_hmm_classify_example()
5609 {
5610  return do_hmm_classify_example(true);
5611 }
5612 
5613 bool CSGInterface::cmd_hmm_classify_example()
5614 {
5615  return do_hmm_classify_example(false);
5616 }
5617 
5618 bool CSGInterface::do_hmm_classify_example(bool one_class)
5619 {
5620  if (m_nrhs!=2 || !create_return_values(1))
5621  return false;
5622 
5623  int32_t idx=get_int();
5624  float64_t result=0;
5625 
5626  if (one_class)
5627  result=ui_hmm->one_class_classify_example(idx);
5628  else
5629  result=ui_hmm->classify_example(idx);
5630 
5631  set_real(result);
5632 
5633  return true;
5634 }
5635 
5636 bool CSGInterface::cmd_output_hmm()
5637 {
5638  if (m_nrhs!=1 || !create_return_values(0))
5639  return false;
5640 
5641  return ui_hmm->output_hmm();
5642 }
5643 
5644 bool CSGInterface::cmd_output_hmm_defined()
5645 {
5646  if (m_nrhs!=1 || !create_return_values(0))
5647  return false;
5648 
5649  return ui_hmm->output_hmm_defined();
5650 }
5651 
5652 bool CSGInterface::cmd_hmm_likelihood()
5653 {
5654  if (m_nrhs!=1 || !create_return_values(1))
5655  return false;
5656 
5657  CHMM* h=ui_hmm->get_current();
5658  if (!h)
5659  SG_ERROR("No HMM.\n");
5660 
5661  float64_t likelihood=h->model_probability();
5662  set_real(likelihood);
5663 
5664  return true;
5665 }
5666 
5667 bool CSGInterface::cmd_likelihood()
5668 {
5669  if (m_nrhs!=1 || !create_return_values(0))
5670  return false;
5671 
5672  return ui_hmm->likelihood();
5673 }
5674 
5675 bool CSGInterface::cmd_save_likelihood()
5676 {
5677  if (m_nrhs<2 || !create_return_values(0))
5678  return false;
5679 
5680  int32_t len=0;
5681  char* filename=get_str_from_str_or_direct(len);
5682 
5683  bool is_binary=false;
5684  if (m_nrhs==3)
5685  is_binary=get_bool_from_bool_or_str();
5686 
5687  bool success=ui_hmm->save_likelihood(filename, is_binary);
5688 
5689  SG_FREE(filename);
5690  return success;
5691 }
5692 
5693 bool CSGInterface::cmd_get_viterbi_path()
5694 {
5695  if (m_nrhs!=2 || !create_return_values(2))
5696  return false;
5697 
5698  int32_t dim=get_int();
5699  SG_DEBUG("dim: %f\n", dim);
5700 
5701  CHMM* h=ui_hmm->get_current();
5702  if (!h)
5703  return false;
5704 
5705  CFeatures* feat=ui_features->get_test_features();
5706  if (!feat || (feat->get_feature_class()!=C_STRING) ||
5707  (feat->get_feature_type()!=F_WORD))
5708  return false;
5709 
5711 
5712  int32_t num_feat=0;
5713  bool free_vec;
5714  uint16_t* vec=((CStringFeatures<uint16_t>*) feat)->get_feature_vector(dim, num_feat, free_vec);
5715  if (!vec || num_feat<=0)
5716  {
5717  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5718  return false;
5719  }
5720  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5721 
5722  SG_DEBUG( "computing viterbi path for vector %d (length %d)\n", dim, num_feat);
5723  float64_t likelihood=0;
5724  T_STATES* path=h->get_path(dim, likelihood);
5725 
5726  set_vector(path, num_feat);
5727  SG_FREE(path);
5728  set_real(likelihood);
5729 
5730  return true;
5731 }
5732 
5733 bool CSGInterface::cmd_viterbi_train()
5734 {
5735  if (m_nrhs!=1 || !create_return_values(0))
5736  return false;
5737 
5738  return ui_hmm->viterbi_train();
5739 }
5740 
5741 bool CSGInterface::cmd_viterbi_train_defined()
5742 {
5743  if (m_nrhs!=1 || !create_return_values(0))
5744  return false;
5745 
5746  return ui_hmm->viterbi_train_defined();
5747 }
5748 
5749 bool CSGInterface::cmd_baum_welch_train()
5750 {
5751  if (m_nrhs!=1 || !create_return_values(0))
5752  return false;
5753 
5754  return ui_hmm->baum_welch_train();
5755 }
5756 
5757 bool CSGInterface::cmd_baum_welch_train_defined()
5758 {
5759  if (m_nrhs!=1 || !create_return_values(0))
5760  return false;
5761 
5762  return ui_hmm->baum_welch_train_defined();
5763 }
5764 
5765 
5766 bool CSGInterface::cmd_baum_welch_trans_train()
5767 {
5768  if (m_nrhs!=1 || !create_return_values(0))
5769  return false;
5770 
5771  return ui_hmm->baum_welch_trans_train();
5772 }
5773 
5774 bool CSGInterface::cmd_linear_train()
5775 {
5776  if (m_nrhs<1 || !create_return_values(0))
5777  return false;
5778 
5779  if (m_nrhs==2)
5780  {
5781  int32_t len=0;
5782  char* align=get_str_from_str_or_direct(len);
5783 
5784  bool success=ui_hmm->linear_train(align[0]);
5785 
5786  SG_FREE(align);
5787  return success;
5788  }
5789  else
5790  return ui_hmm->linear_train();
5791 }
5792 
5793 bool CSGInterface::cmd_save_path()
5794 {
5795  if (m_nrhs<2 || !create_return_values(0))
5796  return false;
5797 
5798  int32_t len=0;
5799  char* filename=get_str_from_str_or_direct(len);
5800 
5801  bool is_binary=false;
5802  if (m_nrhs==3)
5803  is_binary=get_bool_from_bool_or_str();
5804 
5805  bool success=ui_hmm->save_path(filename, is_binary);
5806 
5807  SG_FREE(filename);
5808  return success;
5809 }
5810 
5811 bool CSGInterface::cmd_append_hmm()
5812 {
5813  if (m_nrhs!=5 || !create_return_values(0))
5814  return false;
5815 
5816  CHMM* old_h=ui_hmm->get_current();
5817  if (!old_h)
5818  SG_ERROR("No current HMM set.\n");
5819 
5820  float64_t* p=NULL;
5821  int32_t N_p=0;
5822  get_vector(p, N_p);
5823 
5824  float64_t* q=NULL;
5825  int32_t N_q=0;
5826  get_vector(q, N_q);
5827 
5828  float64_t* a=NULL;
5829  int32_t M_a=0;
5830  int32_t N_a=0;
5831  get_matrix(a, M_a, N_a);
5832  int32_t N=N_a;
5833 
5834  float64_t* b=NULL;
5835  int32_t M_b=0;
5836  int32_t N_b=0;
5837  get_matrix(b, M_b, N_b);
5838  int32_t M=N_b;
5839 
5840  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
5841  {
5842  SG_ERROR("Model matrices not matching in size.\n"
5843  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
5844  N_p, N_q, N_a, M_a, N_b, M_b);
5845  }
5846 
5847  CHMM* h=new CHMM(N, M, NULL, ui_hmm->get_pseudo());
5848  int32_t i,j;
5849 
5850  for (i=0; i<N; i++)
5851  {
5852  h->set_p(i, p[i]);
5853  h->set_q(i, q[i]);
5854  }
5855 
5856  for (i=0; i<N; i++)
5857  for (j=0; j<N; j++)
5858  h->set_a(i,j, a[i+j*N]);
5859 
5860  for (i=0; i<N; i++)
5861  for (j=0; j<M; j++)
5862  h->set_b(i,j, b[i+j*N]);
5863 
5864  old_h->append_model(h);
5865  SG_UNREF(h);
5866 
5867  return true;
5868 }
5869 
5870 bool CSGInterface::cmd_append_model()
5871 {
5872  if (m_nrhs<2 || !create_return_values(0))
5873  return false;
5874  if (m_nrhs>2 && m_nrhs!=4)
5875  return false;
5876 
5877  int32_t len=0;
5878  char* filename=get_str_from_str_or_direct(len);
5879  int32_t base1=-1;
5880  int32_t base2=-1;
5881  if (m_nrhs>2)
5882  {
5883  base1=get_int_from_int_or_str();
5884  base2=get_int_from_int_or_str();
5885  }
5886 
5887  bool success=ui_hmm->append_model(filename, base1, base2);
5888 
5889  SG_FREE(filename);
5890  return success;
5891 }
5892 
5893 bool CSGInterface::cmd_new_hmm()
5894 {
5895  if (m_nrhs!=3 || !create_return_values(0))
5896  return false;
5897 
5898  int32_t n=get_int_from_int_or_str();
5899  int32_t m=get_int_from_int_or_str();
5900 
5901  return ui_hmm->new_hmm(n, m);
5902 }
5903 
5904 bool CSGInterface::cmd_load_hmm()
5905 {
5906  if (m_nrhs!=2 || !create_return_values(0))
5907  return false;
5908 
5909  int32_t len=0;
5910  char* filename=get_str_from_str_or_direct(len);
5911 
5912  bool success=ui_hmm->load(filename);
5913 
5914  SG_FREE(filename);
5915  return success;
5916 }
5917 
5918 bool CSGInterface::cmd_save_hmm()
5919 {
5920  if (m_nrhs<2 || !create_return_values(0))
5921  return false;
5922 
5923  int32_t len=0;
5924  char* filename=get_str_from_str_or_direct(len);
5925 
5926  bool is_binary=false;
5927  if (m_nrhs==3)
5928  is_binary=get_bool_from_bool_or_str();
5929 
5930  bool success=ui_hmm->save(filename, is_binary);
5931 
5932  SG_FREE(filename);
5933  return success;
5934 }
5935 
5936 bool CSGInterface::cmd_set_hmm()
5937 {
5938  if (m_nrhs!=5 || !create_return_values(0))
5939  return false;
5940 
5941  float64_t* p=NULL;
5942  int32_t N_p=0;
5943  get_vector(p, N_p);
5944 
5945  float64_t* q=NULL;
5946  int32_t N_q=0;
5947  get_vector(q, N_q);
5948 
5949  float64_t* a=NULL;
5950  int32_t M_a=0;
5951  int32_t N_a=0;
5952  get_matrix(a, M_a, N_a);
5953  int32_t N=N_a;
5954 
5955  float64_t* b=NULL;
5956  int32_t M_b=0;
5957  int32_t N_b=0;
5958  get_matrix(b, M_b, N_b);
5959  int32_t M=N_b;
5960 
5961  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
5962  {
5963  SG_ERROR("Model matrices not matching in size.\n"
5964  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
5965  N_p, N_q, N_a, M_a, N_b, M_b);
5966  }
5967 
5968  CHMM* current=ui_hmm->get_current();
5969  if (!current)
5970  SG_ERROR("Need a previously created HMM.\n");
5971 
5972  int32_t i,j;
5973 
5974  for (i=0; i<N; i++)
5975  {
5976  current->set_p(i, p[i]);
5977  current->set_q(i, q[i]);
5978  }
5979 
5980  for (i=0; i<N; i++)
5981  for (j=0; j<N; j++)
5982  current->set_a(i,j, a[i+j*N]);
5983 
5984  for (i=0; i<N; i++)
5985  for (j=0; j<M; j++)
5986  current->set_b(i,j, b[i+j*N]);
5987 
5988  CStringFeatures<uint16_t>* sf = ((CStringFeatures<uint16_t>*) (ui_features->get_train_features()));
5989  current->set_observations(sf);
5990 
5991  return true;
5992 }
5993 
5994 bool CSGInterface::cmd_set_hmm_as()
5995 {
5996  if (m_nrhs!=2 || !create_return_values(0))
5997  return false;
5998 
5999  int32_t len=0;
6000  char* target=get_str_from_str_or_direct(len);
6001 
6002  bool success=ui_hmm->set_hmm_as(target);
6003 
6004  SG_FREE(target);
6005  return success;
6006 }
6007 
6008 bool CSGInterface::cmd_set_chop()
6009 {
6010  if (m_nrhs!=2 || !create_return_values(0))
6011  return false;
6012 
6013  float64_t value=get_real_from_real_or_str();
6014  return ui_hmm->chop(value);
6015 }
6016 
6017 bool CSGInterface::cmd_set_pseudo()
6018 {
6019  if (m_nrhs!=2 || !create_return_values(0))
6020  return false;
6021 
6022  float64_t value=get_real_from_real_or_str();
6023  return ui_hmm->set_pseudo(value);
6024 }
6025 
6026 bool CSGInterface::cmd_load_definitions()
6027 {
6028  if (m_nrhs<2 || !create_return_values(0))
6029  return false;
6030 
6031  int32_t len=0;
6032  char* filename=get_str_from_str_or_direct(len);
6033 
6034  bool do_init=false;
6035  if (m_nrhs==3)
6036  do_init=get_bool_from_bool_or_str();
6037 
6038  bool success=ui_hmm->load_definitions(filename, do_init);
6039 
6040  SG_FREE(filename);
6041  return success;
6042 }
6043 
6044 bool CSGInterface::cmd_get_hmm()
6045 {
6046  if (m_nrhs!=1 || !create_return_values(4))
6047  return false;
6048 
6049  CHMM* h=ui_hmm->get_current();
6050  if (!h)
6051  return false;
6052 
6053  int32_t N=h->get_N();
6054  int32_t M=h->get_M();
6055  int32_t i=0;
6056  int32_t j=0;
6057  float64_t* p=SG_MALLOC(float64_t, N);
6058  float64_t* q=SG_MALLOC(float64_t, N);
6059 
6060  for (i=0; i<N; i++)
6061  {
6062  p[i]=h->get_p(i);
6063  q[i]=h->get_q(i);
6064  }
6065 
6066  set_vector(p, N);
6067  SG_FREE(p);
6068  set_vector(q, N);
6069  SG_FREE(q);
6070 
6071  float64_t* a=SG_MALLOC(float64_t, N*N);
6072  for (i=0; i<N; i++)
6073  for (j=0; j<N; j++)
6074  a[i+j*N]=h->get_a(i, j);
6075  set_matrix(a, N, N);
6076  SG_FREE(a);
6077 
6078  float64_t* b=SG_MALLOC(float64_t, N*M);
6079  for (i=0; i<N; i++)
6080  for (j=0; j<M; j++)
6081  b[i+j*N]=h->get_b(i, j);
6082  set_matrix(b, N, M);
6083  SG_FREE(b);
6084 
6085  return true;
6086 }
6087 
6088 bool CSGInterface::cmd_best_path()
6089 {
6090  if (m_nrhs!=3 || !create_return_values(0))
6091  return false;
6092 
6093  int32_t from=get_int_from_int_or_str();
6094  int32_t to=get_int_from_int_or_str();
6095 
6096  return ui_hmm->best_path(from, to);
6097 }
6098 
6099 bool CSGInterface::cmd_best_path_2struct()
6100 {
6101  if (m_nrhs!=12 || !create_return_values(3))
6102  return false;
6103 
6104  SG_ERROR("Sorry, this parameter list is awful!\n");
6105 
6106  return true;
6107 }
6108 
6109 void CSGInterface::get_vector(bool*& vector, int32_t& len)
6110 {
6111  int32_t* int_vector;
6112  get_vector(int_vector, len);
6113 
6114  ASSERT(len>0);
6115  vector= SG_MALLOC(bool, len);
6116 
6117  for (int32_t i=0; i<len; i++)
6118  vector[i]= (int_vector[i]!=0);
6119 
6120  SG_FREE(int_vector);
6121 }
6122 
6123 void CSGInterface::set_vector(const bool* vector, int32_t len)
6124 {
6125  int32_t* int_vector = SG_MALLOC(int32_t, len);
6126  for (int32_t i=0;i<len;i++)
6127  {
6128  if (vector[i])
6129  int_vector[i]=1;
6130  else
6131  int_vector[i]=0;
6132  }
6133  set_vector(int_vector,len);
6134  SG_FREE(int_vector);
6135 }
6136 
6137 bool CSGInterface::cmd_set_plif_struct()
6138 {
6139  // ARG 2
6140  int32_t Nid=0;
6141  int32_t* ids;
6142  get_vector(ids,Nid);
6143 
6144  // ARG 3
6145  int32_t Nname=0;
6146  int32_t Mname=0;
6147  SGString<char>* names;
6148  get_string_list(names, Nname,Mname);
6149 
6150  // ARG 4
6151  int32_t Nlimits=0;
6152  int32_t Mlimits=0;
6153  float64_t* all_limits;
6154  get_matrix(all_limits, Mlimits, Nlimits);
6155 
6156  // ARG 5
6157  int32_t Npenalties=0;
6158  int32_t Mpenalties=0;
6159  float64_t* all_penalties;
6160  get_matrix(all_penalties, Mpenalties, Npenalties);
6161 
6162  // ARG 6
6163  int32_t Ntransform=0;
6164  int32_t Mtransform=0;
6165  SGString<char>* all_transform;
6166  get_string_list(all_transform, Ntransform, Mtransform);
6167 
6168  // ARG 7
6169  int32_t Nmin=0;
6170  float64_t* min_values;
6171  get_vector(min_values,Nmin);
6172 
6173  // ARG 8
6174  int32_t Nmax=0;
6175  float64_t* max_values;
6176  get_vector(max_values,Nmax);
6177 
6178  // ARG 9
6179  int32_t Ncache=0;
6180  bool* all_use_cache;
6181  get_vector(all_use_cache,Ncache);
6182 
6183  // ARG 10
6184  int32_t Nsvm=0;
6185  int32_t* all_use_svm;
6186  get_vector(all_use_svm,Nsvm);
6187 
6188  // ARG 11
6189  int32_t Ncalc=0;
6190  bool* all_do_calc;
6191  get_vector(all_do_calc,Ncalc);
6192 
6193  if (Ncalc!=Nsvm)
6194  SG_ERROR("Ncalc!=Nsvm, Ncalc:%i, Nsvm:%i\n",Ncalc,Nsvm);
6195  if (Ncalc!=Ncache)
6196  SG_ERROR("Ncalc!=Ncache, Ncalc:%i, Ncache:%i\n",Ncalc,Ncache);
6197  if (Ncalc!=Ntransform)
6198  SG_ERROR("Ncalc!=Ntransform, Ncalc:%i, Ntransform:%i\n",Ncalc,Ntransform);
6199  if (Ncalc!=Nmin)
6200  SG_ERROR("Ncalc!=Nmin, Ncalc:%i, Nmin:%i\n",Ncalc,Nmin);
6201  if (Ncalc!=Nmax)
6202  SG_ERROR("Ncalc!=Nmax, Ncalc:%i, Nmax:%i\n",Ncalc,Nmax);
6203  if (Ncalc!=Npenalties)
6204  SG_ERROR("Ncalc!=Npenalties, Ncalc:%i, Npenalties:%i\n",Ncalc,Npenalties);
6205  if (Ncalc!=Nlimits)
6206  SG_ERROR("Ncalc!=Nlimits, Ncalc:%i, Nlimits:%i\n",Ncalc,Nlimits);
6207  if (Ncalc!=Nname)
6208  SG_ERROR("Ncalc!=Nname, Ncalc:%i, Nname:%i\n",Ncalc,Nname);
6209  if (Ncalc!=Nid)
6210  SG_ERROR("Ncalc!=Nid, Ncalc:%i, Nid:%i\n",Ncalc,Nid);
6211  if (Mlimits!=Mpenalties)
6212  SG_ERROR("Mlimits!=Mpenalties, Mlimits:%i, Mpenalties:%i\n",Mlimits,Mpenalties);
6213 
6214  int32_t N = Ncalc;
6215  int32_t M = Mlimits;
6216  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6217  pm->create_plifs(N, M);
6218  pm->set_plif_ids(SGVector<int32_t>(ids, N));
6219  pm->set_plif_min_values(SGVector<float64_t>(min_values, N));
6220  pm->set_plif_max_values(SGVector<float64_t>(max_values, N));
6221  pm->set_plif_use_cache(SGVector<bool>(all_use_cache, N));
6222  pm->set_plif_use_svm(SGVector<int32_t>(all_use_svm, N));
6223  pm->set_plif_limits(SGMatrix<float64_t>(all_limits, N, M));
6224  pm->set_plif_penalties(SGMatrix<float64_t>(all_penalties, N, M));
6225  pm->set_plif_names(names, N);
6226  pm->set_plif_transform_type(all_transform, N);
6227 
6228  SG_FREE(names);
6229  SG_FREE(all_transform);
6230  SG_FREE(all_do_calc);
6231 
6232  return true;
6233 }
6234 
6235 bool CSGInterface::cmd_get_plif_struct()
6236 {
6237  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6238  CPlif** PEN = pm->get_PEN();
6239  int32_t N = pm->get_num_plifs();
6240  int32_t M = pm->get_num_limits();
6241 
6242 
6243  int32_t* ids = SG_MALLOC(int32_t, N);
6244  float64_t* max_values = SG_MALLOC(float64_t, N);
6245  float64_t* min_values = SG_MALLOC(float64_t, N);
6247  SGString<char>* all_transform = SG_MALLOC(SGString<char>, N);
6248  float64_t* all_limits = SG_MALLOC(float64_t, N*M);
6249  float64_t* all_penalties = SG_MALLOC(float64_t, N*M);
6250  bool* all_use_cache = SG_MALLOC(bool, N);
6251  int32_t* all_use_svm = SG_MALLOC(int32_t, N);
6252  bool* all_do_calc = SG_MALLOC(bool, N);
6253  for (int32_t i=0;i<N;i++)
6254  {
6255  ids[i]=PEN[i]->get_id();
6256  names[i].string = PEN[i]->get_plif_name();
6257  names[i].slen = strlen(PEN[i]->get_plif_name());
6258  float64_t* limits = PEN[i]->get_plif_limits();
6259  float64_t* penalties = PEN[i]->get_plif_penalties();
6260  for (int32_t j=0;j<M;j++)
6261  {
6262  all_limits[i*M+j]=limits[j];
6263  all_penalties[i*M+j]=penalties[j];
6264  }
6265  all_transform[i].string = (char*) PEN[i]->get_transform_type();
6266  all_transform[i].slen = strlen(PEN[i]->get_transform_type());
6267  min_values[i]=PEN[i]->get_min_value();
6268  max_values[i]=PEN[i]->get_max_value();
6269  all_use_cache[i]=PEN[i]->get_use_cache();
6270  all_use_svm[i]=PEN[i]->get_use_svm();
6271  all_do_calc[i]=PEN[i]->get_do_calc();
6272 
6273  }
6274  set_vector(ids,N);
6275  set_string_list(names, N);
6276  set_matrix(all_limits, M, N);
6277  set_matrix(all_penalties, M, N);
6278  set_string_list(all_transform, N);
6279  set_vector(min_values,N);
6280  set_vector(max_values,N);
6281  set_vector(all_use_cache,N);
6282  set_vector(all_use_svm,N);
6283  set_vector(all_do_calc,N);
6284 
6285  SG_FREE(ids);
6286  SG_FREE(max_values);
6287  SG_FREE(min_values);
6288  SG_FREE(names);
6289  SG_FREE(all_transform);
6290  SG_FREE(all_limits);
6291  SG_FREE(all_penalties);
6292  SG_FREE(all_use_cache);
6293  SG_FREE(all_use_svm);
6294  SG_FREE(all_do_calc);
6295 
6296  return true;
6297 }
6298 /*bool CSGInterface::cmd_signals_set_model()
6299 {
6300  // ARG 1
6301  int32_t len=0;
6302  char* filename;
6303  filename = get_string(len);
6304 
6305  CTrainPredMaster* tpm = new CTrainPredMaster(ui_kernel);
6306 
6307  tpm->read_models_from_file(filename);
6308 
6309  return true;
6310  }*/
6311 bool CSGInterface::cmd_signals_set_positions()
6312 {
6313  return true;
6314 }
6315 bool CSGInterface::cmd_signals_set_labels()
6316 {
6317  return true;
6318 }
6319 bool CSGInterface::cmd_signals_set_split()
6320 {
6321  return true;
6322 }
6323 bool CSGInterface::cmd_signals_set_train_mask()
6324 {
6325  return true;
6326 }
6327 bool CSGInterface::cmd_signals_add_feature()
6328 {
6329  return true;
6330 }
6331 bool CSGInterface::cmd_signals_add_kernel()
6332 {
6333  return true;
6334 }
6335 bool CSGInterface::cmd_signals_run()
6336 {
6337  return true;
6338 }
6339 
6340 bool CSGInterface::cmd_init_dyn_prog()
6341 {
6342  //ARG 1
6343  int32_t num_svms=get_int();
6344 
6345  CDynProg* h=new CDynProg(num_svms);
6346  ui_structure->set_dyn_prog(h);
6347  return true;
6348 }
6349 
6350 bool CSGInterface::cmd_clean_up_dyn_prog()
6351 {
6352  return ui_structure->cleanup();
6353 }
6354 
6355 bool CSGInterface::cmd_set_model()
6356 {
6357 
6358  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6359 
6360  CDynProg* h = ui_structure->get_dyn_prog();
6361  int32_t num_svms = h->get_num_svms();
6362  //CDynProg* h=new CDynProg(Nweights/* = num_svms */);
6363 
6364  //ARG 1
6365  // transition pointers
6366  // link transitions to length, content, frame (and tiling)
6367  // plifs (#states x #states x 3 or 4)
6368  int32_t numDim=0;
6369  int32_t* Dim=0;
6370  float64_t* penalties_array=NULL;
6371  get_ndarray(penalties_array,Dim,numDim);
6372  ASSERT(numDim==3);
6373  ASSERT(Dim[0]==Dim[1]);
6374 
6375  if (!pm->compute_plif_matrix(SGNDArray<float64_t>(penalties_array, Dim, numDim)))
6376  SG_ERROR("error computing plif matrix\n");
6377  ui_structure->set_num_states(Dim[0]);
6378  SG_FREE(penalties_array);
6379 
6380  // ARG 2
6381  // bool-> determines if orf information should be used
6382  bool use_orf = get_bool();
6383  ui_structure->set_use_orf(use_orf);
6384 
6385  // ARG 3
6386  // determines for which contents which orf should be used (#contents x 2)
6387  int32_t Nmod=0;
6388  int32_t Mmod=0;
6389  int32_t* mod_words;
6390  get_matrix(mod_words, Nmod,Mmod);
6391  if (Nmod != num_svms)
6392  SG_ERROR("should be equal: Nmod: %i, num_svms: %i\n",Nmod,num_svms);
6393  ASSERT(Mmod == 2)
6394  h->init_mod_words_array(SGMatrix<int32_t>(mod_words, Nmod, Mmod));
6395 
6396  // ARG 4
6397  // links: states -> signal plifs (#states x 2)
6398  int32_t num_states=0;
6399  int32_t feat_dim3=0;
6400  int32_t* state_signals;
6401  get_matrix(state_signals,num_states,feat_dim3);
6402  ASSERT(num_states==Dim[0]);
6403  pm->compute_signal_plifs(SGMatrix<int32_t>(state_signals, feat_dim3, num_states));
6404 
6405 
6406  // ARG 5
6407  // ORF info (#states x 2)
6408  int32_t Norf=0;
6409  int32_t Morf=0;
6410  int32_t* orf_info;
6411  get_matrix(orf_info,Norf,Morf);
6412  ASSERT(Norf==num_states)
6413  ASSERT(Morf==2)
6414 
6415  ui_structure->set_orf_info(orf_info, Norf, Morf);
6416  h->set_orf_info(SGMatrix<int32_t>(orf_info, Norf, Morf));
6417 
6418  h->set_num_states(num_states) ;
6419 
6420  return true;
6421 }
6422 
6423 bool CSGInterface::cmd_precompute_content_svms()
6424 {
6425 
6426  // ARG 1
6427  int32_t seq_len=0;
6428  char* seq;
6429  seq = get_string(seq_len);
6430 
6431  // ARG 2
6432  // all feature positions
6433  int32_t Npos=0;
6434  int32_t* all_pos;
6435  get_vector(all_pos, Npos);
6436 
6437  //ARG 3
6438  // content svm weights
6439  int32_t Nweights=0;
6440  int32_t num_svms=0;
6441  float64_t* weights;
6442  get_matrix(weights, Nweights, num_svms);
6443  if (Nweights!=5440)
6444  SG_PRINT("Dimension mismatch: got %i, expect %i\n", Nweights, 5440) ;
6445  ui_structure->set_content_svm_weights(weights, Nweights, num_svms);
6446 
6447  CDynProg* h = ui_structure->get_dyn_prog();
6448  if (!h)
6449  SG_ERROR("no DynProg object found, use init_dyn_prog first\n");
6450 
6451 
6452  //float64_t* weights = ui_structure->get_content_svm_weights();
6453  //int32_t Mweights = h->get_num_svms();
6454  //int32_t Nweights = ui_structure->get_num_svm_weights();
6455  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6456  h->set_gene_string(SGVector<char>(seq, seq_len));
6457  h->create_word_string();
6459  h->init_content_svm_value_array(num_svms);
6460  h->set_dict_weights(SGMatrix<float64_t>(weights, Nweights, num_svms));
6462  SG_DEBUG("precompute_content_svms done\n");
6463  return true;
6464 }
6465 
6466 bool CSGInterface::cmd_get_lin_feat()
6467 {
6468  CDynProg* h = ui_structure->get_dyn_prog();
6469  if (!h)
6470  SG_ERROR("no DynProg object found, use set_model first\n");
6471 
6472 
6473  int32_t dim1, dim2 = 0;
6474  float64_t* lin_feat = h->get_lin_feat(dim1, dim2);
6475 
6476  set_matrix(lin_feat, dim1, dim2);
6477 
6478  return true;
6479 }
6480 bool CSGInterface::cmd_set_lin_feat()
6481 {
6482  // ARG 1
6483  int32_t Nseq=0;
6484  char* seq;
6485  seq = get_string(Nseq);
6486 
6487  // ARG 2
6488  // all feature positions
6489  int32_t Npos=0;
6490  int32_t* all_pos;
6491  get_vector(all_pos, Npos);
6492 
6493  //ARG 3
6494  //
6495  int32_t num_svms, seq_len;
6496  float64_t* lin_feat=NULL;
6497  get_matrix(lin_feat, num_svms, seq_len);
6498 
6499  if (Npos!=seq_len)
6500  {
6501  SG_ERROR("Dimension mismatch: got %i positions and (%ix%i) values\n", Npos, num_svms, seq_len) ;
6502 
6503  SG_FREE(lin_feat);
6504  SG_FREE(seq);
6505  SG_FREE(all_pos);
6506 
6507  return false ;
6508  }
6509 
6510  CDynProg* h = ui_structure->get_dyn_prog();
6511  if (!h)
6512  SG_ERROR("no DynProg object found, use set_model first\n");
6513 
6514  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6515  h->set_gene_string(SGVector<char>(seq, Nseq));
6517  h->init_content_svm_value_array(num_svms);
6518  h->set_lin_feat(lin_feat, num_svms, seq_len);
6519 
6520  SG_FREE(lin_feat);
6521 
6522  return true;
6523 }
6524 bool CSGInterface::cmd_long_transition_settings()
6525 {
6526  bool use_long_transitions = get_bool();
6527  int32_t threshold = get_int();
6528  int32_t max_len = get_int();
6529 
6530  CDynProg* h = ui_structure->get_dyn_prog();
6531  if (!h)
6532  SG_ERROR("no DynProg object found, use set_model first\n");
6533 
6534  h->long_transition_settings(use_long_transitions, threshold, max_len);
6535 
6536  return true;
6537 }
6538 bool CSGInterface::cmd_set_feature_matrix()
6539 {
6540  int32_t num_states = ui_structure->get_num_states();
6541 
6542  //ARG 1
6543  // feature matrix (#states x #feature_positions x max_num_signals)
6544  int32_t* Dims=0;
6545  int32_t numDims=0;
6546  float64_t* features = NULL;
6547  get_ndarray(features, Dims, numDims);
6548 
6549  if (numDims!=3)
6550  SG_ERROR("expected a 3 dimensional array, got %i dimensions\n", numDims);
6551  if (Dims[0]!=num_states)
6552  SG_ERROR("number of rows (%i) not equal number of states (%i)\n",Dims[0], num_states);
6553  ASSERT(ui_structure->set_feature_matrix(features, Dims));
6554 
6555  ASSERT(ui_structure->set_feature_dims(Dims));
6556 
6557  SG_FREE(features);
6558  SG_FREE(Dims);
6559 
6560  return true;
6561 }
6562 bool CSGInterface::cmd_set_feature_matrix_sparse()
6563 {
6564  int32_t num_pos = ui_structure->get_num_positions();
6565  int32_t num_states = ui_structure->get_num_states();
6566 
6567  //ARG 1
6568  // feature matrix (#states x #feature_positions x max_num_signals)
6569  int32_t dim11, dim12 ;
6570  SGSparseVector<float64_t> *features1=NULL ;
6571  get_sparse_matrix(features1, dim11, dim12);
6572 
6573  int32_t dim21, dim22 ;
6574  SGSparseVector<float64_t> *features2=NULL ;
6575  get_sparse_matrix(features2, dim21, dim22);
6576 
6577  ASSERT(dim11==dim21) ;
6578  ASSERT(dim12==dim22) ;
6579 
6580  int32_t *Dims = SG_MALLOC(int32_t, 3);
6581  Dims[0]=dim11 ;
6582  Dims[1]=dim12 ;
6583  Dims[2]=2 ;
6584 
6585  ASSERT(Dims[0]==num_states)
6586  ASSERT(Dims[1]==num_pos)
6587 
6588  ASSERT(ui_structure->set_feature_matrix_sparse(features1, features2, Dims));
6589  ASSERT(ui_structure->set_feature_dims(Dims));
6590 
6591  SG_FREE(features1);
6592  SG_FREE(features2);
6593  SG_FREE(Dims);
6594 
6595  return true;
6596 }
6597 bool CSGInterface::cmd_init_intron_list()
6598 {
6599  //ARG1 start_positions
6600  int32_t Nstart_positions;
6601  int32_t* start_positions;
6602  get_vector(start_positions, Nstart_positions);
6603  //SG_PRINT("Nstart_positions:%i\n",Nstart_positions);
6604 
6605  //ARG2 end_positions
6606  int32_t Nend_positions;
6607  int32_t* end_positions;
6608  get_vector(end_positions, Nend_positions);
6609  //SG_PRINT("Nend_positions:%i\n",Nend_positions);
6610 
6611  //ARG3 quality
6612  int32_t Nquality;
6613  int32_t* quality;
6614  get_vector(quality, Nquality);
6615  //SG_PRINT("Nquality:%i\n",Nquality);
6616 
6617  //ARG4 all candidate positions
6618  int32_t Nall_pos;
6619  int32_t* all_pos;
6620  get_vector(all_pos, Nall_pos);
6621  //SG_PRINT("Nall_pos:%i\n",Nall_pos);
6622 
6623  ASSERT(Nquality==Nend_positions);
6624  ASSERT(Nend_positions==Nstart_positions);
6625 
6626  CIntronList* intron_list = new CIntronList();
6627 
6628  intron_list->init_list(all_pos, Nall_pos);
6629 
6630  intron_list->read_introns(start_positions, end_positions, quality, Nstart_positions);
6631 
6632  SG_FREE(start_positions);
6633  SG_FREE(end_positions);
6634