本页面包含了所有Matlab(tm)和Octave静态接口的例子。
要运行这些例子只需要
octave name_of_example.m
或者启动octave或Matlab并输入
name_of_example
注意，你需要确保 sg.oct 或 sg.mexglx（文件名因系统架构而异）位于 Matlab/Octave 的搜索路径中。可通过下面的命令将其所在目录加入搜索路径（Octave）：
addpath /path/to/octave
或者（使用 Matlab 时）：
addpath /path/to/matlab
最后请注意，如果是以非 root 用户安装的，你需要确保动态链接器能够找到 libshogun 和 libshogunui；为此，可能需要在启动 Matlab 或 Octave 之前在 shell 中设置：
export LD_LIBRARY_PATH=/path/to/libshogun:/path/to/libshogunui
% Explicit examples on how to use the different classifiers
%
% GMNPSVM example: selects the 'GMNPSVM' classifier with a GAUSSIAN kernel on
% REAL features, trains it on the multiclass label file and classifies the
% test feature matrix. All state lives inside sg(), so call order matters.

% Parameters handed to the sg() calls below.
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
% NOTE(review): max_train_time is assigned but never passed to sg() here.
max_train_time=60;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% GMNPSVM
disp('GMNPSVM');
sg('new_classifier', 'GMNPSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);

% Register training data and labels, then configure and train.
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_multiclass);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);

sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% Explicit examples on how to use the different classifiers
%
% GPBTSVM example: trains the 'GPBTSVM' classifier with a GAUSSIAN kernel on
% the two-class label file and classifies the test feature matrix.

% Parameters handed to the sg() calls below.
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
% NOTE(review): max_train_time is assigned but never passed to sg() here.
max_train_time=60;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% GPBTSVM
disp('GPBTSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'GPBTSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);

sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% KNN example: sets an EUCLIDIAN distance on REAL features, trains the 'KNN'
% classifier on the two-class labels and classifies the test features.

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% KNN
disp('KNN');
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'KNN');
% NOTE(review): the extra argument 3 is presumably the number of neighbours k
% -- confirm against the sg() command reference.
sg('train_classifier', 3);

sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% LDA example: trains the 'LDA' classifier on the two-class training data and
% classifies the test feature matrix. No kernel or distance is configured.

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% LDA
disp('LDA');
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'LDA');
sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% LibLinear example: trains a 'LIBLINEAR_L2R_LR' classifier on the two-class
% data and classifies the test data. Features are wrapped in sparse() before
% being handed to sg().

% Parameters handed to the sg() calls below.
C=1;
use_bias=false;
epsilon=1e-5;
max_train_time=60;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% LibLinear
disp('LibLinear');
% type can be one of LIBLINEAR_L2R_LR, LIBLINEAR_L2R_L2LOSS_SVC_DUAL,
% LIBLINEAR_L2R_L2LOSS_SVC, LIBLINEAR_L2R_L1LOSS_SVC_DUAL
sg('new_classifier', 'LIBLINEAR_L2R_LR');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);

sg('train_classifier');

% Classify the (sparse) test features; the outputs end up in 'result'.
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% LibLinear on synthetic data: draws two Gaussian clouds shifted by -dist and
% +dist, trains 'LIBLINEAR_L2R_L1LOSS_SVC_DUAL' on them, times the training,
% fetches the linear model (b, W) and reports the training error.
C=0.1;
epsilon=1e-3;

% The data below is drawn with randn, whose generator state is separate from
% rand's. Seed both so that the example is reproducible from run to run.
rand('state',17);
randn('state',17);
num=1000;
dim=20;
dist=1;

% One column per example: first half is the negative class, second half the
% positive class.
traindat=sparse([randn(dim,num/2)-dist, randn(dim,num/2)+dist]);
trainlab=[-ones(1,num/2), ones(1,num/2) ];

sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('svm_epsilon', epsilon);
% type can be one of LIBLINEAR_L2R_LR, LIBLINEAR_L2R_L2LOSS_SVC_DUAL,
% LIBLINEAR_L2R_L2LOSS_SVC, LIBLINEAR_L2R_L1LOSS_SVC_DUAL
sg('new_classifier', 'LIBLINEAR_L2R_L1LOSS_SVC_DUAL');

% Time the training (result is echoed on purpose: no trailing semicolon).
tic;
sg('train_classifier');
timeliblinear=toc

[b,W]=sg('get_classifier');

% Re-use the training data as test data to measure the training error.
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))

% Echo the learned bias and weight vector.
b
W'

% NOTE(review): this squares the margin residual of every example without a
% max(0, .) clamp; confirm this is the quantity that is meant to be shown for
% an L1-loss classifier.
obj=sum(W.^2)+C*sum((1-trainlab.*(W'*traindat+b)).^2)
% Explicit examples on how to use the different classifiers
%
% LibSVM example: trains the 'LIBSVM' classifier with a GAUSSIAN kernel on the
% two-class data and classifies the test feature matrix.

% Parameters handed to the sg() calls below.
size_cache=10;
C=1;
use_bias=false;
epsilon=1e-5;
width=2.1;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% LibSVM
disp('LibSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'LIBSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);

sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% Explicit examples on how to use the different classifiers
%
% LibSVM multiclass example: trains 'LIBSVM_MULTICLASS' with a GAUSSIAN kernel
% on the multiclass label file and classifies the test feature matrix.

% Parameters handed to the sg() calls below.
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% LibSVM MultiClass
disp('LibSVMMultiClass');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_multiclass);
sg('new_classifier', 'LIBSVM_MULTICLASS');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);

sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% Explicit examples on how to use the different classifiers
%
% LibSVM one-class example: trains 'LIBSVM_ONECLASS' with a GAUSSIAN kernel on
% the training features only (no labels are set) and classifies the test
% feature matrix.

% Parameters handed to the sg() calls below.
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% LibSVM OneClass
disp('LibSVMOneClass');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('new_classifier', 'LIBSVM_ONECLASS');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);

sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% LPBoost example on synthetic data: builds two uniformly distributed clouds,
% trains the 'LPBOOST' classifier, times the training, fetches the linear
% model (b, W) and reports training error and an objective value.
C=100;
epsilon=1e-3;

% Seed rand so that the generated toy data is reproducible.
rand('state',17);
num=1000;
dim=20;
dist=1;

% One column per example: first half shifted by -4*dist (labelled -1), second
% half shifted by -dist (labelled +1).
traindat=sparse([rand(dim,num/2)-4*dist, rand(dim,num/2)-dist]);
trainlab=[-ones(1,num/2), ones(1,num/2) ];

sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('svm_epsilon', epsilon);
sg('new_classifier', 'LPBOOST');

% Time the training (result is echoed: no trailing semicolon).
tic;
sg('train_classifier');
timelpboost=toc

[b,W]=sg('get_classifier');

% Re-use the training data as test data to measure the training error.
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))

% Echo the learned bias and weight vector.
b
W'

% 1-norm of W plus C times the summed hinge loss on the training data.
obj=sum(abs(W))+C*sum(max(0,1-trainlab.*(W'*traindat+b)))
% LPM example on synthetic data: builds two uniformly distributed clouds,
% trains the 'LPM' classifier with bias enabled, times the training, fetches
% the linear model (b, W) and reports training error and an objective value.
C=100;
% NOTE(review): epsilon is assigned but never passed to sg() in this example.
epsilon=1e-3;

% Seed rand so that the generated toy data is reproducible.
rand('state',17);
num=1000;
dim=20;
dist=1;

% One column per example: first half shifted by -4*dist (labelled -1), second
% half shifted by -dist (labelled +1).
traindat=sparse([rand(dim,num/2)-4*dist, rand(dim,num/2)-dist]);
trainlab=[-ones(1,num/2), ones(1,num/2) ];

sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', true);
sg('new_classifier', 'LPM');

% Time the training (result is echoed: no trailing semicolon).
tic;
sg('train_classifier');
timelpm=toc

[b,W]=sg('get_classifier');

% Re-use the training data as test data to measure the training error.
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))

% Echo the learned bias and weight vector.
b
W'

% 1-norm of W plus C times the summed hinge loss on the training data.
obj=sum(abs(W))+C*sum(max(0,1-trainlab.*(W'*traindat+b)))
% MPDSVM example: trains the 'MPDSVM' classifier with a GAUSSIAN kernel on the
% two-class data and classifies the test feature matrix.

% Parameters handed to the sg() calls below.
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% MPDSVM
disp('MPDSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'MPDSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);

sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% Perceptron example: generates its own toy data (no data files are loaded),
% trains the 'PERCEPTRON' classifier and classifies freshly drawn test data.

% NOTE(review): nothing from 'tools' is called in this example.
addpath('tools');

% Perceptron
disp('Perceptron');

% create some separable toy data: two 5-dimensional Gaussian clouds shifted by
% -1 and +1. randn is not seeded, so the data differs from run to run.
num=50;
label_train_twoclass=[-ones(1,num/2) ones(1,num/2)];
fm_train_real=[randn(5,num/2)-1, randn(5,num/2)+1];
% The test set has 2*num columns (num per cloud).
fm_test_real=[randn(5,num)-1, randn(5,num)+1];

sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'PERCEPTRON');
% Optional parameter call, left disabled in this example.
%sg('set_perceptron_parameters', 1.6, 5000);
sg('train_classifier');

% Classify the test features; the outputs end up in 'result'.
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% SubGradientSVM example: configures the 'SUBGRADIENTSVM' classifier on sparse
% two-class data. Training and classification are left commented out because,
% per the note below, training sometimes does not terminate.

% NOTE(review): this first value of C is overwritten by C=0.9 further down.
C=1.2;
use_bias=false;
epsilon=1e-5;
% NOTE(review): width is assigned but never used in this example.
width=2.1;
max_train_time=60;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% SubgradientSVM - often does not converge
disp('SubGradientSVM');

% Value of C actually passed to sg('c', ...) below.
C=0.9;
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'SUBGRADIENTSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);

% sometimes does not terminate
%sg('train_classifier');

%sg('set_features', 'TEST', sparse(fm_test_real));
%result=sg('classify');
% SVMLight example: trains the 'SVMLIGHT' classifier with a WEIGHTEDDEGREE
% kernel on DNA string features and classifies the DNA test features. The sg()
% calls are wrapped in try/catch and any failure is reported as missing
% SVMLight support.

% Kernel cache size passed to 'set_kernel'. It has to be defined here: the
% example is meant to run standalone, and an undefined size_cache would raise
% an error inside the try block that is then misreported as missing SVMLight
% support.
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_dna=load_matrix('../data/label_train_dna.dat');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% SVMLight
try
	disp('SVMLight');

	% Degree argument of the weighted-degree kernel.
	degree=20;
	sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree);
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
	sg('set_labels', 'TRAIN', label_train_dna);
	sg('new_classifier', 'SVMLIGHT');
	sg('svm_epsilon', epsilon);
	sg('svm_use_bias', use_bias);
	sg('c', C);

	sg('train_classifier');

	% Classify the DNA test features; the outputs end up in 'result'.
	sg('set_features', 'TEST', fm_test_dna, 'DNA');
	result=sg('classify');
catch
	% Deliberate best-effort: the example should not abort when the sg()
	% calls fail (the message assumes SVMLight is not available).
	disp('No support for SVMLight available.')
end
% SVMLight + weighted-degree kernel on synthetic DNA: trains once and then
% classifies the same test set three ways (plain, batch computation, explicit
% kernel optimization) and prints how far the three outputs differ.
rand('seed',17);

%sequence lengths, number of sequences
len=200;
num_train=500;
num_test=500;
num_a=2;
% Row indices around the sequence centre that are overwritten with 'A' for
% the positive class.
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));

%SVM regularization factor C
C=1;

%Weighted Degree kernel parameters
% NOTE(review): max_order is assigned but never used in this example.
max_order=5;
% Terminated with a semicolon like every other assignment, so the value is
% not echoed to the console.
order=15;
max_mismatch=0;
cache=100;
normalize=true;
mkl_stepsize=1;
block=0;
single_degree=-1;

%generate some toy data
acgt='ACGT';
rand('state',1);
% One column per sequence, one row per position.
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
traindat(aa,trainlab==1)='A';

testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
testdat(aa,testlab==1)='A';

%traindat'
%input('key to continue')

%train svm
sg('use_linadd', true);
sg('use_batch_computation', false);
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
%sg('set_WD_position_weights', ones(1,100)/100) ;
%sg('set_WD_position_weights', ones(1,200)/200) ;
sg('new_classifier', 'SVMLIGHT');
sg('c',C);
tic;sg('train_classifier');toc;

%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
sg('set_labels', 'TEST', testlab);

%sg('init_kernel_optimization');
%sg('delete_kernel_optimization');

% Variant 1: reference output with batch computation switched off and the
% kernel optimization deleted. Batch has to be off here, otherwise out1 and
% out2 are produced by the same settings and their comparison below is
% meaningless.
sg('use_batch_computation', false);
sg('delete_kernel_optimization');
out1=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out1)==testlab))

% Variant 2: batch computation switched on.
sg('use_batch_computation', true);
out2=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out2)==testlab))

% Variant 3: batch computation off, kernel optimization initialised
% explicitly; both steps are timed.
sg('use_batch_computation', false);
tic;sg('init_kernel_optimization');toc;
%sg('delete_kernel_optimization');

tic;out3=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out3)==testlab))

% Largest absolute deviation of each variant from the reference output.
max(abs(out1-out2))
max(abs(out1-out3))
% SVMLight + WEIGHTEDDEGREEPOS3 kernel on synthetic DNA with shifted motifs:
% trains the classifier and then classifies the same test set under five
% combinations of batch computation / kernel optimization type, printing the
% accuracy of each and the deviation from the first output.
rand('seed',17);
%sequence lengths, number of sequences
len=100;
num_train=200;
num_test=300;
num_a=3;
% Row indices around the sequence centre where the 'A' motif starts.
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%Weighted Degree kernel parameters
% NOTE(review): max_order is assigned but never used in this example.
max_order=8;
order=20;
shift=10 ;
max_mismatch=0;
% cache and single_degree are only referenced by the commented-out
% 'set_kernel' alternatives further down.
cache=100;
single_degree=-1;
% Random per-position values in [0, shift); reversed and floored into the
% shift vector handed to the kernel.
x=shift*rand(1,len);
%x(:)=0;
shifts = int32(floor(x(end:-1:1)));
% suboptimal position weights:
% (only used by a commented-out 'set_kernel' variant below)
posweights = double(floor(x(end:-1:1)));
%generate some toy data
acgt='ACGT';
rand('state',1);
% One column per sequence, one row per position.
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
% Random motif offset in 0..shift for the positive sequences.
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
aas=floor((shift+1)*rand(num_test,1));
idx=find(testlab==1);
for i=1:length(idx),
testdat(aa+aas(i),idx(i))='A';
end
%traindat=traindat(1:5,:) ;
%testdat=testdat(1:5,:) ;
%len=5 ;
% Forces the last character of the last training sequence to 'A'.
traindat(end,end)='A' ;
%traindat'
%input('key to continue')
%train svm
sg('use_linadd', true);
sg('use_batch_computation', true);
sg('set_features', 'TRAIN', traindat,'DNA');
sg('set_labels', 'TRAIN', trainlab);
%sg('set_kernel', 'WEIGHTEDDEGREEPOS2', 'CHAR', 10, order, max_mismatch, len, shifts);
sg('set_kernel', 'WEIGHTEDDEGREEPOS3', 'CHAR', 10, order, max_mismatch, len, 1, shifts);
%sg('set_kernel', 'WEIGHTEDDEGREEPOS3', 'CHAR', 10, order, max_mismatch, len, 1, shifts, posweights);
%sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
%sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, order);
%sg('set_WD_position_weights', ones(1,100)/100) ;
sg('new_classifier', 'SVMLIGHT');
sg('c',C);
sg('train_classifier');
%w=sg('get_subkernel_weights') ;
%w(1:3)=1 ;
%w(2:3)=0 ;
%w(3)=1 ;
%sg('set_subkernel_weights',w) ;
%z=cell(); z{10}='';
%for i=1:10;
% z{i}=traindat(:,i)';
%end
%sg('set_features', 'TEST', z,'DNA');
sg('set_features', 'TEST', testdat,'DNA');
sg('set_labels', 'TEST', testlab);
% Variant 1 (reference): batch computation off, kernel optimization deleted.
sg('use_batch_computation', false);
sg('delete_kernel_optimization');
out1=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out1)==testlab))
% Variant 2: 'SLOWBUTMEMEFFICIENT' optimization type with batch computation;
% note that the classifier is trained again before classifying.
sg('set_kernel_optimization_type', 'SLOWBUTMEMEFFICIENT');
sg('use_batch_computation', true);
sg('delete_kernel_optimization');
sg('train_classifier')
out2=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out2)==testlab))
% Variant 3: 'FASTBUTMEMHUNGRY' optimization type with batch computation.
sg('set_kernel_optimization_type', 'FASTBUTMEMHUNGRY');
sg('use_batch_computation', true);
sg('delete_kernel_optimization');
out3=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out3)==testlab))
% Variant 4: 'SLOWBUTMEMEFFICIENT', batch off, explicit (timed)
% init_kernel_optimization.
sg('set_kernel_optimization_type', 'SLOWBUTMEMEFFICIENT');
%sg('set_kernel_optimization_type', 'FASTBUTMEMHUNGRY');
sg('use_batch_computation', false);
tic;sg('init_kernel_optimization');toc;
%sg('delete_kernel_optimization');
tic;out4=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out4)==testlab))
% Variant 5: 'FASTBUTMEMHUNGRY', batch off, explicit (timed)
% init_kernel_optimization.
sg('set_kernel_optimization_type', 'FASTBUTMEMHUNGRY');
sg('use_batch_computation', false);
tic;sg('init_kernel_optimization');toc;
%sg('delete_kernel_optimization');
tic;out5=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out5)==testlab))
% Largest absolute deviation of each variant from the reference output.
max(abs(out1-out2))
max(abs(out1-out3))
max(abs(out1-out4))
max(abs(out1-out5))
%max(abs(out2-out3))
%xmax(abs(out3-out4))
% The script stops here; everything below this 'return' is never executed.
return
%evaluate svm on train data
sg('set_features', 'TEST', traindat,'DNA');
sg('set_labels', 'TEST', trainlab);
out=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out)==trainlab))
%evaluate svm on test data
sg('set_features', 'TEST', testdat,'DNA');
sg('set_labels', 'TEST', testlab);
out=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out)==testlab))
% SVMLin example: trains the 'SVMLIN' classifier on sparse two-class data and
% classifies the sparse test data.

% Parameters handed to the sg() calls below.
C=1.2;
use_bias=false;
epsilon=1e-5;
max_train_time=60;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% SVMLin
disp('SVMLin');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'SVMLIN');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);

sg('train_classifier');

% Classify the (sparse) test features; the outputs end up in 'result'.
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% SVMOcas example: trains the 'SVMOCAS' classifier on sparse two-class data
% and classifies the sparse test data.

% Parameters handed to the sg() calls below.
C=1.2;
use_bias=false;
epsilon=1e-5;
max_train_time=60;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% SVMOcas
disp('SVMOcas');
sg('new_classifier', 'SVMOCAS');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);

sg('train_classifier');

% Classify the (sparse) test features; the outputs end up in 'result'.
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% SVMOcas on synthetic data: trains 'SVMOCAS', fetches the linear model
% (b, W), loads that model into a fresh 'SVMOCAS' classifier via
% 'set_linear_classifier' and checks that both classifiers give the same
% outputs on the training data.
C=10;
epsilon=1e-3;

% Seed rand so that the generated toy data is reproducible.
rand('state',17);
num=16;
dim=10;
dist=0.001;

% Two uniform clouds shifted by -dist and +dist, one column per example,
% rescaled by dim times the overall mean before conversion to sparse.
traindat=[rand(dim,num/2)-dist, rand(dim,num/2)+dist];
scale=(dim*mean(traindat(:)));
traindat=sparse(traindat/scale);
trainlab=[-ones(1,num/2), +ones(1,num/2) ];

sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('svm_bufsize', 1000);
sg('svm_epsilon', epsilon);
sg('new_classifier', 'SVMOCAS');

% Time the training (result is echoed: no trailing semicolon).
tic;
sg('train_classifier');
timeocas=toc

[b,W]=sg('get_classifier');

% Training error of the trained classifier.
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))

% Round trip: create a new classifier, inject (b, W') and classify again.
sg('new_classifier', 'SVMOCAS');
sg('set_linear_classifier', b, W');
sg('set_features', 'TEST', traindat);
trainout2=sg('classify');
trainerr2=mean(trainlab~=sign(trainout2))

% Largest absolute difference between the two sets of outputs.
max(abs(trainout-trainout2))

% Echo the learned bias and weight vector.
b
W'

% NOTE(review): squares the margin residual of every example without a
% max(0, .) clamp -- confirm this is the intended quantity.
obj=sum(W.^2)+C*sum((1-trainlab.*(W'*traindat+b)).^2)
% SVMSGD example: trains the 'SVMSGD' classifier on sparse two-class data and
% classifies the sparse test data.

% Parameters handed to the sg() calls below.
C=1.2;
use_bias=false;
epsilon=1e-5;
max_train_time=60;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% SVMSGD
disp('SVMSGD');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'SVMSGD');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);

sg('train_classifier');

% Classify the (sparse) test features; the outputs end up in 'result'.
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% SVMSGD on synthetic data: trains 'SVMSGD' on two rescaled uniform clouds,
% times the training, fetches the linear model (b, W) and reports the
% training error and an objective value.
C=10;

% Seed rand so that the generated toy data is reproducible.
rand('state',17);
num=16;
dim=10;
dist=0.001;

% Two uniform clouds shifted by -dist and +dist, one column per example,
% rescaled by dim times the overall mean before conversion to sparse.
traindat=[rand(dim,num/2)-dist, rand(dim,num/2)+dist];
scale=(dim*mean(traindat(:)));
traindat=sparse(traindat/scale);
trainlab=[-ones(1,num/2), +ones(1,num/2) ];

sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('new_classifier', 'SVMSGD');

% Time the training (result is echoed: no trailing semicolon).
tic;
sg('train_classifier');
timesgd=toc

[b,W]=sg('get_classifier');

% Re-use the training data as test data to measure the training error.
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))

% Echo the learned bias and weight vector.
b
W'

% NOTE(review): squares the margin residual of every example without a
% max(0, .) clamp -- confirm this is the intended quantity.
obj=sum(W.^2)+C*sum((1-trainlab.*(W'*traindat+b)).^2)
% KMeans example: clusters the real-valued training features with 'KMEANS'
% using an EUCLIDIAN distance and fetches the clustering result.

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train=load_matrix('../data/fm_train_real.dat');

% KMEANS
disp('KMeans');

% Arguments forwarded to 'train_clustering'.
% NOTE(review): presumably number of clusters and maximum iterations --
% confirm against the sg() command reference.
k=3;
iter=1000;

sg('set_features', 'TRAIN', fm_train);
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('new_clustering', 'KMEANS');
sg('train_clustering', k, iter);

% Two outputs are returned by 'get_clustering' for KMEANS.
[radi, centers]=sg('get_clustering');
% Hierarchical clustering example: clusters the real-valued training features
% with 'HIERARCHICAL' using an EUCLIDIAN distance and fetches the result.

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train=load_matrix('../data/fm_train_real.dat');

% Hierarchical
disp('Hierarchical');

% Argument forwarded to 'train_clustering'.
% NOTE(review): presumably the number of merges to perform -- confirm.
merges=3;

sg('set_features', 'TRAIN', fm_train);
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('new_clustering', 'HIERARCHICAL');
sg('train_clustering', merges);

% Two outputs are returned by 'get_clustering' for HIERARCHICAL.
[merge_distance, pairs]=sg('get_clustering');
% BrayCurtis Distance
% Requests the 'BRAYCURTIS' distance matrix on real-valued features, first
% with the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('BrayCurtisDistance');
sg('set_distance', 'BRAYCURTIS', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Canberra Metric
% Requests the 'CANBERRA' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('CanberraMetric');
sg('set_distance', 'CANBERRA', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% CanberraWord distance example: converts DNA strings to WORD string features,
% attaches the SORTWORDSTRING preprocessor and requests the 'CANBERRA' distance
% matrix for the 'TRAIN' and then the 'TEST' target.

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% Arguments of the CHAR -> WORD conversion below.
order=3;
gap=0;
reverse='n';

% CanberraWord Distance
disp('CanberraWordDistance');
sg('set_distance', 'CANBERRA', 'WORD');
sg('add_preproc', 'SORTWORDSTRING');

% Training side: load DNA, convert to words, attach the preprocessor.
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
dm=sg('get_distance_matrix', 'TRAIN');

% Test side: same pipeline; dm is overwritten.
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
dm=sg('get_distance_matrix', 'TEST');
% Chebyshew Metric
% Requests the 'CHEBYSHEW' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('ChebyshewMetric');
sg('set_distance', 'CHEBYSHEW', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Chi Square Metric
% Requests the 'CHISQUARE' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('ChiSquareDistance');
sg('set_distance', 'CHISQUARE', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Cosine Distance
% Requests the 'COSINE' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('CosineDistance');
sg('set_distance', 'COSINE', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Euclidian Distance
% Requests the 'EUCLIDIAN' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('EuclidianDistance');
sg('set_distance', 'EUCLIDIAN', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Geodesic Metric
% Requests the 'GEODESIC' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('GeodesicMetric');
sg('set_distance', 'GEODESIC', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% HammingWord distance example: converts DNA strings to WORD string features,
% attaches the SORTWORDSTRING preprocessor and requests the 'HAMMING' distance
% matrix for the 'TRAIN' and then the 'TEST' target.

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% Arguments of the CHAR -> WORD conversion below.
order=3;
gap=0;
reverse='n'; % bit silly to not use boolean, set 'r' to yield true

% HammingWord Distance
disp('HammingWordDistance');
sg('set_distance', 'HAMMING', 'WORD');
sg('add_preproc', 'SORTWORDSTRING');

% Training side: load DNA, convert to words, attach the preprocessor.
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
dm=sg('get_distance_matrix', 'TRAIN');

% Test side: same pipeline; dm is overwritten.
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
dm=sg('get_distance_matrix', 'TEST');
% Jensen Metric
% Requests the 'JENSEN' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('JensenMetric');
sg('set_distance', 'JENSEN', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Manhattan Metric
% Requests the 'MANHATTAN' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('ManhattanMetric');
sg('set_distance', 'MANHATTAN', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% ManhattanWord distance example: converts DNA strings to WORD string
% features, attaches the SORTWORDSTRING preprocessor and requests the
% 'MANHATTAN' distance matrix for the 'TRAIN' and then the 'TEST' target.

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% Arguments of the CHAR -> WORD conversion below.
order=3;
gap=0;
reverse='n';

% ManhattanWord Distance
disp('ManhattanWordDistance');
sg('set_distance', 'MANHATTAN', 'WORD');
sg('add_preproc', 'SORTWORDSTRING');

% Training side: load DNA, convert to words, attach the preprocessor.
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
dm=sg('get_distance_matrix', 'TRAIN');

% Test side: same pipeline; dm is overwritten.
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
dm=sg('get_distance_matrix', 'TEST');
% Minkowski Metric
% Requests the 'MINKOWSKI' distance matrix (with parameter k) on real-valued
% features, first with the 'TRAIN' target and then, after registering test
% features, with the 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('MinkowskiMetric');

% Extra argument forwarded to 'set_distance' for this metric.
k=3;
sg('set_distance', 'MINKOWSKI', 'REAL', k);

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Tanimoto Metric
% Requests the 'TANIMOTO' distance matrix on real-valued features, first with
% the 'TRAIN' target and then, after registering test features, with the
% 'TEST' target.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

disp('TanimotoDistance');
sg('set_distance', 'TANIMOTO', 'REAL');

% Matrix for the training features.
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');

% Matrix for the 'TEST' target; dm is overwritten.
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% Explicit examples on how to use distributions
%
% Histogram example: only the feature preparation is active (DNA strings are
% converted to WORD features and the SORTWORDSTRING preprocessor is attached).
% Creating, training and querying the histogram distribution is commented out.

% NOTE(review): leng, rep, weight, num and len are assigned but not used by
% any active statement in this example.
leng=50;
rep=5;
weight=1;
% Arguments of the CHAR -> WORD conversion below.
order=3;
gap=0;
num=12;
len=23;
reverse='n'; % bit silly to not use boolean, set 'r' to yield true

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');

% Histogram
disp('Histogram');
%sg('new_distribution', 'HISTOGRAM');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');

% Disabled: training the distribution and reading back its statistics.
% sg('train_distribution');
% histo=sg('get_histogram');
% num_param=sg('get_histogram_num_model_parameters');
% for i = 1:num,
% for j = 1:num_param,
% sg(sprintf('get_log_derivative %d %d', j, i));
% end
% end
% sg('get_log_likelihood');
% sg('get_log_likelihood_sample');
% HMM example: generates one random observation sequence over the symbols
% '1'..'6' from three randomly loaded "cubes", trains an HMM on it with 'bw',
% reads the model back, loads it into a fresh HMM and queries its likelihood.
leng=50;
rep=5;
weight=1;

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
% NOTE(review): fm_train_dna is loaded but not used by this example.
fm_train_dna=load_matrix('../data/fm_train_dna.dat');

% HMM
disp('HMM');
% Arguments of 'new_hmm' below.
N=3;
M=6;

% generate a sequence with characters 1-6 drawn from 3 loaded cubes
% 'a' is (re)initialised as a cell array: after a previous run 'a' holds a
% numeric matrix returned by sg('get_hmm'), and a{i}=... would then fail.
a=cell(1,3);
for i = 1:3,
	% Each cube holds a random number (1..leng) of every face value, shuffled.
	a{i}= [ ones(1,ceil(leng*rand)) 2*ones(1,ceil(leng*rand)) 3*ones(1,ceil(leng*rand)) 4*ones(1,ceil(leng*rand)) 5*ones(1,ceil(leng*rand)) 6*ones(1,ceil(leng*rand)) ];
	a{i}= a{i}(randperm(length(a{i})));
end

% Random, shuffled schedule of which cube is used for each segment.
s=[];
for i = 1:size(a,2),
	s= [ s i*ones(1,ceil(rep*rand)) ];
end
s=s(randperm(length(s)));

% Build the observation string segment by segment.
sequence={''};
% Preallocated so that stale entries from an earlier run cannot survive.
f=zeros(1,length(s));
for i = 1:length(s),
	% Number of draws from cube s(i); with weight=1 this is the whole cube.
	f(i)=ceil(((1-weight)*rand+weight)*length(a{s(i)}));
	t=randperm(length(a{s(i)}));
	r=a{s(i)}(t(1:f(i)));
	% Map face values 1..6 to the characters '1'..'6'.
	sequence{1}=[sequence{1} char(r+'0')];
end

% Train the HMM on the generated sequence.
sg('new_hmm', N, M);
sg('set_features','TRAIN', sequence, 'CUBE');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', 1);
sg('bw');

% Read the model back (this overwrites the cell array 'a' with a model
% parameter), load it into a new HMM and query the likelihood.
[p, q, a, b]=sg('get_hmm');
sg('new_hmm', N, M);
sg('set_hmm', p, q, a, b);
likelihood=sg('hmm_likelihood');
% LinearHMM example: only the feature preparation is active (DNA strings are
% converted to WORD features and the SORTWORDSTRING preprocessor is attached).
% Creating, training and querying the distribution is commented out.

% Arguments of the CHAR -> WORD conversion below.
order=3;
gap=0;
% NOTE(review): num is only referenced by the commented-out loop below.
num=12;
reverse='n'; % bit silly to not use boolean, set 'r' to yield true

% Presumably makes load_matrix available -- confirm it lives in 'tools'.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');

% LinearHMM
disp('LinearHMM');
%sg('new_distribution', 'LinearHMM');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');

% Disabled: training the distribution and reading back its statistics.
% sg('train_distribution');
% histo=sg('get_histogram');
% num_param=sg('get_histogram_num_model_parameters');
% for i = 1:num,
% for j = 1:num_param,
% sg(sprintf('get_log_derivative %d %d', j, i));
% end
% end
% sg('get_log_likelihood');
% sg('get_log_likelihood_sample');
% Two-model linear HMM classification on synthetic DNA: trains one linear HMM
% on the positive and one on the negative training sequences, scores the
% validation set with each and classifies by the sign of the score difference.
seqlen=100;
numseq=50000;
order=2; %max 8, markov chain has in fact of order-1
% Pseudo-count values passed to sg('pseudo', ...) for the positive and the
% negative model respectively.
ppseudo=1e-5;
npseudo=10;
% Positions that are overwritten with 'T' in positive sequences.
motifidx=10:21;
acgt='ACGT';
rand('state', 17);

% Training set: 2*numseq sequences, one per column; first half labelled -1.
LT=[-ones(1,numseq), ones(1,numseq)];
% NOTE(review): ceil(3*rand(...)) only ever selects 'A', 'C' or 'G', so 'T'
% appears exclusively inside the planted motif -- confirm this is intended.
XT=acgt(ceil(3*rand(seqlen,2*numseq)));
XT(motifidx,LT==1)='T';

% Validation set, generated the same way.
LV=[-ones(1,numseq), ones(1,numseq)];
XV=acgt(ceil(3*rand(seqlen,2*numseq)));
XV(motifidx,LV==1)='T';

% Positive model: train on the positive training sequences only.
sg('set_features', 'TRAIN', XT(:,LT==1), 'DNA') ;
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order);
sg('pseudo', ppseudo);
sg('new_hmm', size(XT,1), 4^order);
sg('linear_train');
[p_p,q_p,a_p,b_p]=sg('get_hmm');

% Score the whole validation set with the positive model.
sg('set_features', 'TEST', XV, 'DNA') ;
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order);
posout=sg('one_class_linear_hmm_classify');

% Negative model: train on the negative training sequences only.
sg('set_features', 'TRAIN', XT(:,LT==-1), 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order);
sg('pseudo', npseudo);
sg('new_hmm', size(XT,1), 4^order);
sg('linear_train');
[p_n,q_n,a_n,b_n]=sg('get_hmm');

% Score the whole validation set with the negative model.
sg('set_features', 'TEST', XV, 'DNA') ;
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order);
negout=sg('one_class_linear_hmm_classify');

% Decision value and validation error (echoed: no trailing semicolon).
% NOTE(review): assumes the outputs have the same orientation as LV (a row
% vector) -- confirm.
output=posout-negout;
err=mean(sign(output)~=LV)
% POLY kernel vs. explicit POLY features: first trains 'SVMLIGHT' with a POLY
% kernel on random toy data, then loads the same data as explicit 'POLY'
% features and trains the linear 'SVMOCAS' classifier on them.
degree = 2;

% Two uniform clouds, one column per example (50 per class).
traindat = [rand(10,50)-1 2+rand(10,50)+1];
testdat = [rand(10,50)-1 2+rand(10,50)+1];
trainlab = [ones(1, 50) -ones(1, 50)];

C=1;
size_cache=10;
epsilon=1e-5;

% Kernel-based reference: SVMLight with a POLY kernel.
sg('set_kernel', 'POLY', 'REAL', size_cache, degree);
%sg('set_kernel_normalization', 'IDENTITY');
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('new_classifier', 'SVMLIGHT');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('c', C);
% Kernel matrix on the training data, kept for inspection.
km=sg('get_kernel_matrix', 'TRAIN');
tic; sg('train_classifier'); toc

sg('set_features', 'TEST', testdat);
result=sg('classify');

% Explicit-feature variant: the same data is loaded as 'POLY' features.
normalize=1;
sg('loglevel', 'DEBUG');
sg('svm_use_bias', 0);
sg('set_features', 'TRAIN', traindat, 'POLY', degree, normalize);
% Gram matrix of the features as returned by 'get_features'; can be compared
% against km by hand.
x = sg('get_features', 'TRAIN');
km2=x'*x;
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');

sg('set_features', 'TEST', testdat, 'POLY', degree, normalize);
out_wdocas=sg('classify');
% First part of the weighted-degree comparison: generates synthetic DNA with a
% shifted 'A' motif, trains a reference SVM with the WEIGHTEDDEGREE kernel and
% stores its outputs on the (identical) test data in out_ref.
C=1;
order=6;
degree=order;
from_order=6;
max_mismatch=0;
cache=100;
normalize=1;
mkl_stepsize=1;
block=1;
single_degree=-1;
% NOTE(review): this value of epsilon is overwritten by 1e-6 a few lines down.
epsilon=1e-5;
rand('seed',17);
%sequence lengths, number of sequences
len=20;
num_train=10;
num_a=5;
% Row indices around the sequence centre where the 'A' motif starts.
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
epsilon=1e-6;
%generate some toy data
acgt='ACGT';
shift=1;
rand('state',1);
% One column per sequence, one row per position.
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
% Random motif offset in 0..shift for the positive sequences.
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
% The test set is a copy of the training set.
testdat=traindat;
testlab=trainlab;
%train svm
sg('threads',1);
sg('use_linadd', 1);
sg('use_batch_computation', 1);
sg('progress', 'ON');
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('svm_use_bias', 0);
% NOTE(review): this example uses 'LIGHT', 'svm_train' and 'svm_classify'
% whereas the other examples use 'SVMLIGHT', 'train_classifier' and
% 'classify' -- confirm both spellings are accepted by sg().
sg('new_classifier', 'LIGHT');
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, from_order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
%x=sg('get_subkernel_weights');
%
%sg(sprintf( 'set_kernel WEIGHTEDDEGREE CHAR %i %i %i %i %i %i %i', cache, order, max_mismatch, 0, mkl_stepsize, block, single_degree) );
%sg('set_subkernel_weights',x(1:order));
%
%%kmu=sg('get_kernel_matrix', 'TRAIN');
%
%sg(sprintf( 'set_kernel WEIGHTEDDEGREE CHAR %i %i %i %i %i %i %i', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree) );
%sg('set_subkernel_weights',x(1:order));
%%km=sg('get_kernel_matrix', 'TRAIN');
%sg('new_classifier LIGHT');
sg('c',C);
% Time the training of the reference SVM.
tic;
sg('svm_train');
tim_lo=toc;
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
out_ref=sg('svm_classify');
%prc_ref=calcrfcscore(out_ref, testlab);
%roc_ref=calcrocscore(out_ref, testlab);
% Second part of the weighted-degree comparison: recodes the DNA characters
% as the byte values 0..3, trains 'WDSVMOCAS' on the resulting 'RAWDNA'
% features and stores its outputs on the test data in 'out'.

% Map A/C/G/T to 0/1/2/3 and store as uint8; one column per sequence.
traindat(traindat=='A')=0;
traindat(traindat=='C')=1;
traindat(traindat=='G')=2;
traindat(traindat=='T')=3;
traindat=uint8(traindat);

testdat(testdat=='A')=0;
testdat(testdat=='C')=1;
testdat(testdat=='G')=2;
testdat(testdat=='T')=3;
testdat=uint8(testdat);

% Pass the training matrix in the same orientation as the test matrix below
% (columns are sequences); a transposed matrix would no longer match the
% number of labels in trainlab.
sg('set_features', 'TRAIN', traindat, 'RAWDNA');
sg('set_labels', 'TRAIN', trainlab);

sg('c',C);
sg('svm_epsilon', epsilon);
sg('new_classifier','WDSVMOCAS',order, from_order);

% Time the training. NOTE(review): tim_lo is also used for the timing of the
% reference SVM earlier in this example and is overwritten here.
tic;
sg('svm_train');
tim_lo=toc;

%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'RAWDNA');
out=sg('svm_classify');
%prc=calcrfcscore(out, testlab);
%roc=calcrocscore(out, testlab);
% Train SVMOCAS on the raw-DNA data with the 'WD' feature option (order, from_order)
% and classify the test data the same way.
sg('set_features', 'TRAIN', traindat, 'RAWDNA', 'WD', order, from_order);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', testdat, 'RAWDNA', 'WD', order, from_order);
out_wdocas=sg('classify');
% print (no semicolon) the largest absolute differences between the three outputs
max(abs(out-out_ref))
max(abs(out_wdocas-out_ref))
max(abs(out_wdocas-out))
% Build an explicit, dense feature matrix `dat` (one column per sequence) from the
% 0..3-encoded traindat, then replace traindat/testdat by its sparse version.
% Per-degree weights.
% NOTE(review): 4.281744 looks like a normalisation constant; its origin is not visible here.
weights=sqrt((degree:-1:1)/sum(degree:-1:1))/4.281744;
N = size(traindat,1);
% number of rows: N positions times (4^1 + ... + 4^degree)
nDim = N*sum(4.^(1:degree));
% allocate the whole matrix once instead of growing it column by column
dat = zeros(nDim, size(traindat,2));
for j=1:size(traindat,2),
    offset = 0;
    for i=1:N,
        % val is the base-4 value of the substring starting at position i
        val = 0;
        for d = 1:degree
            % substrings that would run past the end of the sequence are skipped
            if i+d-1<=N,
                val = 4*val + double(traindat(i+d-1,j));
                dat(offset+val+1,j) = weights(d);
                % advance past the 4^d slots reserved for this (position, degree)
                offset = offset + 4^d;
            end
        end
    end
end
traindat=sparse(dat);
testdat=traindat;
% Train and evaluate SVMOCAS twice on the explicit features: first on the sparse
% matrix (traindat), then on the dense matrix (dat), and print output differences.
% --- sparse features ---
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', traindat);
out_ocas=sg('classify');
% --- dense features ---
sg('set_features', 'TRAIN', dat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', dat);
out_docas=sg('classify');
% print (no semicolon) the largest absolute differences between the outputs
max(abs(out-out_ocas))
max(abs(out-out_ref))
max(abs(out_ocas-out_ref))
max(abs(out_ocas-out_docas))
% Compare two ways of feeding the same data: one vertically stacked matrix
% [traindat;2*traindat] versus two feature objects added with 'add_dotfeatures'.
% --- variant 1: a single stacked feature matrix ---
sg('set_features', 'TRAIN', [traindat;2*traindat]);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', [traindat;2*traindat]);
out1=sg('classify');
% --- variant 2: the sparse matrix plus twice the dense matrix as separate feature objects ---
sg('clean_features','TRAIN');
sg('clean_features','TEST');
sg('add_dotfeatures', 'TRAIN', traindat);
sg('add_dotfeatures', 'TRAIN', 2*dat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('add_dotfeatures', 'TEST', traindat);
sg('add_dotfeatures', 'TEST', 2*dat);
out2=sg('classify');
% print (no semicolon) the largest absolute difference between both variants
max(abs(out1-out2))
% Parameters and toy DNA data for the weighted-spectrum comparison that follows.
rand('seed',17);
% sequence length and number of training sequences
len=100;
num_train=10;
num_a=5;
% row indices of num_a consecutive positions around the middle of the sequence
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
epsilon=1e-8;
%SVM regularization factor C
C=1;
%Spectrum kernel parameters
order=8;
cache=10;
use_sign=false;
normalize=true;
% choose the normalization string handed to the kernel from the boolean flag
if normalize,
normalization='FULL'; %NO,SQRT,LEN,SQLEN,FULL
else
normalization='NO'; %NO,SQRT,LEN,SQLEN,FULL
end
%generate some toy data
acgt='ACGT';
shift=40;
rand('state',1);
% len x num_train character matrix of random nucleotides, one sequence per column
traindat=acgt(ceil(4*rand(len,num_train)));
% first half of the labels is -1, second half is +1
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
% one random integer offset in 0..shift per sequence
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
% overwrite the window aa, moved by aas(i), with 'A' in every positive sequence
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
% Build a weighted sum of unnormalised COMMSTRING kernels for the orders 1..order
% (km) and, if requested, normalise it by its diagonal (km2).
sg('loglevel', 'ALL');
%%% spec
% linearly decreasing weights that sum to one
weights=(order:-1:1);
weights=weights/sum(weights);
km=zeros(size(traindat,2));
for o=1:order,
    sg('set_features', 'TRAIN', traindat, 'DNA');
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', o, order-1);
    sg('add_preproc', 'SORTWORDSTRING');
    sg('attach_preproc', 'TRAIN');
    % Single quotes on purpose: a double-quoted "NO" is a syntax error in older
    % MATLAB releases and a string object (not a char array) in newer ones.
    % The kernel is left unnormalised here because km is normalised by hand below.
    sg('set_kernel', 'COMMSTRING', 'WORD',cache, use_sign, 'NO');
    km=km+weights(o)*sg('get_kernel_matrix', 'TRAIN');
end
km2=km;
if normalize,
    % km2(i,j) = km(i,j) / sqrt(km(i,i)*km(j,j))
    for i=1:size(km,1),
        for j=1:size(km,2),
            km2(i,j)=km(i,j)/(sqrt(km(i,i)*km(j,j)));
        end
    end
end
%%% wdspec
% Compute the WEIGHTEDCOMMSTRING kernel matrix, compare it with km2, then train
% SVMLIGHT with it and classify the training sequences again as test data.
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, 0, 'r');
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', cache, use_sign, normalization);
feat=sg('get_features','TRAIN');
wkm=sg('get_kernel_matrix', 'TRAIN');
% report the largest element-wise difference to the hand-built matrix km2
fprintf('max diff %g\n', max(abs(wkm(:)-km2(:))))
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('use_linadd', true);
sg('new_classifier', 'SVMLIGHT');
sg('set_labels','TRAIN', trainlab);
sg('train_classifier');
[bias, alphas]=sg('get_classifier');
% svmw is compared against the SVMOCAS weights further down in the script
sg('init_kernel_optimization');
svmw=sg('get_kernel_optimization');
% the test features are the training sequences, converted the same way
sg('set_features', 'TEST', traindat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, 0, 'r');
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TEST');
out_ref=sg('classify');
% Train SVMLIGHT a second time, now on the hand-built matrix km2 supplied as a
% CUSTOM kernel, and classify the training sequences; result goes to out_ref2.
sg('c', C);
sg('clean_features', 'TRAIN');
sg('clean_features', 'TEST');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('use_linadd', false);
sg('new_classifier', 'SVMLIGHT');
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('set_labels','TRAIN', trainlab);
% km2 is passed as a full matrix ('FULL')
sg('set_kernel','CUSTOM', km2, 'FULL');
sg('train_classifier');
sg('set_features', 'TEST', traindat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
out_ref2=sg('classify');
% Re-encode the nucleotides as 0..3, train SVMOCAS with the 'WSPEC' feature option
% and compare its outputs and weights with the two SVMLIGHT runs.
% map A,C,G,T -> 0,1,2,3 in place, then turn the char matrix into uint8
traindat(traindat=='A')=0;
traindat(traindat=='C')=1;
traindat(traindat=='G')=2;
traindat(traindat=='T')=3;
traindat=uint8(traindat);
testdat=uint8(traindat);
% NOTE(review): presumably unloads the sg interface so it starts fresh -- confirm
clear sg
sg('svm_use_bias', 0);
sg('svm_epsilon', epsilon);
sg('set_labels','TRAIN', trainlab);
sg('set_features', 'TRAIN', traindat, 'RAWDNA','WSPEC', order, order-1, normalize);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
[bias_ocas, alphas_ocas]=sg('get_classifier');
sg('set_features', 'TEST', testdat, 'RAWDNA','WSPEC', order, order-1, normalize);
out=sg('classify');
% largest output difference to the kernel-based run and to the custom-kernel run
fprintf('max out diff %g\n', max(abs(out-out_ref)))
fprintf('max out diff %g\n', max(abs(out-out_ref2)))
% print (no semicolon) the largest difference between the leading entries of svmw
% and the weights returned for SVMOCAS
max(abs(svmw(1:length(alphas_ocas))-alphas_ocas'))
% disabled: manual scoring of the stored feature vectors
%o=[];
%for i=1:length(feat),
% o(i)=alphas_ocas*feat{i};
%end
% Exercise the 'slide_window' and 'from_position_list' options of set_features
% on a single DNA string and print the resulting features.
acgt='ACGT';
% one 41-character string: ten 'A', ten 'C', ten 'G', ten 'T', then one 'A'
dat={acgt([1*ones(1,10) 2*ones(1,10) 3*ones(1,10) 4*ones(1,10) 1])};
sg('set_features', 'TRAIN', dat, 'DNA', 'slide_window', 5, 1);
% no semicolon: the features are printed
f=sg('get_features', 'TRAIN')
sg('set_features', 'TRAIN', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25,30,36]));
f=sg('get_features', 'TRAIN')
sg('set_features', 'TEST', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25,30,36]));
ft=sg('get_features', 'TEST')
% parameters; only cache, order, max_mismatch, normalize, mkl_stepsize, block and
% single_degree are used by the set_kernel call below
C=1;
order=20;
order_com=5;
max_mismatch=0;
len=200;
shift=0;
num=100;
num_test=5000;
cache=10;
normalize=true;
mkl_stepsize=1;
block=0;
single_degree=-1;
sg('set_kernel', 'WEIGHTEDDEGREE', 'STRING', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
% no semicolon: the kernel matrix is printed
km=sg('get_kernel_matrix', 'TRAIN')
sg('clean_features', 'TRAIN');
sg('clean_features', 'TEST');
% set features again with shifted position lists, then clean up
sg('set_features', 'TRAIN', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25,30]+5));
sg('set_features', 'TRAIN', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25]+9));
sg('clean_features', 'TRAIN');
% Chi2 kernel example: request the kernel matrix for the real-valued train data
% and then for the test data.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
width=1.4;
% CHI2
disp('Chi2');
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% Combined kernel example: LINEAR + GAUSSIAN + POLY sub-kernels, each paired with
% its own copy of the real-valued train/test features.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Combined
disp('Combined');
% start without any previously registered features
sg('clean_features','TRAIN');
sg('clean_features','TEST');
sg('set_kernel', 'COMBINED', size_cache);
% every add_kernel is followed by the features for that sub-kernel
sg('add_kernel', 1, 'LINEAR', 'REAL', size_cache);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', size_cache, 1);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'POLY', 'REAL', size_cache, 3, false);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
km=sg('get_kernel_matrix', 'TEST');
% CommUlongString example: COMMSTRING kernel on DNA strings converted to ULONG
% with a SORTULONGSTRING preprocessor attached.
size_cache=10;
order=30;
gap=0;
reverse='n';
use_sign=0;
normalization='FULL';
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Comm Ulong String
disp('CommUlongString');
sg('add_preproc', 'SORTULONGSTRING');
sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization);
% train side: load, convert CHAR strings to ULONG strings, attach the preprocessor
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
% test side: same steps; km is overwritten
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
% CommWordString example: COMMSTRING kernel on DNA strings converted to WORD
% with a SORTWORDSTRING preprocessor attached.
size_cache=10;
order=7;
gap=0;
reverse='n';
use_sign=0;
normalization='FULL';
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Comm Word String
disp('CommWordString');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization);
% train side: load, convert CHAR strings to WORD strings, attach the preprocessor
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
% test side: same steps; km is overwritten
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
% Const kernel example: request the CONST kernel matrix for train and test data.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Const
disp('Const');
% value handed to the CONST kernel (lower-case c, distinct from the SVM C used elsewhere)
c=23;
sg('set_kernel', 'CONST', 'REAL', size_cache, c);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% Custom kernel example: train LIBSVM on a random symmetric matrix with random labels.
% 60 random labels: the sign of uniform values in (-1,1)
truth = sign(2*rand(1,60) - 1);
% random square matrix, made symmetric by adding its transpose
km=rand(length(truth));
km=km+km';
sg('set_kernel', 'CUSTOM', km, 'FULL');
sg('set_labels', 'TRAIN', truth);
sg('new_classifier', 'LIBSVM');
sg('train_classifier');
% classify everything at once, then a single example by index 0
out_all = sg('classify');
out = sg('classify_example',0);
% Diag kernel example: request the DIAG kernel matrix for train and test data.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Diag
disp('Diag');
% Not named `diag`: a variable of that name would shadow the builtin diag()
% for everything that runs later in the same workspace.
diag_value=23.;
sg('set_kernel', 'DIAG', 'REAL', size_cache, diag_value);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% Distance kernel example: a DISTANCE kernel built on the EUCLIDIAN distance.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Distance
disp('Distance');
width=1.7;
% the distance is selected first, then the kernel that uses it
% ('EUCLIDIAN' is the spelling the interface is given here; it is a runtime string)
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('set_kernel', 'DISTANCE', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% FixedDegreeString example: FIXEDDEGREE kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Fixed Degree String
disp('FixedDegreeString');
degree=3;
sg('set_kernel', 'FIXEDDEGREE', 'CHAR', size_cache, degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% Gaussian kernel example: GAUSSIAN kernel of the given width on real-valued data.
size_cache=10;
width=2.1;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Gaussian
disp('Gaussian');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% GaussianShift example: GAUSSIANSHIFT kernel with width, max_shift and shift_step.
size_cache=10;
width=1.0;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% GaussianShift
disp('GaussianShift');
max_shift=2;
shift_step=1;
sg('set_kernel', 'GAUSSIANSHIFT', 'REAL', size_cache, width, max_shift, shift_step);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% PluginEstimate / HistogramWord example: train a plugin estimator on WORD strings,
% then request the HISTOGRAM kernel matrices.
size_cache=10;
order=3;
gap=0;
reverse='n';
% load_matrix is presumably provided by the tools directory
addpath('tools');
label_train_dna=load_matrix('../data/label_train_dna.dat');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Plugin Estimate
disp('PluginEstimate w/ HistogramWord');
% load both data sets and convert the CHAR strings to WORD strings
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
% pseudo counts handed to the estimator (first and second argument)
pseudo_pos=1e-1;
pseudo_neg=1e-1;
sg('new_plugin_estimator', pseudo_pos, pseudo_neg);
sg('set_labels', 'TRAIN', label_train_dna);
sg('train_estimator');
% the kernel is set only after the estimator has been trained
sg('set_kernel', 'HISTOGRAM', 'WORD', size_cache);
km=sg('get_kernel_matrix', 'TRAIN');
% classification with the plugin estimator is not supported yet:
% lab=sg('plugin_estimate_classify');
km=sg('get_kernel_matrix', 'TEST');
% Toy experiment: train LIBSVM with the SLIK kernel on random DNA in which the
% positive class carries a fixed run of 'A's, then classify the test set three times.
rand('seed',17);
% sequence length, number of train/test sequences
len=200;
num_train=500;
num_test=500;
num_a=2;
% row indices of num_a consecutive positions around the middle of the sequence
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%locality improved kernel parameters
cache=100;
l=3;
d1=4;
d2=1;
%generate some toy data
acgt='ACGT';
rand('state',1);
% len x num_train random nucleotides; positive columns get 'A' in the rows aa
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
traindat(aa,trainlab==1)='A';
% test data is generated the same way
testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
testdat(aa,testlab==1)='A';
% disabled: inspect the data interactively
%traindat'
%input('key to continue')
%train svm
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('set_kernel', 'SLIK', 'CHAR', cache, l, d1, d2);
sg('new_classifier', 'LIBSVM');
sg('c', C);
% toc without a semicolon prints the elapsed training time
tic;sg('train_classifier');toc;
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
sg('set_labels', 'TEST', testlab);
% classify three times; accuracy is the fraction of outputs whose sign equals the label
out1=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out1)==testlab))
out2=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out2)==testlab))
tic;out3=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out3)==testlab))
% print (no semicolon) how much the repeated classifications differ
max(abs(out1-out2))
max(abs(out1-out3))
% Linear kernel example: LINEAR kernel with a scale argument on real-valued data.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Linear
disp('Linear');
scale=1.2;
sg('set_kernel', 'LINEAR', 'REAL', size_cache, scale);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% LinearByte example: LINEAR kernel on uint8 data passed as 'RAWBYTE'.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
% the loaded matrices are cast to uint8 before being handed to sg
fm_train_byte=uint8(load_matrix('../data/fm_train_byte.dat'));
fm_test_byte=uint8(load_matrix('../data/fm_test_byte.dat'));
% LinearByte (original author's note: this kernel is known to be broken)
disp('LinearByte');
sg('set_kernel', 'LINEAR', 'BYTE', size_cache);
sg('set_features', 'TRAIN', fm_train_byte, 'RAWBYTE');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_byte, 'RAWBYTE');
km=sg('get_kernel_matrix', 'TEST');
% LinearString example: LINEAR kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Linear String
disp('LinearString');
sg('set_kernel', 'LINEAR', 'CHAR', size_cache);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% LinearWord example: LINEAR kernel with a scale argument on uint16 data.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
% the loaded matrices are cast to uint16 before being handed to sg
fm_train_word=uint16(load_matrix('../data/fm_train_word.dat'));
fm_test_word=uint16(load_matrix('../data/fm_test_word.dat'));
% LinearWord
disp('LinearWord');
scale=1.4;
sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale);
sg('set_features', 'TRAIN', fm_train_word);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_word);
km=sg('get_kernel_matrix', 'TEST');
% LocalAlignmentString example: LOCALALIGNMENT kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Local Alignment String
disp('LocalAlignmentString');
sg('set_kernel', 'LOCALALIGNMENT', 'CHAR', size_cache);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% LocalityImprovedString example: LIK kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Locality Improved String
disp('LocalityImprovedString');
% Not named `length`: a variable of that name shadows the builtin length(), so any
% later length(x) call in the same workspace would index this scalar instead.
kernel_length=5;
inner_degree=5;
outer_degree=inner_degree+2;
sg('set_kernel', 'LIK', 'CHAR', size_cache, kernel_length, inner_degree, outer_degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% OligoString example: OLIGO kernel with the arguments k and w on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Oligo String
% announce the example like every other kernel example in this file does
disp('OligoString');
k=3;
w=1.2;
sg('set_kernel', 'OLIGO', 'CHAR', size_cache, k, w);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% Poly kernel example: POLY kernel with degree, inhomogene and use_normalization flags.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Poly
disp('Poly');
degree=4;
inhomogene=false;
use_normalization=true;
sg('set_kernel', 'POLY', 'REAL', size_cache, degree, inhomogene, use_normalization);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% PolyMatchString example: POLYMATCH kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Poly Match String
disp('PolyMatchString');
degree=3;
inhomogene=false;
sg('set_kernel', 'POLYMATCH', 'CHAR', size_cache, degree, inhomogene);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% PolyMatchWordString example: POLYMATCH kernel on DNA strings converted to WORD.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
order=3;
gap=0;
reverse='n'; % passed as a character rather than a boolean; per the original note, 'r' yields true
% use_sign and normalization are assigned but not used by the calls below
use_sign=false;
normalization='FULL';
% Poly Match WordString
disp('PolyMatchWordString');
degree=2;
inhomogene=true;
sg('set_kernel', 'POLYMATCH', 'WORD', size_cache, degree, inhomogene);
% train side: load the DNA strings and convert CHAR strings to WORD strings
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
km=sg('get_kernel_matrix', 'TRAIN');
% test side: same steps; km is overwritten
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
km=sg('get_kernel_matrix', 'TEST');
% Sigmoid kernel example: SIGMOID kernel with a gamma and a coef0 argument.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% sigmoid
disp('Sigmoid');
% Not named `gamma`: a variable of that name would shadow the builtin gamma()
% for everything that runs later in the same workspace.
sigmoid_gamma=1.2;
coef0=1.3;
sg('set_kernel', 'SIGMOID', 'REAL', size_cache, sigmoid_gamma, coef0);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% SimpleLocalityImprovedString example: SLIK kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Simple Locality Improved String
disp('SimpleLocalityImprovedString');
% Not named `length`: a variable of that name shadows the builtin length(), so any
% later length(x) call in the same workspace would index this scalar instead.
kernel_length=5;
inner_degree=5;
outer_degree=inner_degree+2;
sg('set_kernel', 'SLIK', 'CHAR', size_cache, kernel_length, inner_degree, outer_degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% SparseGaussian example: GAUSSIAN kernel on the real-valued data passed as sparse matrices.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Sparse Gaussian
disp('SparseGaussian');
width=1.3;
sg('set_kernel', 'GAUSSIAN', 'SPARSEREAL', size_cache, width);
% the dense matrices are converted with sparse() right at the call
sg('set_features', 'TRAIN', sparse(fm_train_real));
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', sparse(fm_test_real));
km=sg('get_kernel_matrix', 'TEST');
% SparseLinear example: LINEAR kernel on the real-valued data passed as sparse matrices.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Sparse Linear
disp('SparseLinear');
scale=1.3;
sg('set_kernel', 'LINEAR', 'SPARSEREAL', size_cache, scale);
% the dense matrices are converted with sparse() right at the call
sg('set_features', 'TRAIN', sparse(fm_train_real));
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', sparse(fm_test_real));
km=sg('get_kernel_matrix', 'TEST');
% SparsePoly example: POLY kernel on the real-valued data passed as sparse matrices.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Sparse Poly
disp('SparsePoly');
degree=3;
inhomogene=true;
use_normalization=false;
sg('set_kernel', 'POLY', 'SPARSEREAL', size_cache, degree, inhomogene, use_normalization);
% the dense matrices are converted with sparse() right at the call
sg('set_features', 'TRAIN', sparse(fm_train_real));
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', sparse(fm_test_real));
km=sg('get_kernel_matrix', 'TEST');
% Toy experiment: train SVMLIGHT with a COMMSTRING (spectrum) kernel on random DNA
% in which the positive class carries a shifted run of 'A's; report the accuracy
% on the training data and on separately generated test data.
rand('seed',17);
% sequence length, number of train/test sequences
len=100;
num_train=1000;
num_test=5000;
num_a=5;
% row indices of num_a consecutive positions around the middle of the sequence
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%Spectrum kernel parameters
order=5;
cache=10;
use_sign=true;
normalization='FULL'; %NO,SQRT,LEN,SQLEN,FULL
%generate some toy data
acgt='ACGT';
shift=40;
rand('state',1);
% len x num_train random nucleotides, one sequence per column
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
% one random integer offset in 0..shift per sequence
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
% overwrite the window aa, moved by aas(i), with 'A' in every positive sequence
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
% test data is generated the same way (aas and idx are reused)
testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
aas=floor((shift+1)*rand(num_test,1));
idx=find(testlab==1);
for i=1:length(idx),
testdat(aa+aas(i),idx(i))='A';
end
% disabled: inspect the data interactively
%traindat'
%input('key to continue')
%train svm
sg('use_linadd', true);
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'COMMSTRING', 'WORD', cache, use_sign, normalization);
sg('new_classifier', 'SVMLIGHT');
sg('c', C);
sg('train_classifier');
sg('init_kernel_optimization');
%evaluate svm on train data
sg('set_features', 'TEST', traindat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('attach_preproc', 'TEST');
sg('set_labels', 'TEST', trainlab);
out=sg('classify');
% accuracy: fraction of outputs whose sign equals the label
fprintf('accuracy: %f \n', mean(sign(out)==trainlab))
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('attach_preproc', 'TEST');
sg('set_labels', 'TEST', testlab);
out=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out)==testlab))
% Compare a hand-built weighted sum of COMMSTRING kernels (orders 1..order) with
% the WEIGHTEDCOMMSTRING kernel on the same toy DNA data.
rand('seed',17);
% sequence length and number of training sequences
len=100;
num_train=10;
num_a=5;
% row indices of num_a consecutive positions around the middle of the sequence
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%Spectrum kernel parameters
order=8;
cache=10;
use_sign=false;
normalization='NO'; %NO,SQRT,LEN,SQLEN,FULL
%generate some toy data
acgt='ACGT';
shift=40;
rand('state',1);
% len x num_train random nucleotides, one sequence per column
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
% one random integer offset in 0..shift per sequence
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
% overwrite the window aa, moved by aas(i), with 'A' in every positive sequence
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
%%% spec
% linearly decreasing weights that sum to one
weights=(order:-1:1);
weights=weights/sum(weights);
km=zeros(size(traindat,2));
% accumulate the weighted kernel matrices for the orders 1..order
for o=1:order,
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', o, order-1);
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'COMMSTRING', 'WORD',cache, use_sign, normalization);
km=km+weights(o)*sg('get_kernel_matrix', 'TRAIN');
end
%%% wdspec
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, 0, 'r');
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', cache, use_sign, normalization);
wkm=sg('get_kernel_matrix', 'TRAIN');
% print (no semicolon) the largest element-wise difference between both matrices
max(abs(wkm(:)-km(:)))
% WeightedCommWordString example: WEIGHTEDCOMMSTRING kernel on DNA strings converted
% to WORD with a SORTWORDSTRING preprocessor attached.
size_cache=10;
use_sign=0;
reverse='r';
order=8;
gap=0;
normalization='FULL';
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Weighted Comm Word String
disp('WeightedCommWordString');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', size_cache, use_sign, normalization);
% train side: load, convert CHAR strings to WORD strings, attach the preprocessor
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
% test side: same steps; km is overwritten
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
% WeightedDegreePositionString example: WEIGHTEDDEGREEPOS kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Weighted Degree Position String
disp('WeightedDegreePositionString');
degree=20;
sg('set_kernel', 'WEIGHTEDDEGREEPOS', 'CHAR', size_cache, degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% WeightedDegreeString example: WEIGHTEDDEGREE kernel on DNA character strings.
size_cache=10;
% load_matrix is presumably provided by the tools directory
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Weighted Degree String
disp('WeightedDegreeString');
degree=20;
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
% km is overwritten by the matrix requested for 'TEST'
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% This script should enable you to rerun the experiment in the
% paper that we labeled with "christmas star".
%
% The task is to classify two star-shaped classes that share the
% midpoint. The difficulty of the learning problem depends on the
% distance between the classes, which is varied.
%
% Our model selection leads to a choice of C = 0.5. The model
% selection is not repeated inside this script.

% The script builds these variables by indexed/field assignment, which fails or
% gives wrong shapes if they are left over from an earlier run (for example a
% numeric `result` from a classifier example). Start from a clean state.
clear radius_star dummy x w result;

% Preliminary settings:
C = 0.5; % SVM Parameter
cache_size = 50; % cache per kernel in MB
svm_eps=1e-3; % svm epsilon
mkl_eps=1e-3; % mkl epsilon
no_obs = 50; % number of observations / data points (sum for train and test and both classes)
% 2000 was used in the paper
k_star = 20; % number of "leaves" of the stars
alpha = 0.3; % noise level of the data
radius_star(:,1) = [4.1:0.2:10]'; % increasing radius of the 1.class
radius_star(:,2) = 4*ones(length(radius_star(:,1)),1); % fixed radius 2.class
% distance between the classes: radius_star(:,1)-radius_star(:,2)
rbf_width = [0.01 0.1 1 10 100]; % different width for the five used rbf kernels
num_kernels = numel(rbf_width); % one feature object and one sub-kernel per width
mkl_norm = 1; % >=1
ent_lambda = 0; % 0<=lambda<=1
rand('state', 17);
randn('state', 17);
%%%%
%%%% Great loop: train MKL for every data set (the different distances between the stars)
%%%%
%sg('loglevel', 'ALL');
%sg('echo', 'ON');
for kk = 1:size(radius_star,1)
    % data generation
    fprintf('MKL for radius %+02.2f \n', radius_star(kk,1))
    dummy(1,:) = rand(1,4*no_obs);
    noise = alpha*randn(1,4*no_obs);
    dummy(2,:) = sin(k_star*pi*dummy(1,:)) + noise; % sine
    dummy(2,1:2*no_obs) = dummy(2,1:2*no_obs)+ radius_star(kk,1); % distance shift: first class
    dummy(2,(2*no_obs+1):end) = dummy(2,(2*no_obs+1):end)+ radius_star(kk,2); % distance shift: second class
    dummy(1,: ) = 2*pi*dummy(1,:);
    % polar -> cartesian: dummy(1,:) is the angle, dummy(2,:) the radius
    x(1,:) = dummy(2,:).*sin(dummy(1,:));
    x(2,:) = dummy(2,:).*cos(dummy(1,:));
    train_y = [-ones(1,no_obs) ones(1,no_obs)];
    test_y = [-ones(1,no_obs) ones(1,no_obs)];
    % odd columns are used for training, even columns for testing
    train_x = x(:,1:2:end);
    test_x = x(:,2:2:end);
    clear dummy x;

    % train MKL
    sg('clean_kernel');
    sg('clean_features', 'TRAIN');
    % add the training set once per sub-kernel
    for k_idx = 1:num_kernels
        sg('add_features','TRAIN', train_x);
    end
    sg('set_labels','TRAIN', train_y); % set the labels
    sg('new_classifier', 'MKL_CLASSIFICATION');
    sg('mkl_use_interleaved_optimization', 1); % 0, 1
    sg('set_solver', 'ELASTICNET'); % DIRECT, NEWTON, CPLEX, AUTO, GLPK, ELASTICNET
    %sg('set_constraint_generator', 'LIBSVM');
    sg('mkl_parameters', mkl_eps, 0, mkl_norm);
    sg('elasticnet_lambda',ent_lambda);
    sg('svm_epsilon', svm_eps);
    sg('set_kernel', 'COMBINED', 0);
    % one Gaussian sub-kernel per width, in the order given by rbf_width
    for k_idx = 1:num_kernels
        sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(k_idx));
    end
    sg('c', C);
    sg('train_classifier');
    [b,alphas]=sg('get_svm') ;
    w(kk,:) = sg('get_subkernel_weights');

    % calculate train error
    sg('clean_features', 'TEST');
    for k_idx = 1:num_kernels
        sg('add_features','TEST',train_x);
    end
    sg('set_labels','TEST', train_y);
    sg('set_threshold', 0);
    result.trainout(kk,:)=sg('classify');
    result.trainerr(kk) = mean(train_y~=sign(result.trainout(kk,:)),2);

    % calculate test error
    sg('clean_features', 'TEST');
    for k_idx = 1:num_kernels
        sg('add_features','TEST',test_x);
    end
    sg('set_labels','TEST',test_y);
    sg('set_threshold', 0);
    result.testout(kk,:)=sg('classify');
    result.testerr(kk) = mean(test_y~=sign(result.testout(kk,:)),2);
end
disp('done. now w contains the kernel weightings and result test/train outputs and errors')
% MKL_MULTICLASS example: train a multiclass MKL classifier on a combined
% LINEAR + GAUSSIAN + POLY kernel and print the classification result.
size_cache=10;
C=1.2;
% use_bias, width and max_train_time are assigned but not used by the calls below
use_bias=false;
epsilon=1e-5;
width=1.2;
mkl_eps=0.001;
mkl_norm=2;
max_train_time=600;
% load_matrix is presumably provided by the tools directory
addpath('tools');
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% MKL_MULTICLASS
disp('MKL_MULTICLASS');
sg('new_classifier', 'MKL_MULTICLASS');
disp('Combined');
% start without any previously registered kernel or features
sg('clean_kernel');
sg('clean_features','TRAIN');
sg('clean_features','TEST');
sg('set_kernel', 'COMBINED', size_cache);
% every add_kernel is followed by the features for that sub-kernel
sg('add_kernel', 1, 'LINEAR', 'REAL', size_cache);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', size_cache, 1);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'POLY', 'REAL', size_cache, 2);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('set_labels', 'TRAIN', label_train_multiclass);
sg('svm_epsilon', epsilon);
sg('c', C);
sg('mkl_parameters', mkl_eps, 0, mkl_norm);
sg('train_classifier');
result=sg('classify');
% no semicolon: the result is printed
result
% This script should enable you to rerun the experiment in the
% paper that we labeled "mixture linear and sine".
%
% The task is to learn a regression function where the true function
% is given by a mixture of 2 sine waves in addition to a linear trend.
% We vary the frequency of the second higher frequency sine wave.
% Setup: MKL on 10 RBF kernels of different widths on 1000 examples
% Preliminary setting
% Kernel widths for the 10 basic SVMs. Assigned as one literal so that the value
% does not depend on a variable rbf_width left over from an earlier run
% (element-wise assignment would keep its extra entries or its shape).
rbf_width = [0.001 0.005 0.01 0.05 0.1 1 10 50 100 1000];
mkl_norm = 1; % >=1
% SVM parameter
C = 1;
cache_size = 50;
mkl_eps = 1e-4;
svm_eps = 1e-4;
svr_tube = 0.01;
debug = 0;
% data
f = [0:20]; % parameter that varies the frequency of the second sine wave
no_obs = 20; % number of observations
% verbose logging and command echo only when debug is set
if debug
    sg('loglevel', 'ALL');
    sg('echo', 'ON');
else
    sg('loglevel', 'ERROR');
    sg('echo', 'OFF');
end
for kk = 1:length(f) % Big loop
% data generation
train_x = [0:((4*pi)/(no_obs-1)):4*pi];
trend = 2 * train_x* ((pi)/(max(train_x)-min(train_x)));
wave1 = sin(train_x);
wave2 = sin(f(kk)*train_x);
train_y = trend + wave1 + wave2;
% MKL learning
kernels={};
sg('new_classifier', 'MKL_REGRESSION');
sg('mkl_parameters', mkl_eps, 0, mkl_norm);
sg('mkl_use_interleaved_optimization', 1); % 0, 1
sg('set_solver', 'DIRECT'); % DIRECT, NEWTON, CPLEX, AUTO, GLPK, ELASTICNET
sg('c', C);
sg('svm_epsilon',svm_eps);
sg('svr_tube_epsilon',svr_tube);
sg('clean_features', 'TRAIN');
sg('clean_kernel');
sg('set_labels', 'TRAIN', train_y); % set labels
sg('add_features','TRAIN', train_x); % add features for every basic SVM
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('set_kernel', 'COMBINED', 0);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(1));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(2));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(3));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(4));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(5));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(6));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(7));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(8));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(9));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(10));
sg('train_regression');
weights(kk,:) = sg('get_subkernel_weights') ;
fprintf('frequency: %02.2f rbf-kernel-weights: %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f \n', f(kk), weights(kk,:))
end
% This script should enable you to rerun the experiment in the
% paper that we labeled "sine".
%
% In this regression task a sine wave is to be learned.
% We vary the frequency of the wave.

% Preliminary settings:
% Parameter for the SVMs.
C = 10; % obtained via model selection (not included in the script)
cache_size = 10;
mkl_eps = 1e-4; % threshold for precision
svm_eps = 1e-4;
svr_tube_eps = 1e-3;
debug = 0;

% Kernel widths for the 5 "basic" SVMs. Written as one literal vector so that
% a longer rbf_width left over from a previously run example (e.g. one with
% 10 widths) is replaced rather than partially overwritten.
rbf_width = [0.005 0.05 0.5 1 10];
num_kernels = numel(rbf_width);
mkl_norm = 1; % >=1

% data
f = 0.1:0.2:5; % values for the different frequencies
no_obs = 100; % number of observations

if debug
    sg('loglevel', 'ALL');
    sg('echo', 'ON');
else
    sg('loglevel', 'ERROR');
    sg('echo', 'OFF');
end

% The sample positions are the same for every frequency; build them once.
train_x = 1:(((10*2*pi)-1)/(no_obs-1)):10*2*pi;

% One row of sub-kernel weights per frequency. Preallocating also discards a
% stale 'weights' matrix with a different column count, which would make the
% row assignment inside the loop fail with a dimension mismatch.
weights = zeros(length(f), num_kernels);

for kk = 1:length(f) % big loop for the different learning problems
    % data generation
    train_y = sin(f(kk)*train_x);

    % initialize MKL-SVR
    sg('new_regression', 'MKL_REGRESSION');
    sg('mkl_parameters', mkl_eps, 0, mkl_norm);
    sg('mkl_use_interleaved_optimization', 1); % 0, 1
    sg('set_solver', 'GLPK'); % DIRECT, NEWTON, CPLEX, AUTO, GLPK, ELASTICNET
    sg('c', C);
    sg('svm_epsilon', svm_eps);
    sg('svr_tube_epsilon', svr_tube_eps);
    sg('clean_features', 'TRAIN');
    sg('clean_kernel');
    sg('set_labels', 'TRAIN', train_y); % set labels

    % add one copy of the features for every SVR
    for ii = 1:num_kernels
        sg('add_features','TRAIN', train_x);
    end

    % combined kernel with one Gaussian sub-kernel per width
    sg('set_kernel', 'COMBINED', 0);
    for ii = 1:num_kernels
        sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(ii));
    end

    sg('train_regression');
    weights(kk,:) = sg('get_subkernel_weights');
    % the format string carries exactly one placeholder per sub-kernel (5)
    fprintf('frequency: %02.2f rbf-kernel-weights: %02.2f %02.2f %02.2f %02.2f %02.2f \n', f(kk), weights(kk,:))
end
% LogPlusOne example: register the LOGPLUSONE preprocessor, attach it to the
% train and test features in turn, and fetch the CHI2 kernel matrix for each.

% input data
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% kernel parameters
size_cache=10;
width=1.4;

disp('LogPlusOne');
sg('add_preproc', 'LOGPLUSONE');
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);

% Same three steps for TRAIN and then TEST; km ends up holding the TEST matrix.
feature_sets = {'TRAIN', fm_train_real; 'TEST', fm_test_real};
for set_idx = 1:size(feature_sets, 1)
    target = feature_sets{set_idx, 1};
    sg('set_features', target, feature_sets{set_idx, 2});
    sg('attach_preproc', target);
    km=sg('get_kernel_matrix', target);
end
% NormOne example: register the NORMONE preprocessor, attach it to the train
% and test features in turn, and fetch the CHI2 kernel matrix for each.

% input data
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% kernel parameters
size_cache=10;
width=1.4;

disp('NormOne');
sg('add_preproc', 'NORMONE');
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);

% Same three steps for TRAIN and then TEST; km ends up holding the TEST matrix.
feature_sets = {'TRAIN', fm_train_real; 'TEST', fm_test_real};
for set_idx = 1:size(feature_sets, 1)
    target = feature_sets{set_idx, 1};
    sg('set_features', target, feature_sets{set_idx, 2});
    sg('attach_preproc', target);
    km=sg('get_kernel_matrix', target);
end
% PruneVarSubMean example: register the PRUNEVARSUBMEAN preprocessor (with the
% divide_by_std flag enabled), attach it to the train and test features in
% turn, and fetch the CHI2 kernel matrix for each.

% input data
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% kernel parameters
size_cache=10;
width=1.4;

disp('PruneVarSubMean');
divide_by_std=true; % passed straight through to 'add_preproc'
sg('add_preproc', 'PRUNEVARSUBMEAN', divide_by_std);
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);

% Same three steps for TRAIN and then TEST; km ends up holding the TEST matrix.
feature_sets = {'TRAIN', fm_train_real; 'TEST', fm_test_real};
for set_idx = 1:size(feature_sets, 1)
    target = feature_sets{set_idx, 1};
    sg('set_features', target, feature_sets{set_idx, 2});
    sg('attach_preproc', target);
    km=sg('get_kernel_matrix', target);
end
% SortUlongString example: DNA strings are converted from CHAR to ULONG
% string features, the SORTULONGSTRING preprocessor is attached, and the
% COMMSTRING kernel matrix is fetched, first for TRAIN and then for TEST.

% input data
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% general parameters (width is not referenced by any call in this example)
size_cache=10;
width=1.4;

%
% complex string features;
%
order=3;
gap=0;
reverse='n'; % bit silly to not use boolean, set 'r' to yield true
use_sign=false;
normalization='FULL';

disp('CommUlongString');
sg('add_preproc', 'SORTULONGSTRING');
sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization);

% Same four steps for TRAIN and then TEST; km ends up holding the TEST matrix.
dna_sets = {'TRAIN', fm_train_dna; 'TEST', fm_test_dna};
for set_idx = 1:size(dna_sets, 1)
    target = dna_sets{set_idx, 1};
    sg('set_features', target, dna_sets{set_idx, 2}, 'DNA');
    sg('convert', target, 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse);
    sg('attach_preproc', target);
    km=sg('get_kernel_matrix', target);
end
% SortWordString example: DNA strings are converted from CHAR to WORD string
% features, the SORTWORDSTRING preprocessor is attached, and the COMMSTRING
% kernel matrix is fetched, first for TRAIN and then for TEST.

% input data
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% general parameters (width is not referenced by any call in this example)
size_cache=10;
width=1.4;

% string feature / kernel parameters
order=3;
gap=0;
reverse='n';
use_sign=false;
normalization='FULL';

disp('CommWordString');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization);

% Same four steps for TRAIN and then TEST; km ends up holding the TEST matrix.
dna_sets = {'TRAIN', fm_train_dna; 'TEST', fm_test_dna};
for set_idx = 1:size(dna_sets, 1)
    target = dna_sets{set_idx, 1};
    sg('set_features', target, dna_sets{set_idx, 2}, 'DNA');
    sg('convert', target, 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
    sg('attach_preproc', target);
    km=sg('get_kernel_matrix', target);
end
% KRR example: train a 'KRR' regression machine with a Gaussian kernel on the
% real-valued training features and apply it to the test features.

% input data
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');

% parameters (tube_epsilon is not referenced by any call in this example)
size_cache=10;
width=2.1;
C=1.2;
tube_epsilon=1e-2;

disp('KRR');
tau=1.2;

% training setup: features, kernel, labels, then the learner itself
gaussian_args = {'GAUSSIAN', 'REAL', size_cache, width};
sg('set_features', 'TRAIN', fm_train);
sg('set_kernel', gaussian_args{:});
sg('set_labels', 'TRAIN', label_train);
sg('new_regression', 'KRR');
sg('krr_tau', tau);
sg('c', C);
sg('train_regression');

% prediction on the test features (the interface command is 'classify')
sg('set_features', 'TEST', fm_test);
result=sg('classify');
% LibSVR example: train a 'LIBSVR' regression machine with a Gaussian kernel
% on the real-valued training features and apply it to the test features.

% input data
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');

% parameters
size_cache=10;
width=2.1;
C=1.2;
tube_epsilon=1e-2;

disp('LibSVR');

% training setup: features, kernel, labels, then the learner itself
gaussian_args = {'GAUSSIAN', 'REAL', size_cache, width};
sg('set_features', 'TRAIN', fm_train);
sg('set_kernel', gaussian_args{:});
sg('set_labels', 'TRAIN', label_train);
sg('new_regression', 'LIBSVR');
sg('svr_tube_epsilon', tube_epsilon);
sg('c', C);
sg('train_regression');

% prediction on the test features (the interface command is 'classify')
sg('set_features', 'TEST', fm_test);
result=sg('classify');
% SVRLight example: train an 'SVRLIGHT' regression machine with a Gaussian
% kernel and apply it to the test features. All sg calls sit inside try/catch
% so that any failure results in a short notice instead of an error.

% input data
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');

% parameters
size_cache=10;
width=2.1;
C=1.2;
tube_epsilon=1e-2;

try
    disp('SVRLight');

    % training setup: kernel, features, labels, then the learner itself
    gaussian_args = {'GAUSSIAN', 'REAL', size_cache, width};
    sg('set_kernel', gaussian_args{:});
    sg('set_features', 'TRAIN', fm_train);
    sg('set_labels', 'TRAIN', label_train);
    sg('new_regression', 'SVRLIGHT');
    sg('svr_tube_epsilon', tube_epsilon);
    sg('c', C);
    sg('train_regression');

    % prediction on the test features (the interface command is 'classify')
    sg('set_features', 'TEST', fm_test);
    result=sg('classify');
catch
    disp('No support for SVRLight available.')
end
%% Dynamic-programming example: loads a prepared workspace, hands the PLIF
%% (penalty) definitions, model and sequence data to shogun, and runs
%% 'best_path_trans'.
%% load data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% The variables used below but never assigned in this script (block, model,
% penalty_array, state_signals, seg_path, a_trans, loss) presumably come from
% this MAT-format file -- TODO confirm against the data file.
load('-mat', '../data/DynProg_example.dat')
%% set a number of defaults
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
use_orf = 1;
num_svms = 8;
use_long_transitions = 1;
threshold = 1000;
long_transition_max_len = 100000;
% Pad block.content_pred with zero rows up to num_svms rows; the index range
% is empty (no change) when it already has at least num_svms rows.
block.content_pred(end+1:num_svms,:) = deal(0);
viterbi_nbest = [1 0] ;
%% reshape the training parameters and additional information like
%% length constraints and transformation type and pass them to shogun
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Collect each field of the penalty_array entries into one array / cell array
% per field, indexed by entry number j.
for j=1:length(penalty_array)
all_ids(j) = penalty_array{j}.id;
all_names{j} = penalty_array{j}.name;
% limits and penalties are stored column-wise, one column per entry
all_limits(:,j) = penalty_array{j}.limits;
all_penalties(:,j) = penalty_array{j}.penalties;
% entries without a transform fall back to 'linear'
if isempty(penalty_array{j}.transform)
all_transform{j} = 'linear';
else
all_transform{j} = penalty_array{j}.transform;
end
all_min_values(j) = penalty_array{j}.min_value;
all_max_values(j) = penalty_array{j}.max_value;
all_use_cache(j) = penalty_array{j}.use_cache;
all_use_svm(j) = penalty_array{j}.use_svm;
% this flag is set to 1 for every entry
all_do_calc(j) = 1;
end
% ids are cast to int32 and decremented by one before being passed on
% (presumably converting 1-based ids to 0-based -- TODO confirm); the three
% flag vectors are cast to int32 as well.
sg('set_plif_struct',int32(all_ids)-1,all_names, all_limits, all_penalties, all_transform,...
all_min_values, all_max_values, int32(all_use_cache), int32(all_use_svm), int32(all_do_calc));
%% pass the data to shogun
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
sg('init_dyn_prog', num_svms)
% positions are shifted by -1 and cast to int32 (presumably 1-based to
% 0-based -- TODO confirm)
sg('set_lin_feat', block.seq, int32(block.all_pos-1), block.content_pred);
sg('set_model', model.transition_pointers, use_orf, int32(model.mod_words), int32(state_signals),int32(model.orf_info))
sg('set_feature_matrix', block.features)
sg('long_transition_settings', use_long_transitions, threshold, long_transition_max_len)
%% run the dynamic program
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% model.p and model.q are passed transposed; the call returns three outputs
% that are stored in path_scores, path and ppos.
[path_scores, path, ppos]= sg('best_path_trans', model.p', model.q', int32(viterbi_nbest), seg_path, a_trans, loss);