本页面包含了所有Octave模块化接口的例子。
要运行这些例子，只需在命令行中执行：
octave name_of_example.m
或者启动 octave 并输入：
name_of_example
% Domain adaptation SVM example.
% An SVMLight is trained on the first slice of the DNA training data; a
% DomainAdaptationSVM is then trained on the second slice and regularised
% against the first solution.
addpath('tools');
init_shogun;

% Load each file once and cut it into the two domains.
tmp=load_matrix('../data/label_train_dna.dat');
label_train_dna=tmp(1:50);
label_train_dna2=tmp(50:92);

tmp=load_matrix('../data/fm_train_dna.dat');
fm_train_dna=tmp(:,1:50);
fm_train_dna2=tmp(:, 50:92);

fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% NOTE(review): the second "test" set is cut from the training matrix, and
% index 50 belongs to both slices -- confirm that this is intended.
fm_test_dna2=tmp(:,50:92);

%if exist('SVMLight')
disp('Domain Adaptation SVM')

C = 1.0;
degree=3;

feats_train=StringCharFeatures(DNA);
feats_test=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test.set_features(fm_test_dna);

kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree);
labels=Labels(label_train_dna);

svm=SVMLight(C, kernel, labels);
svm.train();

%#####################################
disp('obtaining DA SVM from previously trained SVM')

feats_train2=StringCharFeatures(DNA);
feats_test2=StringCharFeatures(DNA);
feats_train2.set_features(fm_train_dna2);
feats_test2.set_features(fm_test_dna2);

% The second kernel and label set are built from the second slice.
% (Previously they reused feats_train / label_train_dna, which left
% feats_train2 and label_train_dna2 unused.)
kernel2=WeightedDegreeStringKernel(feats_train2, feats_train2, degree);
labels2=Labels(label_train_dna2);

% we regularize versus the previously obtained solution
dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0);
dasvm.train();

out = dasvm.classify(feats_test2).get_labels();
%else
%disp('No support for SVMLight available.')
%end
% GMNPSVM example: SVM with a Gaussian kernel trained on the multiclass labels.
init_shogun
addpath('tools');

% settings used below
width=2.1;
C=1.2;
epsilon=1e-5;
num_threads=1;

% training labels and train/test feature matrices
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% gmnpsvm
disp('GMNPSVM')

feats_train=RealFeatures(fm_train_real);
labels=Labels(label_train_multiclass);
kernel=GaussianKernel(feats_train, feats_train, width);

% configure and train the machine
svm=GMNPSVM(C, kernel, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.train();

% re-initialise the kernel on (train, test) and classify; output is discarded
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
svm.classify().get_labels();
% GPBTSVM example: SVM with a Gaussian kernel trained on the two-class labels.
init_shogun
addpath('tools');

% settings used below
width=2.1;
C=1.2;
epsilon=1e-5;
num_threads=2;

% training labels and train/test feature matrices
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% gpbtsvm
disp('GPBTSVM')

feats_train=RealFeatures(fm_train_real);
labels=Labels(label_train_twoclass);
kernel=GaussianKernel(feats_train, feats_train, width);

% configure and train the machine
svm=GPBTSVM(C, kernel, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.train();

% re-initialise the kernel on (train, test) and classify; output is discarded
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
svm.classify().get_labels();
% KNN example: k-nearest-neighbour classifier on a Euclidian distance.
init_shogun
addpath('tools');

% settings used below
k=3;
num_threads=1;

% training labels and train/test feature matrices
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% knn
disp('KNN')

feats_train=RealFeatures(fm_train_real);
labels=Labels(label_train_multiclass);
distance=EuclidianDistance(feats_train, feats_train);

% configure and train
knn=KNN(k, distance, labels);
knn.parallel.set_num_threads(num_threads);
knn.train();

% classify the test features and keep the predicted labels
feats_test=RealFeatures(fm_test_real);
output=knn.classify(feats_test).get_labels();
% LDA example: trains an LDA classifier on real-valued features with
% two-class labels, then classifies the test features.
init_shogun
addpath('tools');

label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% lda
disp('LDA')

feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);

% First constructor argument of LDA. Named lda_gamma rather than gamma so
% the built-in gamma() function is not shadowed in the workspace.
lda_gamma=3;
num_threads=1;

labels=Labels(label_train_twoclass);

lda=LDA(lda_gamma, feats_train, labels);
lda.parallel.set_num_threads(num_threads);
lda.train();

% query the trained model (results are discarded)
lda.get_bias();
lda.get_w();

% switch to the test features and classify
lda.set_features(feats_test);
lda.classify().get_labels();
% LibLinear example: linear classifier on sparse real-valued features.
init_shogun
addpath('tools');

% settings used below
C=1.2;
epsilon=1e-5;
num_threads=1;

% training labels and train/test feature matrices
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% liblinear
disp('LibLinear')

% dense -> sparse conversion of the training data
realfeat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(realfeat);

% dense -> sparse conversion of the test data (realfeat is reused)
realfeat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(realfeat);

labels=Labels(label_train_twoclass);

% configure and train
svm=LibLinear(C, feats_train, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.set_bias_enabled(true);
svm.train();

% switch to the test features and classify; output is discarded
svm.set_features(feats_test);
svm.classify().get_labels();
% Minimal LibSVM example on synthetic data: two Gaussian clouds shifted by
% -dist and +dist, a Gaussian kernel, and the resulting test error.
% The statements were previously collapsed onto a single line, which made
% "init_shogun num=1000;" parse as a command-syntax call.
init_shogun

num=1000;
dist=1;
width=2.1;
C=1;

% num points per class, 2 rows each, for training and for testing
traindata_real=[randn(2,num)-dist, randn(2,num)+dist];
testdata_real=[randn(2,num)-dist, randn(2,num)+dist];

% labels: -1 for the first num columns, +1 for the rest
trainlab=[-ones(1,num), ones(1,num)];
testlab=[-ones(1,num), ones(1,num)];

feats_train=RealFeatures(traindata_real);
feats_test=RealFeatures(testdata_real);
feats_test.copy_feature_matrix(testdata_real);

kernel=GaussianKernel(feats_train, feats_train, width);
labels=Labels(trainlab);

svm=LibSVM(C, kernel, labels);
svm.parallel.set_num_threads(8);
svm.train();

% re-initialise the kernel on (train, test) before classifying
kernel.init(feats_train, feats_test);
out=svm.classify().get_labels();

% fraction of test points whose predicted sign differs from the label;
% deliberately left without a semicolon so the value is printed
testerr=mean(sign(out)~=testlab)
% LibSVM example: SVM with a Gaussian kernel trained on the two-class labels.
init_shogun
addpath('tools');

% settings used below
width=2.1;
C=1.2;
epsilon=1e-5;
num_threads=2;

% training labels and train/test feature matrices
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% libsvm
disp('LibSVM')

feats_train=RealFeatures(fm_train_real);
labels=Labels(label_train_twoclass);
kernel=GaussianKernel(feats_train, feats_train, width);

% configure and train the machine
svm=LibSVM(C, kernel, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.train();

% re-initialise the kernel on (train, test) and classify; output is discarded
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
svm.classify().get_labels();
% LibSVMMultiClass example: Gaussian-kernel SVM trained on the multiclass labels.
init_shogun
addpath('tools');

% settings used below
width=2.1;
C=1.2;
epsilon=1e-5;
num_threads=8;

% training labels and train/test feature matrices
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% libsvmmulticlass
disp('LibSVMMultiClass')

feats_train=RealFeatures(fm_train_real);
labels=Labels(label_train_multiclass);
kernel=GaussianKernel(feats_train, feats_train, width);

% configure and train the machine
svm=LibSVMMultiClass(C, kernel, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.train();

% re-initialise the kernel on (train, test) and classify; output is discarded
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
svm.classify().get_labels();
% LibSVMOneClass example: Gaussian-kernel machine trained without labels.
init_shogun
addpath('tools');

% settings used below
width=2.1;
C=1.2;
epsilon=1e-5;
num_threads=4;

% train/test feature matrices (no label file is used here)
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% libsvm oneclass
disp('LibSVMOneClass')

feats_train=RealFeatures(fm_train_real);
kernel=GaussianKernel(feats_train, feats_train, width);

% configure and train the machine
svm=LibSVMOneClass(C, kernel);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.train();

% re-initialise the kernel on (train, test) and classify; output is discarded
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
svm.classify().get_labels();
% MPDSVM example: SVM with a Gaussian kernel trained on the two-class labels.
init_shogun
addpath('tools');

% settings used below
width=2.1;
C=1.2;
epsilon=1e-5;
num_threads=1;

% training labels and train/test feature matrices
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% mpdsvm
disp('MPDSVM')

feats_train=RealFeatures(fm_train_real);
labels=Labels(label_train_twoclass);
kernel=GaussianKernel(feats_train, feats_train, width);

% configure and train the machine
svm=MPDSVM(C, kernel, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.train();

% re-initialise the kernel on (train, test) and classify; output is discarded
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
svm.classify().get_labels();
% Perceptron example on synthetic data generated in-script.
init_shogun

% settings used below
num=50;
learn_rate=1.;
max_iter=1000;
num_threads=1;

% first half labelled -1, second half +1; features are randn shifted by -1/+1
label_train_twoclass=[-ones(1,num/2) ones(1,num/2)];
fm_train_real=[randn(5,num/2)-1, randn(5,num/2)+1];
fm_test_real=[randn(5,num)-1, randn(5,num)+1];

% perceptron
disp('Perceptron')

feats_train=RealFeatures(fm_train_real);
labels=Labels(label_train_twoclass);

% configure and train
perceptron=Perceptron(feats_train, labels);
perceptron.set_learn_rate(learn_rate);
perceptron.set_max_iter(max_iter);
perceptron.parallel.set_num_threads(num_threads);
perceptron.train();

% switch to the test features and classify; output is discarded
feats_test=RealFeatures(fm_test_real);
perceptron.set_features(feats_test);
perceptron.classify().get_labels();
% SubGradientSVM example: linear SVM on sparse real-valued features with a
% bound on the training time.
init_shogun
addpath('tools');

% settings used below
C=0.9;
epsilon=1e-3;
num_threads=1;
max_train_time=1.;

% training labels and train/test feature matrices
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% subgradient based svm
disp('SubGradientSVM')

% dense -> sparse conversion of the training data
realfeat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(realfeat);

% dense -> sparse conversion of the test data (realfeat is reused)
realfeat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(realfeat);

labels=Labels(label_train_twoclass);

% configure and train
svm=SubGradientSVM(C, feats_train, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.set_bias_enabled(false);
svm.set_max_train_time(max_train_time);
svm.train();

% switch to the test features and classify; output is discarded
svm.set_features(feats_test);
svm.classify().get_labels();
init_shogun

% Explicit examples on how to use the different classifiers
% SVMLight example: weighted-degree string kernel SVM on DNA strings.
% Only runs when exist('SVMLight') is non-zero.

addpath('tools');
label_train_dna=load_matrix('../data/label_train_dna.dat');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% svm light
if ~exist('SVMLight')
	disp('No support for SVMLight available.')
else
	disp('SVMLight')

	% string features over the DNA alphabet
	feats_train=StringCharFeatures(DNA);
	feats_train.set_features(fm_train_dna);
	feats_test=StringCharFeatures(DNA);
	feats_test.set_features(fm_test_dna);

	degree=20;
	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree);

	% settings for the machine
	C=1.2;
	epsilon=1e-5;
	num_threads=3;
	labels=Labels(label_train_dna);

	% configure and train
	svm=SVMLight(C, kernel, labels);
	svm.set_epsilon(epsilon);
	svm.parallel.set_num_threads(num_threads);
	svm.train();

	% re-initialise the kernel on (train, test) and classify
	kernel.init(feats_train, feats_test);
	svm.classify().get_labels();
end
% SVMLin example: linear SVM on sparse real-valued features.
init_shogun
addpath('tools');

% settings used below
C=0.9;
epsilon=1e-5;
num_threads=1;

% training labels and train/test feature matrices
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% svm lin
disp('SVMLin')

% dense -> sparse conversion of the training data
realfeat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(realfeat);

% dense -> sparse conversion of the test data (realfeat is reused)
realfeat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(realfeat);

labels=Labels(label_train_twoclass);

% configure and train
svm=SVMLin(C, feats_train, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.set_bias_enabled(true);
svm.train();

% switch to the test features, query the model, classify (results discarded)
svm.set_features(feats_test);
svm.get_bias();
svm.get_w();
svm.classify().get_labels();
% SVMOcas example: linear SVM on sparse real-valued features, bias disabled.
init_shogun
addpath('tools');

% settings used below
C=0.9;
epsilon=1e-5;
num_threads=1;

% training labels and train/test feature matrices
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% svm ocas
disp('SVMOcas')

% dense -> sparse conversion of the training data
realfeat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(realfeat);

% dense -> sparse conversion of the test data (realfeat is reused)
realfeat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(realfeat);

labels=Labels(label_train_twoclass);

% configure and train
svm=SVMOcas(C, feats_train, labels);
svm.set_epsilon(epsilon);
svm.parallel.set_num_threads(num_threads);
svm.set_bias_enabled(false);
svm.train();

% switch to the test features and classify; output is discarded
svm.set_features(feats_test);
svm.classify().get_labels();
% SVMSGD example: linear SVM on sparse real-valued features, trained for a
% fixed number of epochs.
init_shogun
addpath('tools');

label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% sgd
disp('SVMSGD')

% dense -> sparse conversion of the training data
realfeat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(realfeat);

% dense -> sparse conversion of the test data (realfeat is reused)
realfeat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(realfeat);

C=0.9;
% Trailing semicolons added here and on set_epochs so the example no longer
% echoes values to the console, matching the other examples.
num_iter=5;
% NOTE(review): num_threads is assigned but never passed to the classifier.
num_threads=1;

labels=Labels(label_train_twoclass);

svm=SVMSGD(C, feats_train, labels);
svm.set_epochs(num_iter);
%svm.io.set_loglevel(0);
svm.train();

% switch to the test features and classify; output is discarded
svm.set_features(feats_test);
svm.classify().get_labels();
init_shogun

% Explicit examples on how to use clustering
% Hierarchical clustering on a Euclidian distance.

addpath('tools');

merges=4;
fm_train=load_matrix('../data/fm_train_real.dat');

% Hierarchical
disp('Hierarchical')

feats_train=RealFeatures(fm_train);
% the "test" features are built from the same training matrix
feats_test=RealFeatures(fm_train);

distance=EuclidianDistance(feats_train, feats_train);

hierarchical=Hierarchical(merges, distance);
hierarchical.train();

% re-initialise the distance on (train, test) before reading the results
distance.init(feats_train, feats_test);

mdist=hierarchical.get_merge_distances();
pairs=hierarchical.get_cluster_pairs();
init_shogun

% Explicit examples on how to use clustering
% KMeans clustering on a Euclidian distance.

addpath('tools');

k=4;
fm_train=load_matrix('../data/fm_train_real.dat');

% KMeans
disp('KMeans')

feats_train=RealFeatures(fm_train);
distance=EuclidianDistance(feats_train, feats_train);

kmeans=KMeans(k, distance);
kmeans.train();

% read back the cluster centers and radiuses
c=kmeans.get_cluster_centers();
r=kmeans.get_radiuses();
% BrayCurtisDistance example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% bray curtis distance
disp('BrayCurtisDistance')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=BrayCurtisDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% CanberraMetric example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% canberra metric
% (the banner previously misspelled the class name as 'CanberaMetric')
disp('CanberraMetric')

feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);

% distance initialised on (train, train)
distance=CanberraMetric(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% CanberraWordDistance example: distance matrices on word features obtained
% from DNA strings.
init_shogun
addpath('tools');

fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% canberra word distance
disp('CanberraWordDistance')

% arguments for obtain_from_char
order=3;
gap=0;
reverse=false;

% one sorting preprocessor is shared by the train and test features
preproc=SortWordString();

% training side: char strings -> word features -> preprocessor applied
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();

% test side: same conversion, reusing the initialised preprocessor
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();

% distance initialised on (train, train)
distance=CanberraWordDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% ChebyshewMetric example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% chebyshew metric
disp('ChebyshewMetric')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=ChebyshewMetric(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% ChiSquareDistance example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% chi square distance
disp('ChiSquareDistance')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=ChiSquareDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% CosineDistance example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% cosine distance
disp('Cosine Distance')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=CosineDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% EuclidianDistance example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% euclidian distance
disp('EuclidianDistance')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=EuclidianDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% GeodesicMetric example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% geodesic metric
disp('GeodesicMetric')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=GeodesicMetric(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% HammingWordDistance example: distance matrices on word features obtained
% from DNA strings.
init_shogun
addpath('tools');

fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% hamming word distance
disp('HammingWordDistance')

% arguments for obtain_from_char and for the distance constructor
order=3;
gap=0;
reverse=false;
use_sign=false;

% one sorting preprocessor is shared by the train and test features
preproc=SortWordString();

% training side: char strings -> word features -> preprocessor applied
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();

% test side: same conversion, reusing the initialised preprocessor
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();

% distance initialised on (train, train)
distance=HammingWordDistance(feats_train, feats_train, use_sign);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% JensenMetric example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% jensen metric
disp('JensenMetric')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=JensenMetric(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% ManhattanMetric example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% manhattan metric
disp('ManhattanMetric')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=ManhattanMetric(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% ManhattanWordDistance example: distance matrices on word features obtained
% from DNA strings.
init_shogun
addpath('tools');

fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% manhattan word distance
disp('ManhattanWordDistance')

% arguments for obtain_from_char
order=3;
gap=0;
reverse=false;

% one sorting preprocessor is shared by the train and test features
preproc=SortWordString();

% training side: char strings -> word features -> preprocessor applied
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();

% test side: same conversion, reusing the initialised preprocessor
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();

% distance initialised on (train, train)
distance=ManhattanWordDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% MinkowskiMetric example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% minkowski metric
disp('MinkowskiMetric')

% third constructor argument of MinkowskiMetric
k=3;

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=MinkowskiMetric(feats_train, feats_train, k);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% SparseEuclidianDistance example: distance matrices on sparse features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% sparse euclidian distance
disp('SparseEuclidianDistance')

% dense -> sparse conversion of the training data
realfeat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(realfeat);

% dense -> sparse conversion of the test data (realfeat is reused)
realfeat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(realfeat);

% distance initialised on (train, train)
distance=SparseEuclidianDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% TanimotoDistance example: distance matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% tanimoto distance
disp('TanimotoDistance')

% distance initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
distance=TanimotoDistance(feats_train, feats_train);
dm_train=distance.get_distance_matrix();

% distance re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
distance.init(feats_train, feats_test);
dm_test=distance.get_distance_matrix();
% Histogram example: histogram distribution trained on word features
% obtained from DNA strings.
init_shogun
addpath('tools');

fm_train_dna=load_matrix('../data/fm_train_dna.dat');

% Histogram
disp('Histogram')

% arguments for obtain_from_char
order=3;
gap=0;
reverse=false;

preproc=SortWordString();

% char strings -> word features -> sorting preprocessor applied
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats=StringWordFeatures(charfeat.get_alphabet());
feats.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc.init(feats);
feats.add_preproc(preproc);
feats.apply_preproc();

histo=Histogram(feats);
histo.train();

% query the trained model (return value is discarded)
histo.get_histogram();

num_examples=feats.get_num_vectors();
num_param=histo.get_num_model_parameters();

% the derivative loop is disabled in this example
% for i=0:(num_examples-1),
% for j=0:(num_param-1),
% histo.get_log_derivative(j, i);
% end
% end

histo.get_log_likelihood();
histo.get_log_likelihood_sample();
% HMM example: trains an HMM on a synthetic sequence of the characters
% '1'..'6' and queries derivatives, best paths and likelihoods.
init_shogun
addpath('tools');

leng=50;
rep=5;
weight=0.3;

% generate a sequence with characters 1-6 drawn from 3 loaded cubes
for i = 1:3,
	% each face value 1..6 is repeated a random number of times (at most
	% leng), then the whole vector is shuffled
	a{i}= [ ones(1,ceil(leng*rand)) 2*ones(1,ceil(leng*rand)) 3*ones(1,ceil(leng*rand)) 4*ones(1,ceil(leng*rand)) 5*ones(1,ceil(leng*rand)) 6*ones(1,ceil(leng*rand)) ];
	a{i}= a{i}(randperm(length(a{i})));
end

% s lists cube indices, each repeated a random number of times, shuffled
s=[];
for i = 1:size(a,2),
	s= [ s i*ones(1,ceil(rep*rand)) ];
end
s=s(randperm(length(s)));

% for every entry of s, draw a random subset of that cube's values and
% append it, converted to digit characters, to the single output string
cubesequence={''};
for i = 1:length(s),
	f(i)=ceil(((1-weight)*rand+weight)*length(a{s(i)}));
	t=randperm(length(a{s(i)}));
	r=a{s(i)}(t(1:f(i)));
	cubesequence{1}=[cubesequence{1} char(r+'0')];
end

% HMM
disp('HMM')

% model settings (N and M are passed to the HMM constructor below)
N=3;
M=6;
pseudo=1e-1;

% arguments for obtain_from_char
order=1;
gap=0;
reverse=false;

% this value is overwritten by feats.get_num_vectors() before it is read
num_examples=2;

% char strings -> word features -> sorting preprocessor applied
charfeat=StringCharFeatures(CUBE);
charfeat.set_features(cubesequence);
feats=StringWordFeatures(charfeat.get_alphabet());
feats.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc=SortWordString();
preproc.init(feats);
feats.add_preproc(preproc);
feats.apply_preproc();

hmm=HMM(feats, N, M, pseudo);
hmm.train();

% cheating, but enum BaumWelchViterbiType does not seem available
BW_NORMAL=0;
hmm.baum_welch_viterbi_train(BW_NORMAL);

num_examples=feats.get_num_vectors();
num_param=hmm.get_num_model_parameters();

% query every (parameter, example) derivative; results are discarded
for i=0:(num_examples-1),
	for j=0:(num_param-1),
		hmm.get_log_derivative(j, i);
	end
end

% accumulate best-path values and best-path states over all examples
best_path=0;
best_path_state=0;
for i=0:(num_examples-1),
	best_path = best_path + hmm.best_path(i);
	for j=0:(N-1),
		best_path_state = best_path_state + hmm.get_best_path_state(i, j);
	end
end

hmm.get_log_likelihood();
hmm.get_log_likelihood_sample();
% LinearHMM example: trains a LinearHMM on word features obtained from DNA
% strings and queries transition probabilities, derivatives and likelihoods.
%
% The cube-sequence generation that used to sit here was removed: its
% result (cubesequence) was never used, the model is trained on DNA only.
init_shogun
addpath('tools');

fm_train_dna=load_matrix('../data/fm_train_dna.dat');

% Linear HMM
disp('LinearHMM')

% arguments for obtain_from_char
order=3;
gap=0;
reverse=false;

% char strings -> word features -> sorting preprocessor applied
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats=StringWordFeatures(charfeat.get_alphabet());
feats.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc=SortWordString();
preproc.init(feats);
feats.add_preproc(preproc);
feats.apply_preproc();

hmm=LinearHMM(feats);
hmm.train();

% query the trained model (return value is discarded)
hmm.get_transition_probs();

num_examples=feats.get_num_vectors();
num_param=hmm.get_num_model_parameters();

% query every (parameter, example) derivative; results are discarded
for i=0:(num_examples-1),
	for j=0:(num_param-1),
		hmm.get_log_derivative(j, i);
	end
end

hmm.get_log_likelihood();
hmm.get_log_likelihood_sample();
% AUC example: AUCKernel wrapped around a Gaussian sub-kernel.
init_shogun
addpath('tools');

width=1.7;

fm_train_real=load_matrix('../data/fm_train_real.dat');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');

% auc
disp('AUC')

feats_train=RealFeatures(fm_train_real);
subkernel=GaussianKernel(feats_train, feats_train, width);

% wrap the sub-kernel and set up AUC maximization from the two-class labels
kernel=AUCKernel(0, subkernel);
kernel.setup_auc_maximization( Labels(label_train_twoclass) );

km_train=kernel.get_kernel_matrix();
% Chi2 example: kernel matrices on real-valued features.
init_shogun
addpath('tools');

% arguments for the Chi2Kernel constructor
width=1.4;
size_cache=10;

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% chi2
disp('Chi2')

% kernel initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
kernel=Chi2Kernel(feats_train, feats_train, width, size_cache);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Combined example: a CombinedKernel with three sub-kernels, each paired
% with its own feature object appended in the same order.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% combined
disp('Combined')

% empty containers that are filled below
kernel=CombinedKernel();
feats_train=CombinedFeatures();
feats_test=CombinedFeatures();

% --- sub-kernel 1: Gaussian kernel on the real-valued features
subkfeats_train=RealFeatures(fm_train_real);
subkfeats_test=RealFeatures(fm_test_real);
subkernel=GaussianKernel(10, 1.2);
feats_train.append_feature_obj(subkfeats_train);
feats_test.append_feature_obj(subkfeats_test);
kernel.append_kernel(subkernel);

% --- sub-kernel 2: fixed-degree string kernel on the DNA strings
subkfeats_train=StringCharFeatures(DNA);
subkfeats_train.set_features(fm_train_dna);
subkfeats_test=StringCharFeatures(DNA);
subkfeats_test.set_features(fm_test_dna);
degree=3;
subkernel=FixedDegreeStringKernel(10, degree);
feats_train.append_feature_obj(subkfeats_train);
feats_test.append_feature_obj(subkfeats_test);
kernel.append_kernel(subkernel);

% --- sub-kernel 3: local alignment string kernel on the DNA strings
subkfeats_train=StringCharFeatures(DNA);
subkfeats_train.set_features(fm_train_dna);
subkfeats_test=StringCharFeatures(DNA);
subkfeats_test.set_features(fm_test_dna);
subkernel=LocalAlignmentStringKernel(10);
feats_train.append_feature_obj(subkfeats_train);
feats_test.append_feature_obj(subkfeats_test);
kernel.append_kernel(subkernel);

% kernel initialised on (train, train)
kernel.init(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% CommUlongString example: kernel matrices on ulong features obtained from
% DNA strings.
init_shogun
addpath('tools');

fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% comm_ulong_string
disp('CommUlongString')

% arguments for obtain_from_char and for the kernel constructor
order=3;
gap=0;
reverse=false;
use_sign=false;

% one sorting preprocessor is shared by the train and test features
preproc=SortUlongString();

% training side: char strings -> ulong features -> preprocessor applied
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringUlongFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();

% test side: same conversion, reusing the initialised preprocessor
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringUlongFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();

% kernel initialised on (train, train)
kernel=CommUlongStringKernel(feats_train, feats_train, use_sign);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% CommWordString example: kernel matrices on word features obtained from
% DNA strings.
init_shogun
addpath('tools');

fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% comm_word_string
disp('CommWordString')

% Arguments for obtain_from_char. They were previously undefined in this
% script, so it only worked when another example had left them in the
% workspace; the values match the sibling string examples.
order=3;
gap=0;
reverse=false;

% training side: char strings -> word features -> preprocessor applied
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc=SortWordString();
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();

% test side: same conversion, reusing the initialised preprocessor
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();

use_sign=false;

% kernel initialised on (train, train)
kernel=CommWordStringKernel(feats_train, feats_train, use_sign);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Const example: ConstKernel matrices on real-valued features.
init_shogun
addpath('tools');

% third constructor argument of ConstKernel
c=23.;

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% const
disp('Const')

% kernel initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
kernel=ConstKernel(feats_train, feats_train, c);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Custom example: fills a CustomKernel from user-supplied random matrices
% and reads the kernel matrix back.
init_shogun;

C=1;
dim=7;

% random +/-1 labels (not used further in this example)
lab=sign(2*rand(1,dim) - 1);

% dim x dim random matrix and its symmetrised version. These used to be
% generated twice, and the second "dim=7" echoed to the console; the
% duplicate definitions have been removed.
data=rand(dim, dim);
symdata=data+data';

% custom
disp('Custom')

% untranslated Python pseudo-code for building a packed lower triangle:
%lowertriangle=array([symdata[(x,y)] for x in xrange(symdata.shape[1]);
% for y in xrange(symdata.shape[0]) if y<=x]);
%
kernel=CustomKernel();

%kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle);
%km_triangletriangle=kernel.get_kernel_matrix();
%

% set the kernel from the symmetric full matrix
kernel.set_triangle_kernel_matrix_from_full(symdata);
km_fulltriangle=kernel.get_kernel_matrix();
%

% set the kernel from the (non-symmetric) full matrix
kernel.set_full_kernel_matrix_from_full(data);
km_fullfull=kernel.get_kernel_matrix();
% Diag example: DiagKernel matrices on real-valued features.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% diag
disp('Diag')

feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);

% Third constructor argument of DiagKernel. Named diag_value rather than
% diag so the built-in diag() function is not shadowed in the workspace.
diag_value=23.;

% kernel initialised on (train, train)
kernel=DiagKernel(feats_train, feats_train, diag_value);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Distance example: DistanceKernel built on a Euclidian distance.
init_shogun
addpath('tools');

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% NOTE(review): the labels are loaded but not used in this example.
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');

% distance
disp('Distance')

feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);

width=1.7;
distance=EuclidianDistance();

% Kernel initialised on (train, train) so that km_train really is the
% training kernel matrix. It was previously built on (train, test), which
% made km_train identical to km_test.
kernel=DistanceKernel(feats_train, feats_train, width, distance);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% FixedDegreeString example: kernel matrices on DNA strings.
init_shogun
addpath('tools');

degree=3;

fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');

% fixed_degree_string
disp('FixedDegreeString')

% string features over the DNA alphabet
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);

% kernel initialised on (train, train)
kernel=FixedDegreeStringKernel(feats_train, feats_train, degree);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Gaussian example: GaussianKernel matrices on real-valued features.
init_shogun
addpath('tools');

width=1.9;

fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');

% gaussian
disp('Gaussian')

% kernel initialised on (train, train)
feats_train=RealFeatures(fm_train_real);
kernel=GaussianKernel(feats_train, feats_train, width);
km_train=kernel.get_kernel_matrix();

% kernel re-initialised on (train, test)
feats_test=RealFeatures(fm_test_real);
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% GaussianShift kernel example on real-valued features: reads the kernel
% matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% gaussian_shift
disp('GaussianShift')
feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);
% Kernel parameters handed to the constructor below.
width=1.8;
max_shift=2;
shift_step=1;
% The call spans two lines; the open parenthesis carries the statement
% over the line break.
kernel=GaussianShiftKernel(
feats_train, feats_train, width, max_shift, shift_step);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% HistogramWordString kernel example: trains a PluginEstimate on word
% features derived from DNA strings and passes it to the kernel.
init_shogun
addpath('tools');
label_train_dna=load_matrix('../data/label_train_dna.dat');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% plugin_estimate
disp('PluginEstimate w/ HistogramWord')
% Parameters forwarded to obtain_from_char() for both train and test data.
order=3;
gap=0;
reverse=false;
% Training data: char features first, then word features obtained from them.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
% Test data: same conversion with the same parameters.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
% Train the plugin estimate on the labelled training features.
pie=PluginEstimate();
labels=Labels(label_train_dna);
pie.set_labels(labels);
pie.set_features(feats_train);
pie.train();
% The trained estimator is the third constructor argument of the kernel.
kernel=HistogramWordStringKernel(feats_train, feats_train, pie);
km_train=kernel.get_kernel_matrix();
% Switch kernel and estimator over to the test features.
kernel.init(feats_train, feats_test);
pie.set_features(feats_test);
% The returned labels are not stored; the call is made for its own sake.
pie.classify().get_labels();
km_test=kernel.get_kernel_matrix();
% LinearByte kernel example on byte-valued features: reads the kernel
% matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
% The loaded matrices are cast to uint8 before being copied into the
% byte feature objects.
fm_train_byte=uint8(load_matrix('../data/fm_train_byte.dat'));
fm_test_byte=uint8(load_matrix('../data/fm_test_byte.dat'));
% linear byte
disp('LinearByte')
feats_train=ByteFeatures(RAWBYTE);
feats_train.copy_feature_matrix(fm_train_byte);
feats_test=ByteFeatures(RAWBYTE);
feats_test.copy_feature_matrix(fm_test_byte);
% Kernel constructed on (train, train); km_train is read straight away.
kernel=LinearByteKernel(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Linear kernel example on real-valued features, using an
% AvgDiagKernelNormalizer that is attached before the kernel is initialised.
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% linear
disp('Linear')
feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);
scale=1.2;
% The kernel is created empty so the normalizer can be set before init().
kernel=LinearKernel();
kernel.set_normalizer(AvgDiagKernelNormalizer(scale));
% (train, train) matrix.
kernel.init(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% (train, test) matrix from the same kernel object.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% LinearString kernel example on DNA string data: reads the kernel matrix
% for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% linear_string
disp('LinearString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
% Kernel constructed on (train, train); km_train is read straight away.
kernel=LinearStringKernel(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% LinearWord kernel example on 16-bit word features, using an
% AvgDiagKernelNormalizer that is attached before the kernel is initialised.
init_shogun
addpath('tools');
% The loaded matrices are cast to uint16 before being wrapped in WordFeatures.
fm_train_word=uint16(load_matrix('../data/fm_train_word.dat'));
fm_test_word=uint16(load_matrix('../data/fm_test_word.dat'));
% linear_word
disp('LinearWord')
feats_train=WordFeatures(fm_train_word);
feats_test=WordFeatures(fm_test_word);
% NOTE(review): do_rescale is assigned but never read in this example.
do_rescale=true;
scale=1.4;
% The kernel is created empty so the normalizer can be set before init().
kernel=LinearWordKernel();
kernel.set_normalizer(AvgDiagKernelNormalizer(scale));
% (train, train) matrix.
kernel.init(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% (train, test) matrix from the same kernel object.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% LocalAlignmentString kernel example on DNA string data: reads the kernel
% matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% local_alignment_string
disp('LocalAlignmentString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
% Kernel constructed on (train, train); km_train is read straight away.
kernel=LocalAlignmentStringKernel(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% LocalityImprovedString kernel example on DNA string data: reads the
% kernel matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% locality_improved_string
disp('LocalityImprovedString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
% Kernel parameters handed to the constructor below.
l=5;
inner_degree=5;
outer_degree=7;
% The call spans two lines; the open parenthesis carries the statement
% over the line break.
kernel=LocalityImprovedStringKernel(
feats_train, feats_train, l, inner_degree, outer_degree);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% MatchWordString kernel example: converts DNA strings to sorted word
% features and reads the kernel matrix for (train, train) and (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% match_word_string
disp('MatchWordString')
degree=3;
scale=1.4;
size_cache=10;
% Parameters forwarded to obtain_from_char(). They are defined here so the
% example does not depend on values left in the workspace by another
% script; the values match the other word-string examples in this file.
order=3;
gap=0;
reverse=false;
% Training data: char features -> word features -> SortWordString preproc.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc=SortWordString();
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();
% Test data: same conversion; the preprocessor object initialised on the
% training features is reused.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();
% The kernel is created without features so the normalizer can be set
% before init().
kernel=MatchWordStringKernel(size_cache, degree);
kernel.set_normalizer(AvgDiagKernelNormalizer(scale));
kernel.init(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% (train, test) matrix from the same kernel object.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% OligoString kernel example on DNA string data: reads the kernel matrix
% for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% oligo_string
disp('OligoString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
% Kernel parameters handed to the constructor below.
k=3;
width=1.2;
size_cache=10;
% The kernel is created without features and initialised explicitly.
kernel=OligoStringKernel(size_cache, k, width);
kernel.init(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% (train, test) matrix from the same kernel object.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% PolyMatchWordString kernel example: converts DNA strings to sorted word
% features and reads the kernel matrix for (train, train) and (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Parameters forwarded to obtain_from_char() for both train and test data.
order=3;
gap=0;
reverse=false;
% poly_match_word_string
disp('PolyMatchWordString')
degree=2;
inhomogene=true;
% Training data: char features -> word features -> SortWordString preproc.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc=SortWordString();
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();
% Test data: same conversion; the preprocessor object initialised on the
% training features is reused.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();
% Kernel constructed on (train, train); km_train is read straight away.
kernel=PolyMatchWordStringKernel(feats_train, feats_train, degree, inhomogene);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Poly kernel example on real-valued features: reads the kernel matrix for
% (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% poly
disp('Poly')
feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);
% Kernel parameters handed to the constructor below.
degree=4;
inhomogene=false;
use_normalization=true;
% The call spans two lines; the open parenthesis carries the statement
% over the line break.
kernel=PolyKernel(
feats_train, feats_train, degree, inhomogene, use_normalization);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% PolyMatchString kernel example on DNA string data: reads the kernel
% matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% poly_match_string
disp('PolyMatchString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
degree=3;
inhomogene=false;
% Kernel constructed on (train, train); km_train is read straight away.
kernel=PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Sigmoid kernel example on real-valued features: reads the kernel matrix
% for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% sigmoid
disp('Sigmoid')
feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);
size_cache=10;
% Deliberately not called "gamma": a script runs in the caller's workspace,
% so a variable of that name would shadow Octave's built-in gamma()
% function for everything executed afterwards.
sigmoid_gamma=1.2;
coef0=1.3;
kernel=SigmoidKernel(feats_train, feats_train, size_cache, sigmoid_gamma, coef0);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% SimpleLocalityImprovedString kernel example on DNA string data: reads the
% kernel matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% simple_locality_improved_string
disp('SimpleLocalityImprovedString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
% Kernel parameters handed to the constructor below.
l=5;
inner_degree=5;
outer_degree=7;
% The call spans two lines; the open parenthesis carries the statement
% over the line break.
kernel=SimpleLocalityImprovedStringKernel(
feats_train, feats_train, l, inner_degree, outer_degree);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% SparseGaussian kernel example: converts dense real features to sparse
% ones and reads the kernel matrix for (train, train) and (train, test).
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% sparse_gaussian - flagged as possibly broken ("b0rked?") by the original
% author; the reason is not recorded here.
disp('SparseGaussian')
% Dense features are built first and the sparse objects are filled from them.
feat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(feat);
feat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(feat);
width=1.1;
% Kernel constructed on (train, train); km_train is read straight away.
kernel=SparseGaussianKernel(feats_train, feats_train, width);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% SparseLinear kernel example: converts dense real features to sparse ones
% and uses an AvgDiagKernelNormalizer attached before initialisation.
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% sparse_linear
disp('SparseLinear')
% Dense features are built first and the sparse objects are filled from them.
feat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(feat);
feat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(feat);
scale=1.1;
% The kernel is created empty so the normalizer can be set before init().
kernel=SparseLinearKernel();
kernel.set_normalizer(AvgDiagKernelNormalizer(scale));
% (train, train) matrix.
kernel.init(feats_train, feats_train);
km_train=kernel.get_kernel_matrix();
% (train, test) matrix from the same kernel object.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% SparsePoly kernel example: converts dense real features to sparse ones
% and reads the kernel matrix for (train, train) and (train, test).
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% sparse_poly
disp('SparsePoly')
% Dense features are built first and the sparse objects are filled from them.
feat=RealFeatures(fm_train_real);
feats_train=SparseRealFeatures();
feats_train.obtain_from_simple(feat);
feat=RealFeatures(fm_test_real);
feats_test=SparseRealFeatures();
feats_test.obtain_from_simple(feat);
% Kernel parameters handed to the constructor below.
size_cache=10;
degree=3;
inhomogene=true;
% The call spans two lines; the open parenthesis carries the statement
% over the line break.
kernel=SparsePolyKernel(feats_train, feats_train, size_cache, degree,
inhomogene);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% TOP / Fisher kernel example: generates a random "cube" (dice) sequence,
% trains two HMMs on it, derives TOPFeatures and FKFeatures from the HMMs
% and evaluates a PolyKernel on each feature type.
% The data are random (rand/randperm), so results differ between runs.
init_shogun
addpath('tools');
% Parameters of the random sequence generator below.
leng=28;
rep=5;
weight=0.3;
% generate a sequence with characters 1-6 drawn from 3 loaded cubes
% Each cube a{i} is a shuffled vector holding the values 1..6, every value
% repeated a random number of times between 1 and leng.
for i = 1:3,
a{i}= [ ones(1,ceil(leng*rand)) 2*ones(1,ceil(leng*rand)) 3*ones(1,ceil(leng*rand)) 4*ones(1,ceil(leng*rand)) 5*ones(1,ceil(leng*rand)) 6*ones(1,ceil(leng*rand)) ];
a{i}= a{i}(randperm(length(a{i})));
end
% s is a shuffled list of cube indices; each index occurs between 1 and
% rep times.
s=[];
for i = 1:size(a,2),
s= [ s i*ones(1,ceil(rep*rand)) ];
end
s=s(randperm(length(s)));
% Single-element cell array that accumulates the generated string.
cubesequence={''};
for i = 1:length(s),
% f(i): how many values to take from cube s(i) in this step.
f(i)=ceil(((1-weight)*rand+weight)*length(a{s(i)}));
% Take the first f(i) entries of a random permutation of that cube.
t=randperm(length(a{s(i)}));
r=a{s(i)}(t(1:f(i)));
% Adding '0' turns the numbers 1..6 into the characters '1'..'6'.
cubesequence{1}=[cubesequence{1} char(r+'0')];
end
% top_fisher
disp('TOP/Fisher on PolyKernel')
% Arguments for the HMM constructor and for obtain_from_char() below.
% NOTE(review): N and M presumably are the number of HMM states and
% emission symbols - confirm against the HMM constructor.
N=3;
M=6;
pseudo=1e-1;
order=1;
gap=0;
reverse=false;
% Training word features from the generated sequence, sorted by the
% SortWordString preprocessor.
charfeat=StringCharFeatures(CUBE);
charfeat.set_features(cubesequence);
wordfeats_train=StringWordFeatures(charfeat.get_alphabet());
wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc=SortWordString();
preproc.init(wordfeats_train);
wordfeats_train.add_preproc(preproc);
wordfeats_train.apply_preproc();
% "Test" word features are built from the very same sequence and reuse the
% preprocessor initialised above.
charfeat=StringCharFeatures(CUBE);
charfeat.set_features(cubesequence);
wordfeats_test=StringWordFeatures(charfeat.get_alphabet());
wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
wordfeats_test.add_preproc(preproc);
wordfeats_test.apply_preproc();
% cheating, BW_NORMAL is somehow not available
% (the constant is therefore defined by hand as 0)
BW_NORMAL=0;
% Two HMMs are trained on the same training features with identical
% arguments.
pos=HMM(wordfeats_train, N, M, pseudo);
pos.train();
pos.baum_welch_viterbi_train(BW_NORMAL);
neg=HMM(wordfeats_train, N, M, pseudo);
neg.train();
neg.baum_welch_viterbi_train(BW_NORMAL);
% Copies of the trained HMMs get the test features as observations.
pos_clone=HMM(pos);
neg_clone=HMM(neg);
pos_clone.set_observations(wordfeats_test);
neg_clone.set_observations(wordfeats_test);
% --- TOP features ---
feats_train=TOPFeatures(10, pos, neg, false, false);
feats_test=TOPFeatures(10, pos_clone, neg_clone, false, false);
kernel=PolyKernel(feats_train, feats_train, 1, false, true);
km_train=kernel.get_kernel_matrix();
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% --- Fisher kernel features ---
% feats_train, feats_test, kernel, km_train and km_test are overwritten
% here, so only the Fisher results remain in the workspace afterwards.
feats_train=FKFeatures(10, pos, neg);
feats_train.set_opt_a(-1); %estimate prior
feats_test=FKFeatures(10, pos_clone, neg_clone);
feats_test.set_a(feats_train.get_a()); %use prior from training data
kernel=PolyKernel(feats_train, feats_train, 1, false, true);
km_train=kernel.get_kernel_matrix();
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% WeightedCommWordString kernel example: converts DNA strings to sorted
% word features and reads the kernel matrix for (train, train) and
% (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% weighted_comm_word_string
disp('WeightedCommWordString')
% Parameters forwarded to obtain_from_char(); note reverse=true here,
% unlike the other word-string examples in this file.
order=3;
gap=0;
reverse=true;
% Training data: char features -> word features -> SortWordString preproc.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_train_dna);
feats_train=StringWordFeatures(charfeat.get_alphabet());
feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);
preproc=SortWordString();
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();
% Test data: same conversion; the preprocessor object initialised on the
% training features is reused.
charfeat=StringCharFeatures(DNA);
charfeat.set_features(fm_test_dna);
feats_test=StringWordFeatures(charfeat.get_alphabet());
feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);
feats_test.add_preproc(preproc);
feats_test.apply_preproc();
use_sign=false;
% Kernel constructed on (train, train); km_train is read straight away.
kernel=WeightedCommWordStringKernel(feats_train, feats_train, use_sign);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% WeightedDegreePositionString kernel example on DNA string data: reads
% the kernel matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% weighted_degree_position_string
disp('WeightedDegreePositionString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
degree=20;
kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, degree);
% Disabled call below uses Python/NumPy syntax, not Octave; it would need
% translating before it could be enabled.
%kernel.set_shifts(zeros(len(fm_train_dna[0]), dtype=int));
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% WeightedDegreeString kernel example on DNA string data: reads the kernel
% matrix for (train, train) and then for (train, test).
init_shogun
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% weighted_degree_string
disp('WeightedDegreeString')
% Wrap the loaded data in char-string feature objects created with the DNA
% alphabet constant.
feats_train=StringCharFeatures(DNA);
feats_train.set_features(fm_train_dna);
feats_test=StringCharFeatures(DNA);
feats_test.set_features(fm_test_dna);
degree=20;
kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree);
% Disabled lines below use Python/NumPy syntax, not Octave; they would
% need translating before they could be enabled.
%weights=arange(1,degree+1,dtype=double)[::-1]/ \
% sum(arange(1,degree+1,dtype=double));
%kernel.set_wd_weights(weights);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Multiclass MKL example: builds a CombinedKernel from three sub-kernels
% (Gaussian, Linear, Poly), trains MKLMultiClass on it and prints the
% labels predicted for the test features.
init_shogun
addpath('tools');
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
disp('Octave_modular')
% combined
disp('Combined')
kernel=CombinedKernel();
feats_train=CombinedFeatures();
feats_test=CombinedFeatures();
% Sub-kernel 1: Gaussian. Each sub-kernel gets its own RealFeatures
% objects wrapping the same train/test matrices; feature objects and
% kernels are appended in the same order.
subkfeats_train=RealFeatures(fm_train_real);
subkfeats_test=RealFeatures(fm_test_real);
subkernel=GaussianKernel(10, 1.2);
feats_train.append_feature_obj(subkfeats_train);
feats_test.append_feature_obj(subkfeats_test);
kernel.append_kernel(subkernel);
% Sub-kernel 2: Linear.
subkfeats_train=RealFeatures(fm_train_real);
subkfeats_test=RealFeatures(fm_test_real);
subkernel=LinearKernel();
feats_train.append_feature_obj(subkfeats_train);
feats_test.append_feature_obj(subkfeats_test);
kernel.append_kernel(subkernel);
% Sub-kernel 3: Poly.
subkfeats_train=RealFeatures(fm_train_real);
subkfeats_test=RealFeatures(fm_test_real);
subkernel=PolyKernel(10,2);
feats_train.append_feature_obj(subkfeats_train);
feats_test.append_feature_obj(subkfeats_test);
kernel.append_kernel(subkernel);
% Initialise the combined kernel on (train, train) before training.
kernel.init(feats_train, feats_train);
C=1.2;
epsilon=1e-5;
num_threads=1;
labels=Labels(label_train_multiclass);
% MKL_MULTICLASS
disp('MKL_MULTICLASS')
mkl=MKLMultiClass(C, kernel, labels);
mkl.set_epsilon(epsilon);
mkl.parallel.set_num_threads(num_threads);
mkl.set_mkl_epsilon(0.001);
mkl.set_mkl_norm(1.5);
mkl.train();
% Switch the kernel to (train, test) before classifying.
kernel.init(feats_train, feats_test);
result=mkl.classify().get_labels();
% No trailing semicolon: the predicted labels are printed.
result
% LogPlusOne preprocessor example: applies the preprocessor to train and
% test features and then evaluates a Chi2Kernel on the processed features.
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
%LogPlusOne
disp('LogPlusOne')
feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);
% The preprocessor is initialised on the training features only and then
% attached to and applied on both feature objects.
preproc=LogPlusOne();
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();
feats_test.add_preproc(preproc);
feats_test.apply_preproc();
width=1.4;
size_cache=10;
% Kernel constructed on (train, train); km_train is read straight away.
kernel=Chi2Kernel(feats_train, feats_train, width, size_cache);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% NormOne preprocessor example: applies the preprocessor to train and test
% features and then evaluates a Chi2Kernel on the processed features.
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
%NormOne
disp('NormOne')
feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);
% The preprocessor is initialised on the training features only and then
% attached to and applied on both feature objects.
preproc=NormOne();
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();
feats_test.add_preproc(preproc);
feats_test.apply_preproc();
width=1.4;
size_cache=10;
% Kernel constructed on (train, train); km_train is read straight away.
kernel=Chi2Kernel(feats_train, feats_train, width, size_cache);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% PruneVarSubMean preprocessor example: applies the preprocessor to train
% and test features and then evaluates a Chi2Kernel on the processed
% features.
init_shogun
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
%PruneVarSubMean
disp('PruneVarSubMean')
feats_train=RealFeatures(fm_train_real);
feats_test=RealFeatures(fm_test_real);
% The preprocessor is initialised on the training features only and then
% attached to and applied on both feature objects.
preproc=PruneVarSubMean();
preproc.init(feats_train);
feats_train.add_preproc(preproc);
feats_train.apply_preproc();
feats_test.add_preproc(preproc);
feats_test.apply_preproc();
width=1.4;
size_cache=10;
% Kernel constructed on (train, train); km_train is read straight away.
kernel=Chi2Kernel(feats_train, feats_train, width, size_cache);
km_train=kernel.get_kernel_matrix();
% Re-initialise the same kernel object on (train, test) for km_test.
kernel.init(feats_train, feats_test);
km_test=kernel.get_kernel_matrix();
% Kernel ridge regression example: trains KRR with a Gaussian kernel on the
% training data and stores the outputs for the test data in "out".
init_shogun
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');
% kernel ridge regression
disp('KRR')
feats_train=RealFeatures(fm_train);
feats_test=RealFeatures(fm_test);
width=0.8;
kernel=GaussianKernel(feats_train, feats_train, width);
% NOTE(review): C is assigned but not passed to KRR or used anywhere else
% in this example.
C=0.9;
tau=1e-6;
num_threads=1;
labels=Labels(label_train);
krr=KRR(tau, kernel, labels);
krr.parallel.set_num_threads(num_threads);
krr.train();
% Switch the kernel to (train, test) before predicting.
kernel.init(feats_train, feats_test);
out=krr.classify().get_labels();
% LibSVR regression example: trains LibSVR with a Gaussian kernel on the
% training data and stores the outputs for the test data in "out".
init_shogun
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');
%% libsvm based support vector regression
disp('LibSVR')
feats_train=RealFeatures(fm_train);
feats_test=RealFeatures(fm_test);
width=2.1;
kernel=GaussianKernel(feats_train, feats_train, width);
% Parameters passed to the LibSVR constructor and setters below.
C=1.2;
epsilon=1e-5;
tube_epsilon=1e-2;
num_threads=3;
labels=Labels(label_train);
svr=LibSVR(C, epsilon, kernel, labels);
svr.set_tube_epsilon(tube_epsilon);
svr.parallel.set_num_threads(num_threads);
svr.train();
% Switch the kernel to (train, test) before predicting.
kernel.init(feats_train, feats_test);
out=svr.classify().get_labels();
% SVRLight regression example: if the SVRLight class is available, trains
% it with a Gaussian kernel on the training data and stores the outputs
% for the test data in "out"; otherwise prints a notice.
init_shogun
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');
% SVRLight based support vector regression
% (guarded with exist() so the script still runs when SVRLight is missing)
if exist('SVRLight')
  disp('SVRLight')
  feats_train=RealFeatures(fm_train);
  feats_test=RealFeatures(fm_test);
  width=2.1;
  kernel=GaussianKernel(feats_train, feats_train, width);
  % Parameters passed to the SVRLight constructor and setters below.
  C=1.2;
  epsilon=1e-5;
  tube_epsilon=1e-2;
  num_threads=3;
  labels=Labels(label_train);
  svr=SVRLight(C, epsilon, kernel, labels);
  svr.set_tube_epsilon(tube_epsilon);
  svr.parallel.set_num_threads(num_threads);
  svr.train();
  % Switch the kernel to (train, test) before predicting.
  kernel.init(feats_train, feats_test);
  % Keep the predictions in "out", as the KRR and LibSVR examples do,
  % instead of discarding them.
  out=svr.classify().get_labels();
else
  disp('No support for SVRLight available.')
end