本页面包含了所有R静态接口的例子。
要运行这些例子，只需在命令行中执行：
R -f name_of_example.R
或者启动 R 后输入：
source('name_of_example.R')
# Example: train a 'GMNPSVM' classifier through the static sg interface and
# classify the test features.
library("sg")

# Settings forwarded to the sg calls below.
width <- 2.1
size_cache <- 10
C <- 10
epsilon <- 1e-5
use_bias <- TRUE

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_multiclass <- as.double(
  as.matrix(read.table("../data/label_train_multiclass.dat"))
)

# GMNPSVM
print("GMNPSVM")

# Training setup: features, kernel and labels, then the classifier options.
dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_kernel", "GAUSSIAN", "REAL", size_cache, width)
dump <- sg("set_labels", "TRAIN", label_train_multiclass)
dump <- sg("new_classifier", "GMNPSVM")
dump <- sg("svm_epsilon", epsilon)
dump <- sg("c", C)
dump <- sg("svm_use_bias", use_bias)
dump <- sg("train_classifier")

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: train a 'GPBTSVM' classifier through the static sg interface and
# classify the test features.
library("sg")

# Settings forwarded to the sg calls below.
width <- 2.1
size_cache <- 10
C <- 10
epsilon <- 1e-5
use_bias <- TRUE

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_twoclass <- as.double(
  as.matrix(read.table("../data/label_train_twoclass.dat"))
)

# GPBTSVM
print("GPBTSVM")

# Training setup: features, kernel and labels, then the classifier options.
dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_kernel", "GAUSSIAN", "REAL", size_cache, width)
dump <- sg("set_labels", "TRAIN", label_train_twoclass)
dump <- sg("new_classifier", "GPBTSVM")
dump <- sg("svm_epsilon", epsilon)
dump <- sg("c", C)
dump <- sg("svm_use_bias", use_bias)
dump <- sg("train_classifier")

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: 'KNN' classifier with the 'EUCLIDIAN' distance via the static sg
# interface.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_multiclass <- as.double(
  as.matrix(read.table("../data/label_train_multiclass.dat"))
)

# KNN
print("KNN")

# k is handed to sg('train_classifier', ...) below.
k <- 3

dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_labels", "TRAIN", label_train_multiclass)
dump <- sg("set_distance", "EUCLIDIAN", "REAL")
dump <- sg("new_classifier", "KNN")
dump <- sg("train_classifier", k)

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: 'LDA' classifier via the static sg interface.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_twoclass <- as.double(
  as.matrix(read.table("../data/label_train_twoclass.dat"))
)

# LDA
print("LDA")

dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_labels", "TRAIN", label_train_twoclass)
dump <- sg("new_classifier", "LDA")
dump <- sg("train_classifier")

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: 'LIBSVM' classifier with a 'GAUSSIAN' kernel via the static sg
# interface.
library("sg")

# Settings forwarded to the sg calls below.
size_cache <- 10
C <- 10
epsilon <- 1e-5
use_bias <- TRUE

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_twoclass <- as.double(
  as.matrix(read.table("../data/label_train_twoclass.dat"))
)

# LibSVM
print("LibSVM")

# Kernel parameter passed to sg('set_kernel', ...).
width <- 2.1

# Training setup: features, kernel and labels, then the classifier options.
dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_kernel", "GAUSSIAN", "REAL", size_cache, width)
dump <- sg("set_labels", "TRAIN", label_train_twoclass)
dump <- sg("new_classifier", "LIBSVM")
dump <- sg("svm_epsilon", epsilon)
dump <- sg("c", C)
dump <- sg("svm_use_bias", use_bias)
dump <- sg("train_classifier")

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: 'LIBSVM_MULTICLASS' classifier with a 'GAUSSIAN' kernel via the
# static sg interface.
library("sg")

# Settings forwarded to the sg calls below.
width <- 2.1
size_cache <- 10
C <- 10
epsilon <- 1e-5
use_bias <- TRUE

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_multiclass <- as.double(
  as.matrix(read.table("../data/label_train_multiclass.dat"))
)

# LibSVM MultiClass
print("LibSVMMultiClass")

# Training setup: features, kernel and labels, then the classifier options.
dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_kernel", "GAUSSIAN", "REAL", size_cache, width)
dump <- sg("set_labels", "TRAIN", label_train_multiclass)
dump <- sg("new_classifier", "LIBSVM_MULTICLASS")
dump <- sg("svm_epsilon", epsilon)
dump <- sg("c", C)
dump <- sg("svm_use_bias", use_bias)
dump <- sg("train_classifier")

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: 'LIBSVM_ONECLASS' classifier with a 'GAUSSIAN' kernel via the
# static sg interface. No labels are set in this example.
library("sg")

# Settings forwarded to the sg calls below.
width <- 2.1
size_cache <- 10
svm_nu <- 0.1
epsilon <- 1e-5
use_bias <- TRUE

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# LibSVMOneClass
print("LibSVMOneClass")

# Training setup: features and kernel, then the classifier options.
dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_kernel", "GAUSSIAN", "REAL", size_cache, width)
dump <- sg("new_classifier", "LIBSVM_ONECLASS")
dump <- sg("svm_epsilon", epsilon)
dump <- sg("svm_nu", svm_nu)
dump <- sg("svm_use_bias", use_bias)
dump <- sg("train_classifier")

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: 'MPDSVM' classifier with a 'GAUSSIAN' kernel via the static sg
# interface.
library("sg")

# Settings forwarded to the sg calls below.
width <- 2.1
size_cache <- 10
C <- 10
epsilon <- 1e-5
use_bias <- TRUE

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_twoclass <- as.double(
  as.matrix(read.table("../data/label_train_twoclass.dat"))
)

# MPDSVM
print("MPDSVM")

# Training setup: features, kernel and labels, then the classifier options.
dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_kernel", "GAUSSIAN", "REAL", size_cache, width)
dump <- sg("set_labels", "TRAIN", label_train_twoclass)
dump <- sg("new_classifier", "MPDSVM")
dump <- sg("svm_epsilon", epsilon)
dump <- sg("c", C)
dump <- sg("svm_use_bias", use_bias)
dump <- sg("train_classifier")

# Classify the test features; the output is kept in `result`.
dump <- sg("set_features", "TEST", fm_test_real)
result <- sg("classify")
# Example: set up a 'PERCEPTRON' classifier via the static sg interface.
# Training and classification are left commented out (see the note below).
library("sg")

# These settings are defined but not passed to any sg call in this example.
size_cache <- 10
C <- 10
epsilon <- 1e-5
use_bias <- TRUE

# NOTE(review): unlike most sibling examples, the feature tables are NOT
# transposed here -- confirm this is intended.
fm_train_real <- as.matrix(read.table("../data/fm_train_real.dat"))
fm_test_real <- as.matrix(read.table("../data/fm_test_real.dat"))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_twoclass <- as.double(
  as.matrix(read.table("../data/label_train_twoclass.dat"))
)

# Perceptron
print("Perceptron")

dump <- sg("set_features", "TRAIN", fm_train_real)
dump <- sg("set_labels", "TRAIN", label_train_twoclass)
dump <- sg("new_classifier", "PERCEPTRON")

# often does not converge
#dump <- sg('train_classifier')
#dump <- sg('set_features', 'TEST', fm_test_real)
#result <- sg('classify')
# Example: 'SVMLIGHT' classifier with a 'WEIGHTEDDEGREE' kernel on DNA strings
# via the static sg interface.
library("sg")

# Settings forwarded to the sg calls inside dosvmlight().
size_cache <- 10
C <- 10
epsilon <- 1e-5
use_bias <- TRUE

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_dna <- as.double(
  as.matrix(read.table("../data/label_train_dna.dat"))
)

degree <- 20

# SVM Light
# Runs the whole train/classify sequence. It reads the script-level settings
# and data defined above and (invisibly) returns the sg('classify') output.
dosvmlight <- function() {
  print("SVMLight")

  dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
  dump <- sg("set_kernel", "WEIGHTEDDEGREE", "CHAR", size_cache, degree)
  dump <- sg("set_labels", "TRAIN", label_train_dna)
  dump <- sg("new_classifier", "SVMLIGHT")
  dump <- sg("svm_epsilon", epsilon)
  dump <- sg("c", C)
  dump <- sg("svm_use_bias", use_bias)
  dump <- sg("train_classifier")

  dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
  result <- sg("classify")
}

# Wrapped in try() so an error inside dosvmlight() does not abort the script.
try(dosvmlight())
# Example: 'HIERARCHICAL' clustering with the 'EUCLIDIAN' distance via the
# static sg interface.
library("sg")

# The feature table is transposed after loading.
fm_train <- t(as.matrix(read.table("../data/fm_train_real.dat")))

# Hierarchical
print("Hierarchical")

# Passed to sg('train_clustering', ...).
merges <- 3

dump <- sg("set_features", "TRAIN", fm_train)
dump <- sg("set_distance", "EUCLIDIAN", "REAL")
dump <- sg("new_clustering", "HIERARCHICAL")
dump <- sg("train_clustering", merges)

# sg('get_clustering') returns a list; its first two elements are unpacked.
result <- sg("get_clustering")
merge_distances <- result[[1]]
pairs <- result[[2]]
# Example: 'KMEANS' clustering with the 'EUCLIDIAN' distance via the static sg
# interface.
library("sg")

# NOTE(review): unlike the Hierarchical example, the feature table is NOT
# transposed here -- confirm this is intended.
fm_train <- as.matrix(read.table("../data/fm_train_real.dat"))

# KMEANS
print("KMeans")

# Both values are passed to sg('train_clustering', ...).
iter <- 1000
k <- 3

dump <- sg("set_distance", "EUCLIDIAN", "REAL")
dump <- sg("set_features", "TRAIN", fm_train)
dump <- sg("new_clustering", "KMEANS")
dump <- sg("train_clustering", k, iter)

# sg('get_clustering') returns a list; its first two elements are unpacked.
result <- sg("get_clustering")
radi <- result[[1]]
centers <- result[[2]]
# Example: 'BRAYCURTIS' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# BrayCurtis Distance
print("BrayCurtisDistance")
dump <- sg("set_distance", "BRAYCURTIS", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'CANBERRA' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Canberra Metric
print("CanberraMetric")
dump <- sg("set_distance", "CANBERRA", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'CANBERRA' distance on 'WORD' features converted from DNA strings.
library("sg")

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Arguments for the sg('convert', ...) calls below.
reverse <- "n"
gap <- 0
order <- 3

# Canberra Word Distance
print("CanberraWordDistance")
dump <- sg("set_distance", "CANBERRA", "WORD")
dump <- sg("add_preproc", "SORTWORDSTRING")

# TRAIN: load strings, convert CHAR -> WORD, attach the preprocessor.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
dm <- sg("get_distance_matrix", "TRAIN")

# TEST: same sequence; dm is overwritten.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
dm <- sg("get_distance_matrix", "TEST")
# Example: 'CHEBYSHEW' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Chebyshew Metric
print("ChebyshewMetric")
dump <- sg("set_distance", "CHEBYSHEW", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'CHISQUARE' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# ChiSquare Distance
print("ChiSquareDistance")
dump <- sg("set_distance", "CHISQUARE", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'COSINE' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Cosine Distance
print("CosineDistance")
dump <- sg("set_distance", "COSINE", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'EUCLIDIAN' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Euclidian Distance
print("EuclidianDistance")
dump <- sg("set_distance", "EUCLIDIAN", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'GEODESIC' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Geodesic Metric
print("GeodesicMetric")
dump <- sg("set_distance", "GEODESIC", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'HAMMING' distance on 'WORD' features converted from DNA strings.
library("sg")

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Arguments for the sg('convert', ...) calls below.
reverse <- "n"
gap <- 0
order <- 3

# Hamming Word Distance
print("HammingWordDistance")
dump <- sg("set_distance", "HAMMING", "WORD")
dump <- sg("add_preproc", "SORTWORDSTRING")

# TRAIN: load strings, convert CHAR -> WORD, attach the preprocessor.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
dm <- sg("get_distance_matrix", "TRAIN")

# TEST: same sequence; dm is overwritten.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
dm <- sg("get_distance_matrix", "TEST")
# Example: 'JENSEN' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Jensen Metric
print("JensenMetric")
dump <- sg("set_distance", "JENSEN", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'MANHATTAN' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Manhattan Metric
print("ManhattanMetric")
dump <- sg("set_distance", "MANHATTAN", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'MANHATTAN' distance on 'WORD' features converted from DNA strings.
library("sg")

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Arguments for the sg('convert', ...) calls below.
reverse <- "n"
gap <- 0
order <- 3

# Manhattan Word Distance
print("ManhattanWordDistance")
dump <- sg("set_distance", "MANHATTAN", "WORD")
dump <- sg("add_preproc", "SORTWORDSTRING")

# TRAIN: load strings, convert CHAR -> WORD, attach the preprocessor.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
dm <- sg("get_distance_matrix", "TRAIN")

# TEST: same sequence; dm is overwritten.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
dm <- sg("get_distance_matrix", "TEST")
# Example: 'MINKOWSKI' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Minkowski Metric
print("MinkowskiMetric")

# Extra argument handed to sg('set_distance', 'MINKOWSKI', ...).
k <- 3
dump <- sg("set_distance", "MINKOWSKI", "REAL", k)

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example: 'TANIMOTO' distance matrices on real-valued features via sg.
library("sg")

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Tanimoto Distance
print("TanimotoDistance")
dump <- sg("set_distance", "TANIMOTO", "REAL")

# Distance matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
dm <- sg("get_distance_matrix", "TRAIN")

# Distance matrix requested for 'TEST'; this overwrites dm.
dump <- sg("set_features", "TEST", fm_test_real)
dm <- sg("get_distance_matrix", "TEST")
# Example (distributions): prepares 'WORD' features for a histogram
# distribution. The distribution calls themselves are still commented out.
library("sg")

# Arguments for the sg('convert', ...) call below.
reverse <- "n"
gap <- 0
order <- 3

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
# Loaded as character columns; not used further in this example.
fm_train_cube <- as.matrix(
  read.table("../data/fm_train_cube.dat", colClasses = c("character"))
)

#
# distributions
#

# Histogram
print("Histogram")

# sg('new_distribution', 'HISTOGRAM')
dump <- sg("add_preproc", "SORTWORDSTRING")

dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")

# The remaining steps are disabled; they are written in Python-style syntax.
# sg('train_distribution')
# histo=sg('get_histogram')

# num_examples=11
# num_param=sg('get_histogram_num_model_parameters')
# for i in xrange(num_examples):
# for j in xrange(num_param):
# sg('get_log_derivative %d %d' % (j, i))

# sg('get_log_likelihood')
# sg('get_log_likelihood_sample')
# Example (distributions): create an HMM from 'CUBE' string features, read it
# back with sg('get_hmm') and restore it into a fresh HMM.
library("sg")

# These three are set like in the sibling examples; `order` is overridden
# below and gap/reverse are not passed to any sg call here.
reverse <- "n"
gap <- 0
order <- 3

# Loaded but not used further in this example.
fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_train_cube <- as.matrix(
  read.table("../data/fm_train_cube.dat", colClasses = c("character"))
)

# HMM
print("HMM")

# N and M are the two arguments of sg('new_hmm', ...).
N <- 3
M <- 6
order <- 1

# Empty placeholders; c() evaluates to NULL.
hmms <- NULL
liks <- NULL

dump <- sg("set_features", "TRAIN", fm_train_cube, "CUBE")
dump <- sg("convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD", order)

dump <- sg("new_hmm", N, M)
dump <- sg("bw")
hmm <- sg("get_hmm")

# Fresh HMM, populated from the four components returned by sg('get_hmm').
dump <- sg("new_hmm", N, M)
dump <- sg("set_hmm", hmm[[1]], hmm[[2]], hmm[[3]], hmm[[4]])
likelihood <- sg("hmm_likelihood")
# Example (distributions): prepares 'WORD' features for a linear HMM. The
# distribution calls themselves are still commented out.
library("sg")

# Arguments for the sg('convert', ...) call below.
reverse <- "n"
gap <- 0
order <- 3

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
# Loaded as character columns; not used further in this example.
fm_train_cube <- as.matrix(
  read.table("../data/fm_train_cube.dat", colClasses = c("character"))
)

# Linear HMM
print("LinearHMM")

# sg('new_distribution', 'LinearHMM')
dump <- sg("add_preproc", "SORTWORDSTRING")

dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")

# The remaining steps are disabled; they are written in Python-style syntax.
# sg('train_distribution')
# histo=sg('get_histogram')

# num_examples=11
# num_param=sg('get_histogram_num_model_parameters')
# for i in xrange(num_examples):
# for j in xrange(num_param):
# sg('get_log_derivative %d %d' % (j, i))

# sg('get_log_likelihood')
# sg('get_log_likelihood_sample')
# Example: 'CHI2' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# CHI2
print("Chi2")

width <- 1.4
dump <- sg("set_kernel", "CHI2", "REAL", size_cache, width)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'COMBINED' kernel built from LINEAR, GAUSSIAN and POLY sub-kernels,
# each followed by its own add_features calls for TRAIN and TEST.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Combined
print("Combined")

# Clear previously registered features before adding new ones.
dump <- sg("clean_features", "TRAIN")
dump <- sg("clean_features", "TEST")

dump <- sg("set_kernel", "COMBINED", size_cache)

# Sub-kernel 1: LINEAR.
dump <- sg("add_kernel", 1, "LINEAR", "REAL", size_cache)
dump <- sg("add_features", "TRAIN", fm_train_real)
dump <- sg("add_features", "TEST", fm_test_real)

# Sub-kernel 2: GAUSSIAN.
dump <- sg("add_kernel", 1, "GAUSSIAN", "REAL", size_cache, 1)
dump <- sg("add_features", "TRAIN", fm_train_real)
dump <- sg("add_features", "TEST", fm_test_real)

# Sub-kernel 3: POLY.
dump <- sg("add_kernel", 1, "POLY", "REAL", size_cache, 3, FALSE)
dump <- sg("add_features", "TRAIN", fm_train_real)
dump <- sg("add_features", "TEST", fm_test_real)

# The TEST request overwrites the TRAIN kernel matrix in km.
km <- sg("get_kernel_matrix", "TRAIN")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'COMMSTRING' kernel on 'ULONG' features converted from DNA strings.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Arguments for sg('convert', ...) and sg('set_kernel', ...) below.
normalization <- "FULL"
use_sign <- FALSE
reverse <- "n"
gap <- 0
order <- 3

# Comm Ulong String
print("CommUlongString")
dump <- sg("add_preproc", "SORTULONGSTRING")
dump <- sg(
  "set_kernel", "COMMSTRING", "ULONG", size_cache, use_sign, normalization
)

# TRAIN: load strings, convert CHAR -> ULONG, attach the preprocessor.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "ULONG",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
km <- sg("get_kernel_matrix", "TRAIN")

# TEST: same sequence; km is overwritten.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "ULONG",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'COMMSTRING' kernel on 'WORD' features converted from DNA strings.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Arguments for sg('convert', ...) and sg('set_kernel', ...) below.
normalization <- "FULL"
use_sign <- FALSE
reverse <- "n"
gap <- 0
order <- 3

# Comm Word String
print("CommWordString")
dump <- sg("add_preproc", "SORTWORDSTRING")
dump <- sg(
  "set_kernel", "COMMSTRING", "WORD", size_cache, use_sign, normalization
)

# TRAIN: load strings, convert CHAR -> WORD, attach the preprocessor.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
km <- sg("get_kernel_matrix", "TRAIN")

# TEST: same sequence; km is overwritten.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'CONST' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# NOTE(review): unlike most sibling kernel examples, the feature tables are
# NOT transposed here -- confirm this is intended.
fm_train_real <- as.matrix(read.table("../data/fm_train_real.dat"))
fm_test_real <- as.matrix(read.table("../data/fm_test_real.dat"))

# Const
print("Const")

# The variable is named `c`, like base::c(); calls to c() still resolve to
# the function because R skips non-function bindings when looking up a call.
c <- 23.
dump <- sg("set_kernel", "CONST", "REAL", size_cache, c)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'DIAG' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# NOTE(review): unlike most sibling kernel examples, the feature tables are
# NOT transposed here -- confirm this is intended.
fm_train_real <- as.matrix(read.table("../data/fm_train_real.dat"))
fm_test_real <- as.matrix(read.table("../data/fm_test_real.dat"))

# Diag
print("Diag")

# Extra argument handed to sg('set_kernel', 'DIAG', ...).
diag <- 23.
dump <- sg("set_kernel", "DIAG", "REAL", size_cache, diag)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'DISTANCE' kernel set up after selecting the 'EUCLIDIAN' distance.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Distance
print("Distance")

width <- 1.7
# The distance is selected first, then the kernel.
dump <- sg("set_distance", "EUCLIDIAN", "REAL")
dump <- sg("set_kernel", "DISTANCE", size_cache, width)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'FIXEDDEGREE' kernel matrices on DNA string features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Fixed Degree String
print("FixedDegreeString")

degree <- 3
dump <- sg("set_kernel", "FIXEDDEGREE", "CHAR", size_cache, degree)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'GAUSSIAN' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Gaussian
print("Gaussian")

width <- 1.9
dump <- sg("set_kernel", "GAUSSIAN", "REAL", size_cache, width)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'GAUSSIANSHIFT' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# GaussianShift
print("GaussianShift")

# Kernel arguments, passed in the order width, max_shift, shift_step.
shift_step <- 1
max_shift <- 2
width <- 1.8
dump <- sg(
  "set_kernel", "GAUSSIANSHIFT", "REAL", size_cache,
  width, max_shift, shift_step
)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: plugin estimator trained on 'WORD' features, followed by
# 'HISTOGRAM' kernel matrices for TRAIN and TEST.
library("sg")

# Settings forwarded to the sg calls below.
size_cache <- 10
order <- 3
gap <- 0
reverse <- "n"

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))
# as.double() replaces as.real(), which is defunct since R 3.0.0.
label_train_dna <- as.double(
  as.matrix(read.table("../data/label_train_dna.dat"))
)

# PluginEstimate
print("PluginEstimate w/ HistogramWord")

# Load TRAIN and TEST strings and convert both from CHAR to WORD.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)

dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)

# The two arguments of sg('new_plugin_estimator', ...).
pseudo_pos <- 1e-1
pseudo_neg <- 1e-1
dump <- sg("new_plugin_estimator", pseudo_pos, pseudo_neg)
dump <- sg("set_labels", "TRAIN", label_train_dna)
dump <- sg("train_estimator")

# The kernel is set only after the estimator has been trained.
dump <- sg("set_kernel", "HISTOGRAM", "WORD", size_cache)
km <- sg("get_kernel_matrix", "TRAIN")

# not supported yet
# lab=sg('plugin_estimate_classify')
km <- sg("get_kernel_matrix", "TEST")
# Example: 'LINEAR' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Linear
print("Linear")

scale <- 1.2
dump <- sg("set_kernel", "LINEAR", "REAL", size_cache, scale)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'LINEAR' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Linear String
print("LinearString")
dump <- sg("set_kernel", "LINEAR", "CHAR", size_cache)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'LOCALALIGNMENT' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Local Alignment String
print("LocalAlignmentString")
dump <- sg("set_kernel", "LOCALALIGNMENT", "CHAR", size_cache)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'LIK' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Locality Improved String
print("LocalityImprovedString")

# Kernel arguments; outer_degree is derived from inner_degree.
length <- 5
inner_degree <- 5
outer_degree <- inner_degree + 2
dump <- sg(
  "set_kernel", "LIK", "CHAR", size_cache,
  length, inner_degree, outer_degree
)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'OLIGO' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Oligo String
print("OligoString")

# Kernel arguments, passed in the order k, width.
width <- 1.2
k <- 3
dump <- sg("set_kernel", "OLIGO", "CHAR", size_cache, k, width)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'POLY' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Poly
print("Poly")

# Kernel arguments, passed in the order degree, inhomogene, use_normalization.
use_normalization <- TRUE
inhomogene <- FALSE
degree <- 4
dump <- sg(
  "set_kernel", "POLY", "REAL", size_cache,
  degree, inhomogene, use_normalization
)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'POLYMATCH' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Poly Match String
print("PolyMatchString")

# Kernel arguments, passed in the order degree, inhomogene.
inhomogene <- FALSE
degree <- 3
dump <- sg("set_kernel", "POLYMATCH", "CHAR", size_cache, degree, inhomogene)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'SIGMOID' kernel matrices on real-valued features via sg.
library("sg")

size_cache <- 10

# Feature tables are transposed after loading.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

# Sigmoid
print("Sigmoid")

# Kernel arguments, passed in the order gamma, coef0.
coef0 <- 1.3
gamma <- 1.2
dump <- sg("set_kernel", "SIGMOID", "REAL", size_cache, gamma, coef0)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_real)
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_real)
km <- sg("get_kernel_matrix", "TEST")
# Example: 'SLIK' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Simple Locality Improved String
print("SimpleLocalityImprovedString")

# Kernel arguments; outer_degree is derived from inner_degree.
length <- 5
inner_degree <- 5
outer_degree <- inner_degree + 2
dump <- sg(
  "set_kernel", "SLIK", "CHAR", size_cache,
  length, inner_degree, outer_degree
)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'SVMLIGHT' classifier with a 'COMMSTRING' kernel on four inline DNA
# strings. The sg calls are left unassigned, as in the original script.
library(sg)

# Inline training strings with labels, and the test strings.
traindat <- c("AGTAA", "CGCCC", "GGCGG", "TGTCT")
trainlab <- c(1, -1, -1, 1)
testdat <- c("AGCAA", "CCCCC", "GGGGG", "TGCTT")

order <- 2
C <- 1.0

# Global sg settings.
sg("loglevel", "ALL")
sg("use_linadd", TRUE)
sg("mkl_parameters", 1e-5, 0)
sg("svm_epsilon", 1e-4)

# Reset features and kernel before loading the training strings.
sg("clean_features", "TRAIN")
sg("clean_kernel")

# TRAIN: load strings, convert CHAR -> WORD, add and attach the preprocessor.
sg("set_features", "TRAIN", traindat, "DNA")
sg("convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD", order, order - 1)
sg("add_preproc", "SORTWORDSTRING")
sg("attach_preproc", "TRAIN")

sg("set_labels", "TRAIN", trainlab)
sg("new_classifier", "SVMLIGHT")
sg("set_kernel", "COMMSTRING", "WORD", 10, TRUE, "FULL")
sg("c", C)

# Keep the training kernel matrix, train, then fetch the trained SVM.
km <- sg("get_kernel_matrix", "TRAIN")
sg("train_classifier")
svmAsList <- sg("get_svm")

# TEST: load and convert the test strings, then classify.
sg("set_features", "TEST", testdat, "DNA")
sg("convert", "TEST", "STRING", "CHAR", "STRING", "WORD", order, order - 1)
sg("attach_preproc", "TEST")
sg("init_kernel_optimization")
valout <- sg("classify")
# Example: 'WEIGHTEDCOMMSTRING' kernel on 'WORD' features converted from DNA
# strings.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Arguments for sg('convert', ...) and sg('set_kernel', ...) below.
normalization <- "FULL"
use_sign <- FALSE
reverse <- "n"
gap <- 0
order <- 3

# Weighted Comm Word String
print("WeightedCommWordString")
dump <- sg("add_preproc", "SORTWORDSTRING")
dump <- sg(
  "set_kernel", "WEIGHTEDCOMMSTRING", "WORD", size_cache,
  use_sign, normalization
)

# TRAIN: load strings, convert CHAR -> WORD, attach the preprocessor.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
dump <- sg(
  "convert", "TRAIN", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TRAIN")
km <- sg("get_kernel_matrix", "TRAIN")

# TEST: same sequence; km is overwritten.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
dump <- sg(
  "convert", "TEST", "STRING", "CHAR", "STRING", "WORD",
  order, order - 1, gap, reverse
)
dump <- sg("attach_preproc", "TEST")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'WEIGHTEDDEGREEPOS' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Weighted Degree Position String
print("WeightedDegreePositionString")

degree <- 20
dump <- sg("set_kernel", "WEIGHTEDDEGREEPOS", "CHAR", size_cache, degree)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# Example: 'WEIGHTEDDEGREE' kernel on 'CHAR' (DNA string) features via sg.
library("sg")

size_cache <- 10

fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

# Weighted Degree String
print("WeightedDegreeString")

degree <- 20
dump <- sg("set_kernel", "WEIGHTEDDEGREE", "CHAR", size_cache, degree)

# Kernel matrix requested for 'TRAIN'.
dump <- sg("set_features", "TRAIN", fm_train_dna, "DNA")
km <- sg("get_kernel_matrix", "TRAIN")

# Kernel matrix requested for 'TEST'; this overwrites km.
dump <- sg("set_features", "TEST", fm_test_dna, "DNA")
km <- sg("get_kernel_matrix", "TEST")
# This script should enable you to rerun the experiment in the
# paper that we labeled with "christmas star".
#
# The task is to classify two star-shaped classes that share the
# midpoint. The difficulty of the learning problem depends on the
# distance between the classes, which is varied.
#
# Our model selection leads to a choice of C = 0.5. The model
# selection is not repeated inside this script.
library(sg)

# Preliminary settings:
C <- 0.5           # SVM parameter
cache_size <- 50   # cache per kernel in MB
svm_eps <- 1e-3    # svm epsilon
mkl_eps <- 1e-3    # mkl epsilon
no_obs <- 20       # number of observations / data points
                   # (sum for train and test and both classes)
k_star <- 20       # number of "leaves" of the stars
alpha <- 0.3       # noise level of the data

# One row per data set: column 1 holds the increasing radius of the first
# class, column 2 the fixed radius (4) of the second class.
radius_star <- matrix(4, nrow = length(seq(4.1, 10, 0.2)), ncol = 2)
radius_star[, 1] <- seq(4.1, 10, 0.2)
# distance between the classes (MATLAB notation kept from the original):
# diff(radius_star(:,1)-radius_star(:,2))

# different widths for the five rbf kernels used
rbf_width <- c(0.01, 0.1, 1, 10, 1000)

####
#### Great loop: train MKL for every data set (the different distances
#### between the stars)
####
sg("loglevel", "ERROR")
sg("echo", "OFF")

# Zero-filled result containers, allocated before the loop.
w <- matrix(0, nrow(radius_star), length(rbf_width))
result.trainout <- matrix(0, nrow(radius_star), 2 * no_obs)
result.testout <- matrix(0, nrow(radius_star), 2 * no_obs)
result.trainerr <- matrix(0, length(rbf_width), 1)
result.testerr <- matrix(0, length(rbf_width), 1)
for (kk in 1:dim(radius_star)[1]) {
# data generation
print(sprintf('MKL for radius %+02.2f ', radius_star[kk,1]))
dummy <- matrix(0, 2, 4*no_obs)
dummy[1,] <- runif(4*no_obs)
noise <- alpha*rnorm(4*no_obs)
dummy[2,] <- sin(k_star*pi*dummy[1,]) + noise # sine
dummy[2,1:(2*no_obs)] <- dummy[2,1:(2*no_obs)]+ radius_star[kk,1] # distanz shift: first class
dummy[2,(2*no_obs+1):dim(dummy)[2]] <- dummy[2,(2*no_obs+1):dim(dummy)[2]]+ radius_star[kk,2] # distanz shift: second class
dummy[1,] <- 2*pi*dummy[1,]
x <- matrix(0, dim(dummy)[1], dim(dummy)[2])
x[1,] <- dummy[2,]*sin(dummy[1,])
x[2,] <- dummy[2,]*cos(dummy[1,])
train_y <- c(-matrix(1,1, no_obs), matrix(1,1,no_obs))
test_y <- c(-matrix(1,1, no_obs), matrix(1,1,no_obs))
train_x <- matrix(0, 0, seq(1,dim(x)[2]/2))
train_x <- x[,seq(1,dim(x)[2],2)]
test_x <- x[,seq(2,dim(x)[2],2)]
rm('dummy', 'x')
# train MKL
sg('clean_kernel')
sg('clean_features', 'TRAIN')
sg('add_features','TRAIN', train_x) # set a trainingset for every SVM
sg('add_features','TRAIN', train_x)
sg('add_features','TRAIN', train_x)
sg('add_features','TRAIN', train_x)
sg('add_features','TRAIN', train_x)
sg('set_labels','TRAIN', train_y) # set the labels
sg('new_classifier', 'MKL_CLASSIFICATION')
sg('mkl_parameters', mkl_eps, 0)
sg('svm_epsilon', svm_eps)
sg('set_kernel', 'COMBINED', 0)
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width[1])
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width[2])
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width[3])
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width[4])
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width[5])
sg('c', C)
sg('train_classifier')
alphas <- sg('get_svm')[2]
w[kk,] <- sg('get_subkernel_weights')
# calculate train error
sg('clean_features', 'TEST')
sg('add_features','TEST',train_x)
sg('add_features','TEST',train_x)
sg('add_features','TEST',train_x)
sg('add_features','TEST',train_x)
sg('add_features','TEST',train_x)
sg('set_labels','TEST', train_y)
sg('set_threshold', 0)
result.trainout[kk,]<-sg('classify')
result.trainerr[kk] <- mean(train_y!=sign(result.trainout[kk,]))
# calculate test error
sg('clean_features', 'TEST')
sg('add_features','TEST',test_x)
sg('add_features','TEST',test_x)
sg('add_features','TEST',test_x)
sg('add_features','TEST',test_x)
sg('add_features','TEST',test_x)
sg('set_labels','TEST',test_y)
sg('set_threshold', 0)
result.testout[kk,]<-sg('classify')
result.testerr[kk] <- mean(test_y!=sign(result.testout[kk,]))
}
cat('done. now w contains the kernel weightings and result test/train outputs and errors')
# Multiclass MKL on a combined kernel (linear + Gaussian + polynomial)
# over real-valued features.
library("sg")
size_cache <- 10
C <- 1.2
epsilon <- 1e-5
mkl_eps <- 0.01
mkl_norm <- 1.5
width <- 1.2
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces as.real(), which is defunct in current R releases.
label_train_multiclass <- as.numeric(as.matrix(read.table('../data/label_train_multiclass.dat')))
# MKL_MULTICLASS
print('MKL_MULTICLASS')
dump <- sg('clean_features', 'TRAIN')
dump <- sg('clean_features', 'TEST')
dump <- sg('set_kernel', 'COMBINED', size_cache)
# Each sub-kernel is followed by its own copy of the train and test
# features.
dump <- sg('add_kernel', 1, 'LINEAR', 'REAL', size_cache)
dump <- sg('add_features', 'TRAIN', fm_train_real)
dump <- sg('add_features', 'TEST', fm_test_real)
dump <- sg('add_kernel', 1, 'GAUSSIAN', 'REAL', size_cache, width)
dump <- sg('add_features', 'TRAIN', fm_train_real)
dump <- sg('add_features', 'TEST', fm_test_real)
dump <- sg('add_kernel', 1, 'POLY', 'REAL', size_cache, 2)
dump <- sg('add_features', 'TRAIN', fm_train_real)
dump <- sg('add_features', 'TEST', fm_test_real)
dump <- sg('set_labels', 'TRAIN', label_train_multiclass)
dump <- sg('new_classifier', 'MKL_MULTICLASS')
dump <- sg('svm_epsilon', epsilon)
dump <- sg('c', C)
dump <- sg('mkl_parameters', mkl_eps, 0, mkl_norm)
dump <- sg('train_classifier')
result <- sg('classify')
# This script should enable you to rerun the experiment in the
# paper that we labeled "mixture linear and sine".
#
# The task is to learn a regression function where the true function
# is given by a mixture of 2 sine waves in addition to a linear trend.
# We vary the frequency of the second higher frequency sine wave.
# Setup: MKL on 10 RBF kernels of different widths on 1000 examples
# (note: this script itself is configured with no_obs <- 10 below).

# load shogun
library(sg)

# kernel width for 10 basic SVMs, kept as a 1 x 10 array
rbf_width <- array(
  c(0.001, 0.005, 0.01, 0.05, 0.1, 1, 10, 50, 100, 1000),
  dim = c(1, 10)
)

# SVM parameter
C <- 1
cache_size <- 50
mkl_eps <- 1e-4
svm_eps <- 1e-4
svm_tube <- 0.01
debug <- 0

# data
f <- 0:20 # parameter that varies the frequency of the second sine wave
#sg('loglevel', 'ALL')
#sg('echo', 'ON')
no_obs <- 10 # number of observations

# no_obs equally spaced inputs on [0, 4*pi]
stepsize <- (4 * pi) / (no_obs - 1)
x_values <- (0:(no_obs - 1)) * stepsize
trend <- 2 * x_values * (pi / (max(x_values) - min(x_values)))
# Features are passed as a 1 x no_obs matrix; the labels below are built
# from the plain vector so that they stay a plain vector as well.
train_x <- matrix(x_values, 1, length(x_values))

# one row of sub-kernel weights per frequency
weights <- matrix(0, length(f), length(rbf_width))
for (kk in seq_along(f)) { # Big loop
  # data generation
  wave1 <- sin(x_values)
  wave2 <- sin(f[kk] * x_values)
  train_y <- trend + wave1 + wave2

  # MK Learning
  sg('new_classifier', 'MKL_REGRESSION')
  sg('mkl_parameters', mkl_eps, 0)
  sg('c', C)
  sg('svm_epsilon', svm_eps)
  sg('svr_tube_epsilon', svm_tube)
  sg('clean_features', 'TRAIN')
  sg('clean_kernel')
  sg('set_labels', 'TRAIN', train_y) # set labels
  for (i in seq_along(rbf_width)) {
    sg('add_features', 'TRAIN', train_x) # add features for every basic SVM
  }
  sg('set_kernel', 'COMBINED', 0)
  for (i in seq_along(rbf_width)) {
    sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width[i])
  }
  sg('train_classifier')
  # The weights are queried through sg(); there is no standalone
  # get_subkernel_weights() function defined in this file.
  weights[kk, ] <- sg('get_subkernel_weights')
  cat("frequency:", f[kk], " rbf-kernel-weights: ", weights[kk, ], "\n")
}
# This script should enable you to rerun the experiment in the
# paper that we labeled "sine".
#
# In this regression task a sine wave is to be learned.
# We vary the frequency of the wave.

# Preliminary settings:
library(sg)

# Parameter for the SVMs.
C <- 10 # obtained via model selection (not included in the script)
cache_size <- 10
mkl_eps <- 1e-3 # threshold for precision
svm_eps <- 1e-3
svr_tube_eps <- 1e-2
debug <- 0

# Kernel width for the 5 "basic" SVMs
rbf_width <- c(0.005, 0.05, 0.5, 1, 10)

# data
# Frequencies 0.1, 0.3, ..., 4.9. (The Matlab-style `0.1:0.2:5` does not
# mean this in R: it parses as (0.1:0.2):5 and yields 0.1, 1.1, ..., 4.1.)
f <- seq(0.1, 5, by = 0.2)
no_obs <- 10 # number of observations

if (debug) {
  sg('loglevel', 'ALL')
  sg('echo', 'ON')
} else {
  sg('loglevel', 'ERROR')
  sg('echo', 'OFF')
}

# The inputs do not depend on the frequency, so they are built once:
# no_obs equally spaced points from 1 to 10*2*pi. Labels are computed
# from the plain vector, features are passed as a 1 x no_obs matrix.
x_values <- seq(1, 10 * 2 * pi, (((10 * 2 * pi) - 1) / (no_obs - 1)))
train_x <- matrix(x_values, 1, length(x_values))

# one row of sub-kernel weights per frequency
weights <- matrix(0, length(f), length(rbf_width))
for (kk in seq_along(f)) { # big loop for the different learning problems
  # data generation
  train_y <- sin(f[kk] * x_values)

  # initialize MKL-SVR
  sg('new_classifier', 'MKL_REGRESSION')
  sg('mkl_parameters', mkl_eps, 0)
  sg('c', C)
  sg('svm_epsilon', svm_eps)
  sg('svr_tube_epsilon', svr_tube_eps)
  sg('clean_features', 'TRAIN')
  sg('clean_kernel')
  sg('set_labels', 'TRAIN', train_y) # set labels
  for (i in seq_along(rbf_width)) {
    sg('add_features', 'TRAIN', train_x) # add features for every SVR
  }
  sg('set_kernel', 'COMBINED', 0)
  for (kernel_width in rbf_width) {
    sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, kernel_width)
  }
  # NOTE(review): sibling examples train with 'train_classifier'; confirm
  # that 'svm_train' is still accepted by the installed sg version.
  sg('svm_train')
  weights[kk, ] <- sg('get_subkernel_weights')
  dummy <- print(sprintf('frequency: %02.2f rbf-kernel-weights: %02.2f %02.2f %02.2f %02.2f %02.2f',
    f[kk], weights[kk, 1], weights[kk, 2], weights[kk, 3], weights[kk, 4], weights[kk, 5]))
}
# MKL classification with a weighted degree kernel on random DNA
# sequences in which three positions carry the class signal.
library(sg)

acgt <- c("A", "C", "G", "T")

# 1000 random labels and a 100 x 1000 character matrix of random
# nucleotides (one column per sequence).
LT <- sign(rnorm(1000))
# All draws are taken in one vectorized call and filled column by column;
# pmax() guards against a zero index should a draw be an exact integer.
nucleotide_idx <- pmax(1, ceiling(4 * (rnorm(100 * 1000) %% 1)))
XT <- array(acgt[nucleotide_idx], dim = c(100, 1000))

# Plant the signal: sequences with label +1 get an "A" at rows 30, 60
# and 61.
XT[c(30, 60, 61), LT == 1] <- "A"

# Random split: 200 sequences for testing, the remaining 800 for training.
idx <- sample(seq_len(1000))
XTE <- XT[, idx[1:200]]
LTE <- LT[idx[1:200]]
XT <- XT[, idx[201:1000]]
LT <- LT[idx[201:1000]]

center_idx <- 50 # not used below
degree <- 3
mismatch <- 0
C <- 1

#sg('loglevel', 'ALL')
sg('use_linadd', TRUE)
sg('mkl_parameters', 1e-5, 1)
sg('svm_epsilon', 1e-6)
sg('clean_features', 'TRAIN')
sg('clean_kernel')
sg('new_classifier', 'MKL_CLASSIFICATION')
sg('set_labels', 'TRAIN', LT)
sg('set_features', 'TRAIN', XT, 'DNA')
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', 10, degree, mismatch, FALSE, 1)
sg('c', C)
sg('svm_train')
svmAsList <- sg('get_svm')
beta <- sg('get_subkernel_weights')

# Classify the held-out sequences and compute the fraction whose
# predicted sign differs from the true label.
sg('init_kernel_optimization')
sg('clean_features', 'TEST')
sg('set_features', 'TEST', XTE, 'DNA')
output_xte <- sg('classify')
w <- sg('get_subkernel_weights')
err <- mean(sign(output_xte) != LTE)
# LOGPLUSONE preprocessor attached to real-valued features, followed by a
# CHI2 kernel matrix computation.
library(sg)

# Arguments handed to sg("set_kernel", ...) below.
size_cache <- 10
width <- 1.4

# Data files are read as tables and transposed.
fm_train_real <- t(as.matrix(read.table("../data/fm_train_real.dat")))
fm_test_real <- t(as.matrix(read.table("../data/fm_test_real.dat")))

print("LogPlusOne")
dump <- sg("add_preproc", "LOGPLUSONE")
dump <- sg("set_kernel", "CHI2", "REAL", size_cache, width)

# For TRAIN and then TEST: set the features, attach the preprocessor and
# fetch the kernel matrix. km is left holding the TEST result.
real_sets <- list(TRAIN = fm_train_real, TEST = fm_test_real)
for (target in names(real_sets)) {
  dump <- sg("set_features", target, real_sets[[target]])
  dump <- sg("attach_preproc", target)
  km <- sg("get_kernel_matrix", target)
}
# NORMONE preprocessor attached to real-valued features, followed by a
# CHI2 kernel matrix computation.
library("sg")
size_cache <- 10
width <- 2.1
# Only the real-valued data are needed by this example; the DNA files are
# not read here because nothing below uses them.
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# NormOne
print('NormOne')
dump <- sg('add_preproc', 'NORMONE')
dump <- sg('set_kernel', 'CHI2', 'REAL', size_cache, width)
# Training features: set, attach the preprocessor, get the kernel matrix.
dump <- sg('set_features', 'TRAIN', fm_train_real)
dump <- sg('attach_preproc', 'TRAIN')
km <- sg('get_kernel_matrix', 'TRAIN')
# Same sequence for the test features; km is overwritten.
dump <- sg('set_features', 'TEST', fm_test_real)
dump <- sg('attach_preproc', 'TEST')
km <- sg('get_kernel_matrix', 'TEST')
# PRUNEVARSUBMEAN preprocessor attached to real-valued features, followed
# by a CHI2 kernel matrix computation.
library(sg)

# Arguments handed to the sg() calls below.
size_cache <- 10
width <- 2.1
divide_by_std <- TRUE

# NOTE(review): unlike the sibling real-valued examples, the data are not
# transposed with t() here -- confirm this orientation is intended.
fm_train_real <- as.matrix(read.table("../data/fm_train_real.dat"))
fm_test_real <- as.matrix(read.table("../data/fm_test_real.dat"))

print("PruneVarSubMean")
dump <- sg("add_preproc", "PRUNEVARSUBMEAN", divide_by_std)
dump <- sg("set_kernel", "CHI2", "REAL", size_cache, width)

# For TRAIN and then TEST: set the features, attach the preprocessor and
# fetch the kernel matrix. km is left holding the TEST result.
real_sets <- list(TRAIN = fm_train_real, TEST = fm_test_real)
for (target in names(real_sets)) {
  dump <- sg("set_features", target, real_sets[[target]])
  dump <- sg("attach_preproc", target)
  km <- sg("get_kernel_matrix", target)
}
# COMMSTRING kernel on ULONG string features derived from DNA data, with
# the SORTULONGSTRING preprocessor attached.
library(sg)

# Arguments for the feature conversion and the kernel.
size_cache <- 10
order <- 3
gap <- 0
reverse <- "n"
use_sign <- FALSE
normalization <- "FULL"

# Each data file is read as a table and kept as a matrix.
fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

print("CommUlongString")
dump <- sg("add_preproc", "SORTULONGSTRING")

# For TRAIN and then TEST: load the DNA strings, convert them from CHAR
# to ULONG string features and attach the preprocessor.
dna_sets <- list(TRAIN = fm_train_dna, TEST = fm_test_dna)
for (target in names(dna_sets)) {
  dump <- sg("set_features", target, dna_sets[[target]], "DNA")
  dump <- sg(
    "convert", target, "STRING", "CHAR", "STRING", "ULONG",
    order, order - 1, gap, reverse
  )
  dump <- sg("attach_preproc", target)
}

# The kernel is set only after both feature sets are prepared; km is
# left holding the TEST kernel matrix.
dump <- sg("set_kernel", "COMMSTRING", "ULONG", size_cache, use_sign, normalization)
for (target in names(dna_sets)) {
  km <- sg("get_kernel_matrix", target)
}
# COMMSTRING kernel on WORD string features derived from DNA data, with
# the SORTWORDSTRING preprocessor attached.
library(sg)

# Arguments for the feature conversion and the kernel.
size_cache <- 10
order <- 3
gap <- 0
reverse <- "n"
use_sign <- FALSE
normalization <- "FULL"

# Each data file is read as a table and kept as a matrix.
fm_train_dna <- as.matrix(read.table("../data/fm_train_dna.dat"))
fm_test_dna <- as.matrix(read.table("../data/fm_test_dna.dat"))

print("CommWordString")
dump <- sg("add_preproc", "SORTWORDSTRING")

# For TRAIN and then TEST: load the DNA strings, convert them from CHAR
# to WORD string features and attach the preprocessor.
dna_sets <- list(TRAIN = fm_train_dna, TEST = fm_test_dna)
for (target in names(dna_sets)) {
  dump <- sg("set_features", target, dna_sets[[target]], "DNA")
  dump <- sg(
    "convert", target, "STRING", "CHAR", "STRING", "WORD",
    order, order - 1, gap, reverse
  )
  dump <- sg("attach_preproc", target)
}

# The kernel is set only after both feature sets are prepared; km is
# left holding the TEST kernel matrix.
dump <- sg("set_kernel", "COMMSTRING", "WORD", size_cache, use_sign, normalization)
for (target in names(dna_sets)) {
  km <- sg("get_kernel_matrix", target)
}
# KRR regression with a Gaussian kernel on real-valued features.
library("sg")
size_cache <- 10
C <- 10
width <- 2.1
fm_train <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces as.real(), which is defunct in current R releases.
label_train <- as.numeric(as.matrix(read.table('../data/label_train_twoclass.dat')))
# KRR
print('KRR')
tau <- 1e-6
dump <- sg('set_features', 'TRAIN', fm_train)
dump <- sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
dump <- sg('set_labels', 'TRAIN', label_train)
dump <- sg('new_regression', 'KRR')
dump <- sg('krr_tau', tau)
dump <- sg('c', C)
dump <- sg('train_regression')
# Apply the trained regressor to the test features.
dump <- sg('set_features', 'TEST', fm_test)
result <- sg('classify')
# LIBSVR regression with a Gaussian kernel on real-valued features.
library("sg")
size_cache <- 10
C <- 10
tube_epsilon <- 1e-2
width <- 2.1
fm_train <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces as.real(), which is defunct in current R releases.
label_train <- as.numeric(as.matrix(read.table('../data/label_train_twoclass.dat')))
# LibSVR
print('LibSVR')
dump <- sg('set_features', 'TRAIN', fm_train)
dump <- sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
dump <- sg('set_labels', 'TRAIN', label_train)
dump <- sg('new_regression', 'LIBSVR')
dump <- sg('svr_tube_epsilon', tube_epsilon)
dump <- sg('c', C)
dump <- sg('train_regression')
# Apply the trained regressor to the test features.
dump <- sg('set_features', 'TEST', fm_test)
result <- sg('classify')
# SVRLIGHT regression with a Gaussian kernel on real-valued features.
library("sg")
size_cache <- 10
C <- 10
tube_epsilon <- 1e-2
width <- 2.1
# NOTE(review): unlike the KRR and LibSVR examples, the data are not
# transposed with t() here -- confirm this orientation is intended.
fm_train <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test <- as.matrix(read.table('../data/fm_test_real.dat'))
# as.numeric() replaces as.real(), which is defunct in current R releases.
label_train <- as.numeric(as.matrix(read.table('../data/label_train_twoclass.dat')))

# SVR Light
# Trains an SVRLIGHT regressor on the script-level training data and
# applies it to the test features. Takes no arguments; reads size_cache,
# C, tube_epsilon, width, fm_train, fm_test and label_train from the
# enclosing environment. The value of sg('classify') is returned
# invisibly, as the value of the final assignment.
dosvrlight <- function()
{
  print('SVRLight')
  dump <- sg('set_features', 'TRAIN', fm_train)
  dump <- sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
  dump <- sg('set_labels', 'TRAIN', label_train)
  dump <- sg('new_regression', 'SVRLIGHT')
  dump <- sg('svr_tube_epsilon', tube_epsilon)
  dump <- sg('c', C)
  dump <- sg('train_regression')
  dump <- sg('set_features', 'TEST', fm_test)
  result <- sg('classify')
}
# Wrapped in try() so that an error inside the example does not abort
# the script (presumably because SVRLIGHT may be unavailable in some
# builds -- confirm).
try(dosvrlight())