本页面包含了所有Python静态接口的例子。
要运行这些例子，只需执行：
python name_of_example.py
def gmnpsvm():
    """Train a 'GMNPSVM' classifier on a Gaussian kernel and classify test data.

    Takes no arguments and returns None; the output of 'classify' is only
    bound to a local.  Reads the module-level globals ``fm_train_real``,
    ``label_train_multiclass`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('GMNPSVM')

    size_cache = 10
    width = 2.1
    C = 1.2
    epsilon = 1e-5
    use_bias = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('new_classifier', 'GMNPSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that gmnpsvm() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = lm.load_labels(
        '../data/label_train_multiclass.dat')

    gmnpsvm()
def gpbtsvm():
    """Train a 'GPBTSVM' classifier on a Gaussian kernel and classify test data.

    Takes no arguments and returns None; the output of 'classify' is only
    bound to a local.  Reads the module-level globals ``fm_train_real``,
    ``label_train_twoclass`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('GPBTSVM')

    size_cache = 10
    width = 2.1
    C = 1.2
    epsilon = 1e-5
    use_bias = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_twoclass)
    sg('new_classifier', 'GPBTSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that gpbtsvm() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')

    gpbtsvm()
def knn():
    """Train a 'KNN' classifier with the 'EUCLIDIAN' distance and classify.

    Takes no arguments and returns None; the output of 'classify' is only
    bound to a local.  Reads the module-level globals ``fm_train_real``,
    ``label_train_multiclass`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('KNN')

    k = 3  # passed to 'train_classifier'

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('set_distance', 'EUCLIDIAN', 'REAL')
    sg('new_classifier', 'KNN')
    sg('train_classifier', k)

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that knn() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = lm.load_labels(
        '../data/label_train_multiclass.dat')

    knn()
def lda():
    """Train an 'LDA' classifier, classify the test data and print the result.

    Takes no arguments and returns None.  Reads the module-level globals
    ``fm_train_real``, ``label_train_twoclass`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LDA')

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_twoclass)
    sg('new_classifier', 'LDA')
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
    print(result)
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that lda() reads, print the training matrix, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')

    # print() call form runs on both Python 2 and Python 3.
    print(fm_train_real)
    lda()
def libsvm():
    """Train a 'LIBSVM' classifier on a Gaussian kernel and classify test data.

    Takes no arguments and returns None; the output of 'classify' is only
    bound to a local.  Reads the module-level globals ``fm_train_real``,
    ``label_train_twoclass`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LibSVM')

    size_cache = 10
    width = 2.1
    C = 1.2
    epsilon = 1e-5
    use_bias = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_twoclass)
    sg('new_classifier', 'LIBSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that libsvm() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')

    libsvm()
def libsvm_multiclass():
    """Train a 'LIBSVM_MULTICLASS' classifier on a Gaussian kernel and classify.

    Takes no arguments and returns None; the output of 'classify' is only
    bound to a local.  Reads the module-level globals ``fm_train_real``,
    ``label_train_multiclass`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LibSVMMultiClass')

    size_cache = 10
    width = 2.1
    C = 10.
    epsilon = 1e-5
    use_bias = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('new_classifier', 'LIBSVM_MULTICLASS')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that libsvm_multiclass() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = lm.load_labels(
        '../data/label_train_multiclass.dat')

    libsvm_multiclass()
def libsvm_oneclass():
    """Train a 'LIBSVM_ONECLASS' classifier on a Gaussian kernel and classify.

    No labels are set before training.  Takes no arguments and returns None;
    the output of 'classify' is only bound to a local.  Reads the module-level
    globals ``fm_train_real`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LibSVMOneClass')

    size_cache = 10
    width = 2.1
    C = 10.
    epsilon = 1e-5
    use_bias = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('new_classifier', 'LIBSVM_ONECLASS')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that libsvm_oneclass() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    libsvm_oneclass()
def mpdsvm():
    """Train an 'MPDSVM' classifier on a Gaussian kernel and classify test data.

    Takes no arguments and returns None; the output of 'classify' is only
    bound to a local.  Reads the module-level globals ``fm_train_real``,
    ``label_train_twoclass`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('MPDSVM')

    size_cache = 10
    width = 2.1
    C = 1.2
    epsilon = 1e-5
    use_bias = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train_twoclass)
    sg('new_classifier', 'MPDSVM')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that mpdsvm() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')

    mpdsvm()
def perceptron():
    """Set up a 'PERCEPTRON' classifier; training/classification stay disabled.

    Takes no arguments and returns None.  Reads the module-level globals
    ``fm_train_real`` and ``label_train_twoclass``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Perceptron')

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_twoclass)
    sg('new_classifier', 'PERCEPTRON')
    # often does not converge, mind your data!
    #sg('train_classifier')

    #sg('set_features', 'TEST', fm_test_real)
    #result=sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into module-level globals and
    # run perceptron().  fm_test_real is only referenced by the commented-out
    # part of perceptron().
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')

    perceptron()
def svm_light():
    """Train an 'SVMLIGHT' classifier on a 'WEIGHTEDDEGREE' kernel and classify.

    Takes no arguments and returns None; the output of 'classify' is only
    bound to a local.  Returns early, without training, if creating the
    classifier raises RuntimeError.  Reads the module-level globals
    ``fm_train_dna``, ``label_train_dna`` and ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('SVMLight')

    size_cache = 10
    degree = 20
    C = 1.2
    epsilon = 1e-5
    use_bias = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree)
    sg('set_labels', 'TRAIN', label_train_dna)

    try:
        sg('new_classifier', 'SVMLIGHT')
    except RuntimeError:
        # NOTE(review): presumably raised when SVMLight is not available in
        # this build -- confirm.  The example is then skipped silently.
        return

    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('svm_use_bias', use_bias)
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    result = sg('classify')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that svm_light() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    label_train_dna = lm.load_labels('../data/label_train_dna.dat')

    svm_light()
def hierarchical():
    """Run 'HIERARCHICAL' clustering with the 'EUCLIDIAN' distance.

    Takes no arguments and returns None; the two values returned by
    'get_clustering' are only bound to locals.  Reads the module-level
    global ``fm_train``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Hierarchical')

    merges = 3  # passed to 'train_clustering'

    from sg import sg
    sg('set_features', 'TRAIN', fm_train)
    sg('set_distance', 'EUCLIDIAN', 'REAL')
    sg('new_clustering', 'HIERARCHICAL')
    sg('train_clustering', merges)

    merge_distance, pairs = sg('get_clustering')
if __name__ == '__main__':
    # Script entry point: load the data file into the module-level global
    # that hierarchical() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train = lm.load_numbers('../data/fm_train_real.dat')

    hierarchical()
def kmeans():
    """Run 'KMEANS' clustering with the 'EUCLIDIAN' distance.

    Takes no arguments and returns None; the two values returned by
    'get_clustering' are only bound to locals.  Reads the module-level
    global ``fm_train``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('KMeans')

    k = 3
    # Second argument to 'train_clustering'; named so that it does not shadow
    # the builtin iter().
    iterations = 1000

    from sg import sg
    sg('set_features', 'TRAIN', fm_train)
    sg('set_distance', 'EUCLIDIAN', 'REAL')
    sg('new_clustering', 'KMEANS')
    sg('train_clustering', k, iterations)

    radi, centers = sg('get_clustering')
if __name__ == '__main__':
    # Script entry point: load the data file into the module-level global
    # that kmeans() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train = lm.load_numbers('../data/fm_train_real.dat')

    kmeans()
def bray_curtis_distance():
    """Fetch 'BRAYCURTIS' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('BrayCurtisDistance')

    from sg import sg
    sg('set_distance', 'BRAYCURTIS', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that bray_curtis_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    bray_curtis_distance()
def canberra_metric():
    """Fetch 'CANBERRA' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('CanberraMetric')

    from sg import sg
    sg('set_distance', 'CANBERRA', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that canberra_metric() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    canberra_metric()
def canberra_word_distance():
    """Fetch 'CANBERRA' distance matrices on WORD features built from DNA.

    For TRAIN and then TEST: sets the DNA strings, converts them to WORD
    strings, attaches the 'SORTWORDSTRING' preprocessor and requests the
    distance matrix.  Takes no arguments and returns None; each matrix is
    only bound to the local ``dm``.  Reads the module-level globals
    ``fm_train_dna`` and ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('CanberraWordDistance')

    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    sg('set_distance', 'CANBERRA', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that canberra_word_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    canberra_word_distance()
def chebyshew_metric():
    """Fetch 'CHEBYSHEW' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('ChebyshewMetric')

    from sg import sg
    sg('set_distance', 'CHEBYSHEW', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that chebyshew_metric() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    chebyshew_metric()
def chi_square_distance():
    """Fetch 'CHISQUARE' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('ChiSquareDistance')

    from sg import sg
    sg('set_distance', 'CHISQUARE', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that chi_square_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    chi_square_distance()
def cosine_distance():
    """Fetch 'COSINE' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('CosineDistance')

    from sg import sg
    sg('set_distance', 'COSINE', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that cosine_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    cosine_distance()
def euclidian_distance():
    """Fetch 'EUCLIDIAN' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('EuclidianDistance')

    from sg import sg
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that euclidian_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    euclidian_distance()
def geodesic_metric():
    """Fetch 'GEODESIC' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('GeodesicMetric')

    from sg import sg
    sg('set_distance', 'GEODESIC', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that geodesic_metric() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    geodesic_metric()
def hamming_word_distance():
    """Fetch 'HAMMING' distance matrices on WORD features built from DNA.

    For TRAIN and then TEST: sets the DNA strings, converts them to WORD
    strings, attaches the 'SORTWORDSTRING' preprocessor and requests the
    distance matrix.  Takes no arguments and returns None; each matrix is
    only bound to the local ``dm``.  Reads the module-level globals
    ``fm_train_dna`` and ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('HammingWordDistance')

    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    sg('set_distance', 'HAMMING', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that hamming_word_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    hamming_word_distance()
def jensen_metric():
    """Fetch 'JENSEN' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('JensenMetric')

    from sg import sg
    sg('set_distance', 'JENSEN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that jensen_metric() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    jensen_metric()
def manhattan_metric():
    """Fetch 'MANHATTAN' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('ManhattanMetric')

    from sg import sg
    sg('set_distance', 'MANHATTAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that manhattan_metric() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    manhattan_metric()
def manhattan_word_distance():
    """Fetch 'MANHATTAN' distance matrices on WORD features built from DNA.

    For TRAIN and then TEST: sets the DNA strings, converts them to WORD
    strings, attaches the 'SORTWORDSTRING' preprocessor and requests the
    distance matrix.  Takes no arguments and returns None; each matrix is
    only bound to the local ``dm``.  Reads the module-level globals
    ``fm_train_dna`` and ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('ManhattanWordDistance')

    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    sg('set_distance', 'MANHATTAN', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that manhattan_word_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    manhattan_word_distance()
def minkowski_metric():
    """Fetch 'MINKOWSKI' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('MinkowskiMetric')

    k = 3.  # extra argument handed to 'set_distance'

    from sg import sg
    sg('set_distance', 'MINKOWSKI', 'REAL', k)

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that minkowski_metric() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    minkowski_metric()
def tanimoto_distance():
    """Fetch 'TANIMOTO' distance matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``dm``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('TanimotoDistance')

    from sg import sg
    sg('set_distance', 'TANIMOTO', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    dm = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    dm = sg('get_distance_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that tanimoto_distance() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    tanimoto_distance()
def histogram():
    """Prepare sorted WORD features from DNA; distribution calls are disabled.

    Sets the DNA training strings, converts them to WORD strings and attaches
    the 'SORTWORDSTRING' preprocessor.  The 'HISTOGRAM' distribution commands
    are kept below as comments only.  Takes no arguments and returns None.
    Reads the module-level global ``fm_train``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Histogram')

    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    #    sg('new_distribution', 'HISTOGRAM')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    #    sg('train_distribution')
    #    histo=sg('get_histogram')

    #    num_examples=11
    #    num_param=sg('get_histogram_num_model_parameters')
    #    for i in xrange(num_examples):
    #        for j in xrange(num_param):
    #            sg('get_log_derivative %d %d' % (j, i))

    #    sg('get_log_likelihood')
    #    sg('get_log_likelihood_sample')
if __name__ == '__main__':
    # Script entry point: load the data files into module-level globals and
    # run histogram().  fm_cube is loaded too, although histogram() itself
    # only reads fm_train.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train = lm.load_dna('../data/fm_train_dna.dat')
    fm_cube = lm.load_cubes('../data/fm_train_cube.dat')

    histogram()
def hmm():
    """Train an HMM via 'bw', copy it into a fresh HMM and query its likelihood.

    Creates an HMM with ``N`` and ``M``, sets the 'CUBE' training strings,
    converts them to WORD strings and runs the 'bw' command.  The four values
    returned by 'get_hmm' are then fed to 'set_hmm' on a newly created HMM.
    Takes no arguments and returns None; the likelihood is only bound to a
    local.  Reads the module-level global ``fm_cube``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('HMM')

    N = 3
    M = 6
    order = 1

    from sg import sg
    sg('new_hmm', N, M)
    sg('set_features', 'TRAIN', fm_cube, 'CUBE')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order)
    sg('bw')  # NOTE(review): presumably Baum-Welch training -- confirm
    # Named ``model`` so the local does not shadow this function's own name.
    model = sg('get_hmm')

    sg('new_hmm', N, M)
    sg('set_hmm', model[0], model[1], model[2], model[3])
    likelihood = sg('hmm_likelihood')
if __name__ == '__main__':
    # Script entry point: load the data files into module-level globals and
    # run hmm().  fm_train is loaded too, although hmm() itself only reads
    # fm_cube.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train = lm.load_dna('../data/fm_train_dna.dat')
    fm_cube = lm.load_cubes('../data/fm_train_cube.dat')

    hmm()
def linear_hmm():
    """Prepare sorted WORD features from DNA; distribution calls are disabled.

    Sets the DNA training strings, converts them to WORD strings and attaches
    the 'SORTWORDSTRING' preprocessor.  The 'LinearHMM' distribution commands
    are kept below as comments only.  Takes no arguments and returns None.
    Reads the module-level global ``fm_train``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LinearHMM')

    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    #    sg('new_distribution', 'LinearHMM')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    #    sg('train_distribution')
    #    histo=sg('get_histogram')

    #    num_examples=11
    #    num_param=sg('get_histogram_num_model_parameters')
    #    for i in xrange(num_examples):
    #        for j in xrange(num_param):
    #            sg('get_log_derivative %d %d' % (j, i))

    #    sg('get_log_likelihood')
    #    sg('get_log_likelihood_sample')
if __name__ == '__main__':
    # Script entry point: load the data files into module-level globals and
    # run linear_hmm().  fm_cube is loaded too, although linear_hmm() itself
    # only reads fm_train.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train = lm.load_dna('../data/fm_train_dna.dat')
    fm_cube = lm.load_cubes('../data/fm_train_cube.dat')

    linear_hmm()
def chi2():
    """Fetch 'CHI2' kernel matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Chi2')

    width = 1.4
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that chi2() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    chi2()
def combined():
    """Fetch kernel matrices from a 'COMBINED' kernel with three sub-kernels.

    Cleans the current kernel and features, then adds 'LINEAR', 'GAUSSIAN'
    and 'POLY' sub-kernels, each followed by a TRAIN/TEST 'add_features'
    pair using the same real-valued data.  Takes no arguments and returns
    None; each matrix is only bound to the local ``km``.  Reads the
    module-level globals ``fm_train_real`` and ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Combined')

    size_cache = 10
    weight = 1.

    from sg import sg
    sg('clean_kernel')
    sg('clean_features', 'TRAIN')
    sg('clean_features', 'TEST')
    sg('set_kernel', 'COMBINED', size_cache)

    sg('add_kernel', weight, 'LINEAR', 'REAL', size_cache)
    sg('add_features', 'TRAIN', fm_train_real)
    sg('add_features', 'TEST', fm_test_real)

    sg('add_kernel', weight, 'GAUSSIAN', 'REAL', size_cache, 1.)
    sg('add_features', 'TRAIN', fm_train_real)
    sg('add_features', 'TEST', fm_test_real)

    sg('add_kernel', weight, 'POLY', 'REAL', size_cache, 3, False)
    sg('add_features', 'TRAIN', fm_train_real)
    sg('add_features', 'TEST', fm_test_real)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that combined() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    combined()
def comm_ulong_string():
    """Fetch 'COMMSTRING' kernel matrices on ULONG features built from DNA.

    For TRAIN and then TEST: sets the DNA strings, converts them to ULONG
    strings and attaches the 'SORTULONGSTRING' preprocessor.  Takes no
    arguments and returns None; each matrix is only bound to the local
    ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('CommUlongString')

    size_cache = 10
    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true
    use_sign = False
    normalization = 'FULL'

    from sg import sg
    sg('add_preproc', 'SORTULONGSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'ULONG',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'ULONG',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign,
       normalization)
    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that comm_ulong_string() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    comm_ulong_string()
def comm_word_string():
    """Fetch 'COMMSTRING' kernel matrices on WORD features built from DNA.

    For TRAIN and then TEST: sets the DNA strings, converts them to WORD
    strings and attaches the 'SORTWORDSTRING' preprocessor.  Takes no
    arguments and returns None; each matrix is only bound to the local
    ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('CommWordString')

    size_cache = 10
    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true
    use_sign = False
    normalization = 'FULL'

    from sg import sg
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign,
       normalization)
    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that comm_word_string() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    comm_word_string()
def const():
    """Fetch 'CONST' kernel matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Const')

    c = 23.
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'CONST', 'REAL', size_cache, c)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that const() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    const()
def diag():
    """Fetch 'DIAG' kernel matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Diag')

    # Named ``diag_value`` so the local does not shadow this function's name.
    diag_value = 23.
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'DIAG', 'REAL', size_cache, diag_value)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that diag() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    diag()
def fixed_degree_string():
    """Fetch 'FIXEDDEGREE' kernel matrices for the TRAIN and TEST DNA strings.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('FixedDegreeString')

    size_cache = 10
    degree = 3

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'FIXEDDEGREE', 'CHAR', size_cache, degree)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that fixed_degree_string() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    fixed_degree_string()
def gaussian():
    """Fetch 'GAUSSIAN' kernel matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Gaussian')

    width = 1.9
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that gaussian() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    gaussian()
def gaussian_shift():
    """Fetch 'GAUSSIANSHIFT' kernel matrices for the TRAIN and TEST features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('GaussianShift')

    width = 1.9
    max_shift = 2
    shift_step = 1
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'GAUSSIANSHIFT', 'REAL', size_cache, width,
       max_shift, shift_step)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that gaussian_shift() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    gaussian_shift()
def linear():
    """Fetch 'LINEAR' kernel matrices for the TRAIN and TEST real features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Linear')

    scale = 1.2
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'LINEAR', 'REAL', size_cache, scale)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that linear() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    linear()
def linear_byte():
    """Fetch 'LINEAR' kernel matrices for the TRAIN and TEST byte features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_byte`` and
    ``fm_test_byte``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LinearByte')

    from sg import sg
    # NOTE(review): only the TEST call passes the extra 'RAWBYTE' argument;
    # the TRAIN call does not.  Kept exactly as in the original example --
    # confirm whether the asymmetry is intended.
    sg('set_features', 'TRAIN', fm_train_byte)
    sg('set_features', 'TEST', fm_test_byte, 'RAWBYTE')
    sg('set_kernel', 'LINEAR', 'BYTE', 10)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files, cast them with numpy.ubyte
    # into the module-level globals that linear_byte() reads, then run the
    # example.
    from numpy import ubyte
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_byte = ubyte(lm.load_numbers('../data/fm_train_byte.dat'))
    fm_test_byte = ubyte(lm.load_numbers('../data/fm_test_byte.dat'))

    linear_byte()
def linear_string():
    """Fetch 'LINEAR' kernel matrices for the TRAIN and TEST DNA strings.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LinearString')

    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LINEAR', 'CHAR', size_cache)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that linear_string() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    linear_string()
def linear_word():
    """Fetch 'LINEAR' kernel matrices for the TRAIN and TEST word features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_word`` and
    ``fm_test_word``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LinearWord')

    size_cache = 10
    scale = 1.4

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_word)
    sg('set_features', 'TEST', fm_test_word)
    sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files, cast them with numpy.ushort
    # into the module-level globals that linear_word() reads, then run the
    # example.
    from tools.load import LoadMatrix
    from numpy import ushort

    lm = LoadMatrix()
    # The training matrix used to be read from fm_test_word.dat, so TRAIN and
    # TEST were identical; read the training file instead.
    # NOTE(review): assumes ../data/fm_train_word.dat ships alongside the
    # test file, as it does for the other feature types -- confirm.
    fm_train_word = ushort(lm.load_numbers('../data/fm_train_word.dat'))
    fm_test_word = ushort(lm.load_numbers('../data/fm_test_word.dat'))

    linear_word()
def local_alignment_string():
    """Fetch 'LOCALALIGNMENT' kernel matrices for the TRAIN/TEST DNA strings.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LocalAlignmentString')

    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LOCALALIGNMENT', 'CHAR', size_cache)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that local_alignment_string() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    local_alignment_string()
def locality_improved_string():
    """Fetch 'LIK' kernel matrices for the TRAIN and TEST DNA strings.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('LocalityImprovedString')

    size_cache = 10
    length = 5
    inner_degree = 5
    outer_degree = inner_degree + 2

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LIK', 'CHAR', size_cache, length, inner_degree,
       outer_degree)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into module-level globals and
    # run locality_improved_string().  label_train_dna is loaded too,
    # although that function does not read it.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    label_train_dna = lm.load_labels('../data/label_train_dna.dat')

    locality_improved_string()
def oligo_string():
    """Fetch 'OLIGO' kernel matrices for the TRAIN and TEST DNA strings.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('OligoString')

    size_cache = 10
    k = 3
    width = 1.2

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'OLIGO', 'CHAR', size_cache, k, width)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that oligo_string() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    oligo_string()
def plugin_estimate_histogram():
    """Train a plugin estimator and fetch 'HISTOGRAM' kernel matrices.

    Sets the TRAIN and TEST DNA strings and converts both to WORD strings,
    creates and trains a plugin estimator on the training labels, then sets
    the 'HISTOGRAM' kernel and requests the TRAIN and TEST kernel matrices.
    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna``,
    ``fm_test_dna`` and ``label_train_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('PluginEstimate w/ HistogramWord')

    size_cache = 10
    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    pseudo_pos = 1e-1
    pseudo_neg = 1e-1
    sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
    sg('set_labels', 'TRAIN', label_train_dna)
    sg('train_estimator')

    sg('set_kernel', 'HISTOGRAM', 'WORD', size_cache)
    km = sg('get_kernel_matrix', 'TRAIN')

    # not supported yet
    #    lab=sg('plugin_estimate_classify')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that plugin_estimate_histogram() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    label_train_dna = lm.load_labels('../data/label_train_dna.dat')

    plugin_estimate_histogram()
def poly():
    """Fetch 'POLY' kernel matrices for the TRAIN and TEST real features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Poly')

    degree = 4
    inhomogene = False
    use_normalization = True
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'POLY', 'REAL', size_cache, degree, inhomogene,
       use_normalization)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that poly() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    poly()
def poly_match_string():
    """Fetch 'POLYMATCH' kernel matrices for the TRAIN and TEST DNA strings.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('PolyMatchString')

    size_cache = 10
    degree = 3
    inhomogene = False

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'POLYMATCH', 'CHAR', size_cache, degree, inhomogene)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that poly_match_string() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    poly_match_string()
def poly_match_word():
    """Fetch 'POLYMATCH' kernel matrices on WORD features built from DNA.

    For TRAIN and then TEST: sets the DNA strings, converts them to WORD
    strings and attaches the 'SORTWORDSTRING' preprocessor.  Takes no
    arguments and returns None; each matrix is only bound to the local
    ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('PolyMatchWord')

    size_cache = 10
    degree = 2
    inhomogene = True
    normalize = True
    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'POLYMATCH', 'WORD', size_cache, degree, inhomogene,
       normalize)
    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into module-level globals and
    # run poly_match_word().  label_train_dna is loaded too, although that
    # function does not read it.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    label_train_dna = lm.load_labels('../data/label_train_dna.dat')

    poly_match_word()
def plugin_estimate_salzberg():
    """Train a plugin estimator and fetch 'SALZBERG' kernel matrices.

    Sets the TRAIN and TEST DNA strings and converts both to WORD strings,
    creates and trains a plugin estimator on the training labels, then sets
    the 'SALZBERG' kernel, derives prior probabilities from the labels and
    requests the TRAIN and TEST kernel matrices.  Takes no arguments and
    returns None; each matrix is only bound to the local ``km``.  Reads the
    module-level globals ``fm_train_dna``, ``fm_test_dna`` and
    ``label_train_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('PluginEstimate w/ SalzbergWord')

    size_cache = 10
    order = 3
    gap = 0
    reverse = 'n'  # bit silly to not use boolean, set 'r' to yield true

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    pseudo_pos = 1e-1
    pseudo_neg = 1e-1
    sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
    sg('set_labels', 'TRAIN', label_train_dna)
    sg('train_estimator')

    sg('set_kernel', 'SALZBERG', 'WORD', size_cache)
    #sg('set_prior_probs', 0.4, 0.6)
    sg('set_prior_probs_from_labels', label_train_dna)
    km = sg('get_kernel_matrix', 'TRAIN')

    # not supported yet
    #    lab=sg('plugin_estimate_classify')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that plugin_estimate_salzberg() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    label_train_dna = lm.load_labels('../data/label_train_dna.dat')

    plugin_estimate_salzberg()
def sigmoid():
    """Fetch 'SIGMOID' kernel matrices for the TRAIN and TEST real features.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_real`` and
    ``fm_test_real``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('Sigmoid')

    gamma = 1.2
    coef0 = 1.3
    size_cache = 10

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'SIGMOID', 'REAL', size_cache, gamma, coef0)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__ == '__main__':
    # Script entry point: load the data files into the module-level globals
    # that sigmoid() reads, then run the example.
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    sigmoid()
def simple_locality_improved_string():
    """Fetch 'SLIK' kernel matrices for the TRAIN and TEST DNA strings.

    Takes no arguments and returns None; each matrix is only bound to the
    local ``km``.  Reads the module-level globals ``fm_train_dna`` and
    ``fm_test_dna``.
    """
    # print() call form runs on both Python 2 and Python 3.
    print('SimpleLocalityImprovedString')

    size_cache = 10
    length = 5
    inner_degree = 5
    outer_degree = inner_degree + 2

    from sg import sg
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'SLIK', 'CHAR', size_cache, length, inner_degree,
       outer_degree)

    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')
if __name__=='__main__':
from tools.load import LoadMatrix
lm=LoadMatrix()
fm_train_dna=lm.load_dna('../data/fm_train_dna.dat')
fm_test_dna=lm.load_dna('../data/fm_test_dna.dat')
label_train_dna=lm.load_labels('../data/label_train_dna.dat')
simple_locality_improved_string()
def weighted_comm_word_string():
    """WEIGHTEDCOMMSTRING kernel on sorted DNA word features.

    Uses the module-level ``fm_train_dna`` and ``fm_test_dna``.  A
    SORTWORDSTRING preprocessor is registered first; each data set is then
    loaded, converted from CHAR to WORD strings and has the preprocessor
    attached.  Finally the kernel matrix is requested for TRAIN and TEST.
    Returns nothing.
    """
    print('WeightedCommWordString')

    cache_size = 10
    word_order = 3
    no_gap = 0
    reverse_flag = 'n'  # flag character, not a boolean: 'r' would enable it
    sign_flag = False
    norm_mode = 'FULL'

    # Conversion arguments shared by the TRAIN and TEST sets.
    to_words = ('STRING', 'CHAR', 'STRING', 'WORD',
                word_order, word_order - 1, no_gap, reverse_flag)

    from sg import sg
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', *to_words)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', *to_words)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', cache_size, sign_flag, norm_mode)
    for split in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', split)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    # NOTE: weighted_comm_word_string() never reads label_train_dna.
    label_train_dna = lm.load_labels('../data/label_train_dna.dat')

    weighted_comm_word_string()
def weighted_degree_position_string():
    """WEIGHTEDDEGREEPOS kernel on DNA character strings.

    Uses the module-level ``fm_train_dna`` and ``fm_test_dna``, installs the
    kernel with a cache size of 10 and a degree of 20, and requests the
    kernel matrix for TRAIN and then TEST.  Returns nothing.
    """
    print('WeightedDegreePositionString')

    from sg import sg

    cache_size, max_degree = 10, 20

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'WEIGHTEDDEGREEPOS', 'CHAR', cache_size, max_degree)

    for split in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', split)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # Module-level names read by weighted_degree_position_string().
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    weighted_degree_position_string()
def weighted_degree_string():
    """WEIGHTEDDEGREE kernel on DNA character strings.

    Uses the module-level ``fm_train_dna`` and ``fm_test_dna``.  Returns
    nothing; the TRAIN and TEST kernel matrices are only bound to a local.
    """
    print('WeightedDegreeString')

    # (kernel name, feature type, cache size, degree)
    kernel_spec = ('WEIGHTEDDEGREE', 'CHAR', 10, 20)

    from sg import sg

    def kernel_matrix(split):
        # Fetch the kernel matrix for one of 'TRAIN' / 'TEST'.
        return sg('get_kernel_matrix', split)

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', *kernel_spec)

    km = kernel_matrix('TRAIN')
    km = kernel_matrix('TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # Module-level names read by weighted_degree_string().
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')

    weighted_degree_string()
def mkl_multiclass():
    """Multiclass MKL over a combined LINEAR + GAUSSIAN + POLY kernel.

    Uses the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass``.  Existing kernel and feature state is
    cleared, a COMBINED kernel is built from three sub-kernels (each given
    its own copy of the TRAIN and TEST features), an MKL_MULTICLASS
    classifier is trained, and the classification result is printed.
    Returns nothing.
    """
    print('mkl_multiclass')

    cache_size = 10
    gauss_width = 1.2
    svm_c = 1.2
    svm_eps = 1e-5
    mkl_epsilon = 0.001
    norm = 1.5
    kernel_weight = 1.0

    from sg import sg

    # Start from a clean slate.
    sg('clean_kernel')
    for split in ('TRAIN', 'TEST'):
        sg('clean_features', split)

    sg('set_kernel', 'COMBINED', cache_size)

    # Each sub-kernel is followed by its TRAIN and TEST feature objects.
    subkernels = (
        ('LINEAR', 'REAL', cache_size),
        ('GAUSSIAN', 'REAL', cache_size, gauss_width),
        ('POLY', 'REAL', cache_size, 2),
    )
    for spec in subkernels:
        sg('add_kernel', kernel_weight, *spec)
        sg('add_features', 'TRAIN', fm_train_real)
        sg('add_features', 'TEST', fm_test_real)

    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('new_classifier', 'MKL_MULTICLASS')
    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('mkl_parameters', mkl_epsilon, 0.0, norm)
    sg('train_classifier')

    # No sg('set_features', 'TEST', ...) here: the TEST features were
    # already registered through the add_features calls above.
    outcome = sg('classify')
    print(outcome)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # Module-level names read by mkl_multiclass().
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = lm.load_labels('../data/label_train_multiclass.dat')

    mkl_multiclass()
# MKL regression example, written as a flat script: every name below is a
# module-level global and the statements run on import.
from sg import sg
from numpy import *
# num samples per group; weight is passed to every add_kernel call below.
num=100
weight=1.
# Label vector of length 2*num: num entries of -1 followed by num entries of +1.
labels=concatenate((-ones([1,num]), ones([1,num])),1)[0]
# 2 x (2*num) feature matrix: normal samples shifted by -1 in the first num
# columns and by +1 in the remaining num columns.  `random` is whatever the
# wildcard numpy import exposes under that name (presumably numpy.random).
features=concatenate((random.normal(size=(2,num))-1,random.normal(size=(2,num))+1),1)
tube_epsilon=1e-2
sg('new_classifier', 'MKL_REGRESSION')
sg('c', 1.)
sg('svr_tube_epsilon', tube_epsilon)
sg('set_labels', 'TRAIN', labels)
# The same feature matrix is added three times -- presumably one copy per
# sub-kernel of the combined kernel configured below.
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)
# Combined kernel made of three GAUSSIAN sub-kernels that differ only in
# their last argument (100., 10., 1.).
sg('set_kernel', 'COMBINED', 100)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 100.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 10.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 1.)
sg('train_classifier')
# get_svm's result is unpacked into two values, bound as bias and alphas.
[bias, alphas]=sg('get_svm');
# MKL classification example, written as a flat script: every name below is
# a module-level global and the statements run on import.
from sg import sg
from numpy import *
# num samples per class; weight is passed to every add_kernel call below.
num=100
weight=1.
# Label vector of length 2*num: num entries of -1 followed by num entries of +1.
labels=concatenate((-ones([1,num]), ones([1,num])),1)[0]
# 2 x (2*num) feature matrix: normal samples shifted by -1 in the first num
# columns and by +1 in the remaining num columns.  `random` is whatever the
# wildcard numpy import exposes under that name (presumably numpy.random).
features=concatenate((random.normal(size=(2,num))-1,random.normal(size=(2,num))+1),1)
# NOTE(review): 'c' is set before the classifier is created here, whereas the
# regression script above sets it afterwards -- confirm the order is irrelevant.
sg('c', 10.)
sg('new_classifier', 'MKL_CLASSIFICATION')
sg('set_labels', 'TRAIN', labels)
# The same feature matrix is added three times -- presumably one copy per
# sub-kernel of the combined kernel configured below.
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)
# Combined kernel made of three GAUSSIAN sub-kernels that differ only in
# their last argument (100., 10., 1.).
sg('set_kernel', 'COMBINED', 100)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 100.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 10.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 1.)
sg('train_classifier')
# get_svm's result is unpacked into two values, bound as bias and alphas.
[bias, alphas]=sg('get_svm');
def log_plus_one():
    """LOGPLUSONE preprocessor in front of a CHI2 kernel.

    Uses the module-level ``fm_train_real`` and ``fm_test_real``.  The
    preprocessor and kernel are registered first; each data set is then
    loaded, has the preprocessor attached and its kernel matrix requested.
    Returns nothing.
    """
    print('LogPlusOne')

    chi2_width = 1.4
    cache_size = 10

    from sg import sg

    def preprocessed_kernel_matrix(split, data):
        # Load one data set, attach the preprocessor, fetch its kernel matrix.
        sg('set_features', split, data)
        sg('attach_preproc', split)
        return sg('get_kernel_matrix', split)

    sg('add_preproc', 'LOGPLUSONE')
    sg('set_kernel', 'CHI2', 'REAL', cache_size, chi2_width)

    km = preprocessed_kernel_matrix('TRAIN', fm_train_real)
    km = preprocessed_kernel_matrix('TEST', fm_test_real)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # Module-level names read by log_plus_one().
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    log_plus_one()
def norm_one():
    """NORMONE preprocessor in front of a CHI2 kernel.

    Uses the module-level ``fm_train_real`` and ``fm_test_real``.  For TRAIN
    and then TEST the features are set, the preprocessor is attached and the
    kernel matrix is requested.  Returns nothing.
    """
    print('NormOne')

    kernel_args = ('CHI2', 'REAL', 10, 1.4)  # name, type, cache size, width

    from sg import sg
    sg('add_preproc', 'NORMONE')
    sg('set_kernel', *kernel_args)

    for split in ('TRAIN', 'TEST'):
        # Look the data up per iteration so each global is read only when
        # its own split is processed.
        data = fm_train_real if split == 'TRAIN' else fm_test_real
        sg('set_features', split, data)
        sg('attach_preproc', split)
        km = sg('get_kernel_matrix', split)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # Module-level names read by norm_one().
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    norm_one()
def prune_var_sub_mean():
    """PRUNEVARSUBMEAN preprocessor in front of a CHI2 kernel.

    Uses the module-level ``fm_train_real`` and ``fm_test_real``.  The
    preprocessor is registered with its ``divide_by_std`` flag set to True.
    Returns nothing; the TRAIN and TEST kernel matrices are only bound to a
    local name.
    """
    print('PruneVarSubMean')

    from sg import sg

    scale_by_std = True
    cache_size = 10
    chi2_width = 1.4

    sg('add_preproc', 'PRUNEVARSUBMEAN', scale_by_std)
    sg('set_kernel', 'CHI2', 'REAL', cache_size, chi2_width)

    # TRAIN pass
    sg('set_features', 'TRAIN', fm_train_real)
    sg('attach_preproc', 'TRAIN')
    km = sg('get_kernel_matrix', 'TRAIN')

    # TEST pass
    sg('set_features', 'TEST', fm_test_real)
    sg('attach_preproc', 'TEST')
    km = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # Module-level names read by prune_var_sub_mean().
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')

    prune_var_sub_mean()
def sort_ulong_string():
    """SORTULONGSTRING preprocessor in front of a COMMSTRING/ULONG kernel.

    Uses the module-level ``fm_train_dna`` and ``fm_test_dna``.  The
    preprocessor is registered first; each data set is then loaded,
    converted from CHAR strings to ULONG strings and has the preprocessor
    attached.  Finally the kernel matrix is requested for TRAIN and TEST.
    Returns nothing.
    """
    # Banner names this example (the preprocessor), not the kernel it uses.
    print('SortUlongString')

    size_cache = 10
    order = 3
    gap = 0
    # The interface takes a flag character rather than a boolean here:
    # 'n' leaves reversal off, 'r' turns it on.
    reverse = 'n'
    use_sign = False
    normalization = 'FULL'

    # Conversion arguments shared by the TRAIN and TEST sets.
    to_ulong = ('STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse)

    from sg import sg
    sg('add_preproc', 'SORTULONGSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', *to_ulong)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', *to_ulong)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization)
    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix
    lm = LoadMatrix()

    # Module-level names read by sort_ulong_string().
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    sort_ulong_string()
def sort_word_string():
    """SORTWORDSTRING preprocessor in front of a COMMSTRING/WORD kernel.

    Uses the module-level ``fm_train_dna`` and ``fm_test_dna``.  The
    preprocessor is registered first; each data set is then loaded,
    converted from CHAR strings to WORD strings and has the preprocessor
    attached.  Finally the kernel matrix is requested for TRAIN and TEST.
    Returns nothing.
    """
    # Banner names this example (the preprocessor), not the kernel it uses.
    print('SortWordString')

    size_cache = 10
    order = 3
    gap = 0
    # The interface takes a flag character rather than a boolean here:
    # 'n' leaves reversal off, 'r' turns it on.
    reverse = 'n'
    use_sign = False
    normalization = 'FULL'

    # Conversion arguments shared by the TRAIN and TEST sets.
    to_word = ('STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)

    from sg import sg
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', *to_word)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', *to_word)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization)
    km = sg('get_kernel_matrix', 'TRAIN')
    km = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix
    lm = LoadMatrix()

    # Module-level names read by sort_word_string().
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    sort_word_string()
def krr():
    """KRR regression with a GAUSSIAN kernel.

    Uses the module-level ``fm_train``, ``fm_test`` and ``label_train``.
    The regressor is trained on the TRAIN features and labels, after which
    the TEST features are set and ``classify`` is invoked.  Returns nothing;
    the output is only bound to a local name.
    """
    print('KRR')

    from sg import sg

    cache_size = 10
    gauss_width = 2.1
    c_value = 1.2
    krr_tau = 1e-6

    # Training data and kernel
    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, gauss_width)
    sg('set_labels', 'TRAIN', label_train)

    # Regressor set-up and training
    sg('new_regression', 'KRR')
    sg('krr_tau', krr_tau)
    sg('c', c_value)
    sg('train_regression')

    # Apply to the test data
    sg('set_features', 'TEST', fm_test)
    outcome = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # Module-level names read by krr().
    fm_train = lm.load_numbers('../data/fm_train_real.dat')
    fm_test = lm.load_numbers('../data/fm_test_real.dat')
    label_train = lm.load_labels('../data/label_train_twoclass.dat')

    krr()
def libsvr():
    """LIBSVR regression with a GAUSSIAN kernel.

    Uses the module-level ``fm_train``, ``fm_test`` and ``label_train``.
    The regressor is configured with a tube epsilon and C, trained on the
    TRAIN data, then the TEST features are set and ``classify`` is invoked.
    Returns nothing; the output is only bound to a local name.
    """
    print('LibSVR')

    size_cache = 10
    width = 2.1
    C = 1.2
    tube_epsilon = 1e-2

    from sg import sg
    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

    sg('set_labels', 'TRAIN', label_train)
    sg('new_regression', 'LIBSVR')
    sg('svr_tube_epsilon', tube_epsilon)
    sg('c', C)
    sg('train_regression')

    sg('set_features', 'TEST', fm_test)
    result = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix
    lm = LoadMatrix()

    # Module-level names read by libsvr().
    fm_train = lm.load_numbers('../data/fm_train_real.dat')
    fm_test = lm.load_numbers('../data/fm_test_real.dat')
    label_train = lm.load_labels('../data/label_train_twoclass.dat')
    libsvr()
def svr_light():
    """SVRLIGHT regression with a GAUSSIAN kernel.

    Uses the module-level ``fm_train``, ``fm_test`` and ``label_train``.
    If creating the SVRLIGHT regressor raises ``RuntimeError`` the example
    stops quietly after the features, kernel and labels have been set.
    Otherwise the regressor is trained, the TEST features are set and
    ``classify`` is invoked.  Returns nothing in either case.
    """
    print('SVRLight')

    size_cache = 10
    width = 2.1
    C = 1.2
    tube_epsilon = 1e-2

    from sg import sg
    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

    sg('set_labels', 'TRAIN', label_train)
    try:
        sg('new_regression', 'SVRLIGHT')
    except RuntimeError:
        # Deliberate best-effort skip -- presumably SVRLIGHT is not
        # available in every build of the library (TODO confirm).
        return

    sg('svr_tube_epsilon', tube_epsilon)
    sg('c', C)
    sg('train_regression')

    sg('set_features', 'TEST', fm_test)
    result = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix
    lm = LoadMatrix()

    # Module-level names read by svr_light().
    fm_train = lm.load_numbers('../data/fm_train_real.dat')
    fm_test = lm.load_numbers('../data/fm_test_real.dat')
    label_train = lm.load_labels('../data/label_train_twoclass.dat')
    svr_light()