Package mvpa :: Package clfs :: Module warehouse
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.clfs.warehouse

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Collection of classifiers to ease the exploration. 
 10  """ 
 11   
 12  __docformat__ = 'restructuredtext' 
 13   
 14  from sets import Set 
 15  import operator 
 16   
 17  # Data 
 18  from mvpa.datasets.splitter import OddEvenSplitter 
 19   
 20  # Define sets of classifiers 
 21  from mvpa.clfs.base import FeatureSelectionClassifier, SplitClassifier, \ 
 22                                   MulticlassClassifier 
 23  from mvpa.clfs.smlr import SMLR 
 24  from mvpa.clfs.knn import kNN 
 25  from mvpa.clfs.gpr import GPR 
 26   
 27  # Helpers 
 28  from mvpa.clfs.transerror import TransferError 
 29  from mvpa.base import externals 
 30  from mvpa.measures.anova import OneWayAnova 
 31  from mvpa.misc.transformers import Absolute 
 32  from mvpa.featsel.rfe import RFE 
 33  from mvpa.clfs.smlr import SMLRWeights 
 34  from mvpa.featsel.helpers import FractionTailSelector, \ 
 35      FixedNElementTailSelector, RangeElementSelector, \ 
 36      FixedErrorThresholdStopCrit 
 37  from mvpa.clfs.transerror import ConfusionBasedError 
 38  from mvpa.featsel.base import SensitivityBasedFeatureSelection 
 39   
# Full list of tags ("clf internals") a registered classifier may carry.
# The module-level Warehouse below is created with this list, and its
# __iadd__ rejects any classifier whose _clf_internals are not a subset.
_KNOWN_INTERNALS=[ 'knn', 'binary', 'svm', 'linear',
        'smlr', 'does_feature_selection', 'has_sensitivity',
        'multiclass', 'non-linear', 'kernel-based', 'lars',
        'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr' ]
class Warehouse(object):
    """Class to keep known instantiated classifiers.

    Should provide easy ways to select classifiers of needed kind:
    clfs['linear', 'svm'] should return all linear SVMs
    clfs['linear', 'multiclass'] should return all linear classifiers
    capable of doing multiclass classification.

    A tag prefixed with '!' excludes items carrying that tag, and the
    full slice ``warehouse[:]`` returns every registered item.
    """

    def __init__(self, known_tags=None):
        """
        :Parameters:
          known_tags : iterable of str or None
            Tags registered items are allowed to carry.  None yields an
            empty tag set (matching the old ``sets.Set(None)`` behavior).
        """
        # Builtin set replaces the deprecated sets.Set (removed in Python 3).
        # The `or []` guard is needed because builtin set(None) raises,
        # while sets.Set(None) silently produced an empty set.
        self.__known_tags = set(known_tags or [])
        self.__items = []
        # union of _clf_internals of everything registered so far
        self.__keys = set()

    def __getitem__(self, *args):
        """Return all items matching every requested tag.

        Raises ValueError if any requested tag (ignoring a leading '!')
        is not among the known tags.
        """
        # multiple subscripts, e.g. clfs['linear', 'svm'], arrive as one tuple
        if isinstance(args[0], tuple):
            args = args[0]

        # so we explicitely handle [:]
        if args == (slice(None),):
            args = []

        # lets remove optional modifier '!' before validating tags
        dargs = set([x.lstrip('!') for x in args]).difference(
            self.__known_tags)

        if len(dargs) > 0:
            # raise-as-call syntax: valid in both Python 2 and Python 3,
            # unlike the original `raise ValueError, "..."` statement form
            raise ValueError("Unknown internals %s requested. Known are %s" %
                             (list(dargs), list(self.__known_tags)))

        # dummy implementation for now: linear scan over all items
        result = []
        for item in self.__items:
            good = True
            for arg in args:
                if arg.startswith('!'):
                    # negated tag: item must NOT carry it
                    matched = arg[1:] not in item._clf_internals
                else:
                    matched = arg in item._clf_internals
                if not matched:
                    good = False
                    break
            if good:
                result.append(item)
        return result

    def __iadd__(self, item):
        """Register a classifier (or a list/tuple of them).

        Raises ValueError if an item lacks (non-empty) _clf_internals or
        carries a tag outside the known set.
        """
        # isinstance check replaces operator.isSequenceType (removed in
        # Python 3); in-file usage only ever adds lists or single items
        if isinstance(item, (list, tuple)):
            for item_ in item:
                self.__iadd__(item_)
        else:
            if not hasattr(item, '_clf_internals'):
                raise ValueError("Cannot register %s " % item +
                                 "which has no _clf_internals defined")
            if len(item._clf_internals) == 0:
                raise ValueError("Cannot register %s " % item +
                                 "which has empty _clf_internals")
            clf_internals = set(item._clf_internals)
            if clf_internals.issubset(self.__known_tags):
                self.__items.append(item)
                self.__keys |= clf_internals
            else:
                raise ValueError('Unknown clf internal(s) %s' %
                                 clf_internals.difference(self.__known_tags))
        return self

    @property
    def internals(self):
        """Set of tags carried by the registered items."""
        return self.__keys

    def listing(self):
        """Return [(descr, _clf_internals)] for every registered item."""
        return [(x.descr, x._clf_internals) for x in self.__items]

    @property
    def items(self):
        """List of registered items, in registration order."""
        return self.__items
# Module-level warehouse: the single registry queried as clfs[tag, ...]
clfs = Warehouse(known_tags=_KNOWN_INTERNALS)

# NB:
#  - Nu-classifiers are turned off since for haxby DS default nu
#    is an infeasible one
#  - Python's SMLR is turned off for the duration of development
#    since it is slow and results should be the same as of C version
#
clfs += [ SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
          SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
          SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
          #SMLR(lm=100.0, implementation="C", descr="SMLR(lm=100.0)"),
          # SMLR(implementation="Python", descr="SMLR(Python)")
          ]

# pairwise multiclass wrapper around the first SMLR registered above
clfs += \
    [ MulticlassClassifier(clfs['smlr'][0],
                           descr='Pairs+maxvote multiclass on ' + \
                           clfs['smlr'][0].descr) ]

if externals.exists('libsvm'):
    from mvpa.clfs import libsvm
    clfs += [libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
             libsvm.SVM(
                 C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
             libsvm.SVM(
                 C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
             libsvm.SVM(svm_impl='NU_SVC',
                        descr="libsvm.LinNuSVM(nu=def)", probability=1)
             ]
    clfs += [libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
             libsvm.SVM(kernel_type='RBF', svm_impl='NU_SVC',
                        descr="libsvm.RbfNuSVM(nu=def)"),
             libsvm.SVM(kernel_type='poly',
                        descr='libsvm.PolySVM()', probability=1),
             #libsvm.svm.SVM(kernel_type='sigmoid',
             #               svm_impl='C_SVC',
             #               descr='libsvm.SigmoidSVM()'),
             ]

if externals.exists('shogun'):
    from mvpa.clfs import sg
    # some classifiers are not yet ready to be used out-of-the-box in
    # PyMVPA, thus we don't populate warehouse with their instances
    bad_classifiers = [
        'mpd',  # was segfault, now non-training on testcases, and XOR.
                # and was described as "for educational purposes", thus
                # shouldn't be used for real data ;-)
                # Should be a drop-in replacement for lightsvm
        'gpbt', # fails to train for testAnalyzerWithSplitClassifier
                # also 'retraining' doesn't work -- fails to generalize
        'gmnp', # would fail with 'assertion Cache_Size > 2' if shogun < 0.6.3, also refuses to train
        'svrlight', # fails to 'generalize' as a binary classifier after 'binning'
        'krr', # fails to generalize
        ]
    if not externals.exists('sg_fixedcachesize'):
        # would fail with 'assertion Cache_Size > 2' if shogun < 0.6.3
        bad_classifiers.append('gnpp')

    for impl in sg.svm.known_svm_impl:
        # Uncomment the ones to disable
        if impl in bad_classifiers:
            continue
        clfs += [
            sg.SVM(
                descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=1.0, descr="sg.LinSVM(C=1)/%s" % impl, svm_impl=impl),
            ]
        clfs += [
            sg.SVM(kernel_type='RBF', descr="sg.RbfSVM()/%s" % impl, svm_impl=impl),
            # sg.SVM(kernel_type='RBF', descr="sg.RbfSVM(gamma=0.1)/%s" % impl, svm_impl=impl, gamma=0.1),
            # sg.SVM(descr="sg.SigmoidSVM()/%s" % impl, svm_impl=impl, kernel_type="sigmoid"),
            ]


if len(clfs['svm', 'linear']) > 0:
    # if any SVM implementation is known, import default ones
    # (brings LinearCSVMC / RbfCSVMC used below into scope)
    from mvpa.clfs.svm import *

# lars from R via RPy
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS
    for model in lars.known_models:
        # XXX create proper repository of classifiers!
        # NOTE(review): this assignment shadows the imported module name
        # `lars`; harmless here only because the loop iterator over
        # lars.known_models was created before the first rebinding.
        lars = LARS(descr="LARS(%s)" % model, model_type=model)
        clfs += lars
        # clfs += MulticlassClassifier(lars, descr='Multiclass %s' % lars.descr)

# kNN
clfs += kNN(k=5, descr="kNN(k=5)")

# kNN trained only on features SMLR assigned non-zero weight to
clfs += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           SMLRWeights(SMLR(lm=1.0, implementation="C")),
           RangeElementSelector(mode='select')),
        descr="kNN on SMLR(lm=1) non-0")

# kNN on the top 5% of features ranked by one-way ANOVA
clfs += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="kNN on 5%(ANOVA)")

# kNN on the 50 best ANOVA-ranked features
clfs += \
    FeatureSelectionClassifier(
        kNN(),
        SensitivityBasedFeatureSelection(
           OneWayAnova(),
           FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="kNN on 50(ANOVA)")


# GPR
clfs += GPR(descr="GPR()")


# SVM stuff

if externals.exists('shogun') or externals.exists('libsvm'):
    # "Interesting" classifiers
    clfs += \
        FeatureSelectionClassifier(
            LinearCSVMC(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1.0, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=1) non-0")


    # "Interesting" classifiers
    # NOTE(review): this entry is an exact duplicate of the
    # "LinSVM on SMLR(lm=1) non-0" classifier registered just above --
    # two equivalent instances end up in the warehouse.
    clfs += \
        FeatureSelectionClassifier(
            LinearCSVMC(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1.0, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="LinSVM on SMLR(lm=1) non-0")


    # "Interesting" classifiers
    clfs += \
        FeatureSelectionClassifier(
            RbfCSVMC(),
            SensitivityBasedFeatureSelection(
               SMLRWeights(SMLR(lm=1.0, implementation="C")),
               RangeElementSelector(mode='select')),
            descr="RbfSVM on SMLR(lm=1) non-0")

    clfs += \
        FeatureSelectionClassifier(
            LinearCSVMC(),
            SensitivityBasedFeatureSelection(
               OneWayAnova(),
               FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(ANOVA)")

    clfs += \
        FeatureSelectionClassifier(
            LinearCSVMC(),
            SensitivityBasedFeatureSelection(
               OneWayAnova(),
               FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(ANOVA)")

    # reuse the first registered linear SVM as its own sensitivity source
    sample_linear_svm = clfs['linear', 'svm'][0]

    clfs += \
        FeatureSelectionClassifier(
            sample_linear_svm,
            SensitivityBasedFeatureSelection(
               sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
               FractionTailSelector(0.05, mode='select', tail='upper')),
            descr="LinSVM on 5%(SVM)")

    clfs += \
        FeatureSelectionClassifier(
            sample_linear_svm,
            SensitivityBasedFeatureSelection(
               sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
               FixedNElementTailSelector(50, mode='select', tail='upper')),
            descr="LinSVM on 50(SVM)")


    # SVM with unbiased RFE -- transfer-error to another splits, or in
    # other terms leave-1-out error on the same dataset
    # Has to be bound outside of the RFE definition since both analyzer and
    # error should use the same instance.
    rfesvm_split = SplitClassifier(LinearCSVMC())#clfs['LinearSVMC'][0])

    # "Almost" classical RFE. If this works it would differ only that
    # our transfer_error is based on internal splitting and classifier used
    # within RFE is a split classifier and its sensitivities per split will get
    # averaged
    #

    #clfs += \
    #    FeatureSelectionClassifier(
    #        clf = LinearCSVMC(), #clfs['LinearSVMC'][0], # we train LinearSVM
    #        feature_selection = RFE( # on features selected via RFE
    #            # based on sensitivity of a clf which does splitting internally
    #            sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(),
    #            transfer_error=ConfusionBasedError(
    #                rfesvm_split,
    #                confusion_state="training_confusions"),
    #            # and whose internal error we use
    #            feature_selector=FractionTailSelector(
    #                0.2, mode='discard', tail='lower'),
    #            # remove 20% of features at each step
    #            update_sensitivity=True),
    #            # update sensitivity at each step
    #        descr='LinSVM+RFE(splits_avg)' )
    #
    #clfs += \
    #    FeatureSelectionClassifier(
    #        clf = LinearCSVMC(), #clfs['LinearSVMC'][0], # we train LinearSVM
    #        feature_selection = RFE( # on features selected via RFE
    #            # based on sensitivity of a clf which does splitting internally
    #            sensitivity_analyzer=rfesvm_split.getSensitivityAnalyzer(),
    #            transfer_error=ConfusionBasedError(
    #                rfesvm_split,
    #                confusion_state="training_confusions"),
    #            # and whose internal error we use
    #            feature_selector=FractionTailSelector(
    #                0.2, mode='discard', tail='lower'),
    #            # remove 20% of features at each step
    #            update_sensitivity=False),
    #            # update sensitivity at each step
    #        descr='LinSVM+RFE(splits_avg,static)' )

    rfesvm = LinearCSVMC()

    # This classifier will do RFE while taking transfer error to testing
    # set of that split. Resultant classifier is voted classifier on top
    # of all splits, let see what that would do ;-)
    #clfs += \
    #    SplitClassifier( # which does splitting internally
    #        FeatureSelectionClassifier(
    #            clf = LinearCSVMC(),
    #            feature_selection = RFE( # on features selected via RFE
    #                sensitivity_analyzer=\
    #                    rfesvm.getSensitivityAnalyzer(transformer=Absolute),
    #                transfer_error=TransferError(rfesvm),
    #                stopping_criterion=FixedErrorThresholdStopCrit(0.05),
    #                feature_selector=FractionTailSelector(
    #                    0.2, mode='discard', tail='lower'),
    #                # remove 20% of features at each step
    #                update_sensitivity=True)),
    #                # update sensitivity at each step
    #        descr='LinSVM+RFE(N-Fold)')
    #
    #
    #clfs += \
    #    SplitClassifier( # which does splitting internally
    #        FeatureSelectionClassifier(
    #            clf = LinearCSVMC(),
    #            feature_selection = RFE( # on features selected via RFE
    #                sensitivity_analyzer=\
    #                    rfesvm.getSensitivityAnalyzer(transformer=Absolute),
    #                transfer_error=TransferError(rfesvm),
    #                stopping_criterion=FixedErrorThresholdStopCrit(0.05),
    #                feature_selector=FractionTailSelector(
    #                    0.2, mode='discard', tail='lower'),
    #                # remove 20% of features at each step
    #                update_sensitivity=True)),
    #                # update sensitivity at each step
    #        splitter = OddEvenSplitter(),
    #        descr='LinSVM+RFE(OddEven)')