1
2
3
4
5
6
7
8
"""Collection of classifiers to ease the exploration.

Defines the `Warehouse` registry and fills a module-level instance,
`clfs`, with ready-made classifiers that can then be selected by tag,
e.g. ``clfs['linear', 'svm']``.
"""

__docformat__ = 'restructuredtext'
13
14 from sets import Set
15 import operator
16
17
18 from mvpa.datasets.splitter import OddEvenSplitter
19
20
21 from mvpa.clfs.base import FeatureSelectionClassifier, SplitClassifier, \
22 MulticlassClassifier
23 from mvpa.clfs.smlr import SMLR
24 from mvpa.clfs.knn import kNN
25 from mvpa.clfs.gpr import GPR
26
27
28 from mvpa.clfs.transerror import TransferError
29 from mvpa.base import externals
30 from mvpa.measures.anova import OneWayAnova
31 from mvpa.misc.transformers import Absolute
32 from mvpa.featsel.rfe import RFE
33 from mvpa.clfs.smlr import SMLRWeights
34 from mvpa.featsel.helpers import FractionTailSelector, \
35 FixedNElementTailSelector, RangeElementSelector, \
36 FixedErrorThresholdStopCrit
37 from mvpa.clfs.transerror import ConfusionBasedError
38 from mvpa.featsel.base import SensitivityBasedFeatureSelection
39
# Every tag a classifier may list in its `_clf_internals`.  The list is handed
# to `Warehouse(known_tags=...)` further down, which refuses to register an
# item, or answer a query, that uses a tag not named here.
_KNOWN_INTERNALS = [
    'knn', 'binary', 'svm', 'linear',
    'smlr', 'does_feature_selection', 'has_sensitivity',
    'multiclass', 'non-linear', 'kernel-based', 'lars',
    'regression', 'libsvm', 'sg', 'meta', 'retrainable', 'gpr',
]
class Warehouse(object):
    """Class to keep known instantiated classifiers

    Should provide easy ways to select classifiers of needed kind:
    clfs['linear', 'svm'] should return all linear SVMs
    clfs['linear', 'multiclass'] should return all linear classifiers
    capable of doing multiclass classification

    Items are registered with ``+=`` (one object or a sequence of them).
    Each item must expose a non-empty ``_clf_internals`` collection of
    tags, all taken from the tags given at construction time.  In a query
    a tag prefixed with ``!`` selects items that do *not* carry it, and
    ``[:]`` returns every registered item.

    NOTE(review): the ``class`` and ``def`` header lines were missing from
    the listing this block was restored from.  The signatures below are
    reconstructed from the method bodies and from how the module-level
    ``clfs`` instance is used -- confirm against the upstream file.
    """

    def __init__(self, known_tags=None):
        """Create an empty warehouse.

        :Parameters:
          known_tags
            Iterable of the tags that items and queries may use.  ``None``
            is treated as "no tags known".
        """
        if known_tags is None:
            known_tags = []
        self.__known_tags = Set(known_tags)
        # Registered items, in registration order
        self.__items = []
        # Union of the tags carried by all registered items
        self.__keys = Set()

    def __getitem__(self, *args):
        """Return the registered items that satisfy every requested tag.

        A plain tag requires the item to carry it; a tag prefixed with
        ``!`` requires the item not to carry it.

        :Returns:
          List of matching items, in registration order.

        :Raises ValueError:
          If a requested tag (negation marker ignored) is not among the
          known tags.
        """
        # A multi-tag subscript such as w['a', 'b'] arrives as one tuple
        if isinstance(args[0], tuple):
            args = args[0]

        # w[:] arrives as a lone full slice and means "no filtering"
        if args == (slice(None),):
            args = []

        # Validate the requested tags with the negation marker stripped
        unknown = Set([tag.lstrip('!') for tag in args]).difference(
            self.__known_tags)
        if unknown:
            raise ValueError(
                "Unknown internals %s requested. Known are %s"
                % (list(unknown), list(self.__known_tags)))

        def _matches(item):
            """Tell whether `item` satisfies all of the requested tags."""
            for arg in args:
                if arg.startswith('!'):
                    if arg[1:] in item._clf_internals:
                        return False
                elif arg not in item._clf_internals:
                    return False
            return True

        return [item for item in self.__items if _matches(item)]

    def __iadd__(self, item):
        """Register `item`, or each member of `item` if it is a sequence.

        :Returns:
          The warehouse itself, so that ``w += item`` keeps ``w`` bound
          to it.

        :Raises ValueError:
          If an item has no ``_clf_internals``, has an empty one, or
          carries a tag that is not among the known tags.
        """
        if operator.isSequenceType(item):
            # Register the members one by one (nested sequences recurse)
            for member in item:
                self.__iadd__(member)
            return self

        if not hasattr(item, '_clf_internals'):
            raise ValueError(
                "Cannot register %s " % item
                + "which has no _clf_internals defined")
        if len(item._clf_internals) == 0:
            raise ValueError(
                "Cannot register %s " % item
                + "which has empty _clf_internals")

        clf_internals = Set(item._clf_internals)
        if not clf_internals.issubset(self.__known_tags):
            raise ValueError(
                'Unknown clf internal(s) %s'
                % clf_internals.difference(self.__known_tags))

        self.__items.append(item)
        self.__keys |= clf_internals
        return self
109
110 @property
113
116
117 @property
120
# The module-wide registry.  Everything below is added to it with `+=`, and
# only the tags listed in _KNOWN_INTERNALS are accepted.
clfs = Warehouse(known_tags=_KNOWN_INTERNALS)
122
123
124
125
126
127
128
# SMLR (C implementation) at three settings of `lm`.
clfs += [
    SMLR(lm=0.1, implementation="C", descr="SMLR(lm=0.1)"),
    SMLR(lm=1.0, implementation="C", descr="SMLR(lm=1.0)"),
    SMLR(lm=10.0, implementation="C", descr="SMLR(lm=10.0)"),
]

# Wrap the first item currently tagged 'smlr' in a MulticlassClassifier and
# build the wrapper's description from that item's own one.
clfs += [
    MulticlassClassifier(
        clfs['smlr'][0],
        descr='Pairs+maxvote multiclass on ' + clfs['smlr'][0].descr),
]
140
# libsvm-backed SVMs, registered only when `externals` reports libsvm.
# Both registrations use the module imported under the guard, so both belong
# inside it.
if externals.exists('libsvm'):
    from mvpa.clfs import libsvm

    # SVMs created without an explicit kernel_type; per their descriptions
    # these are the linear ones.
    # NOTE(review): C=-10.0 is labelled "C=10*def" -- presumably a negative
    # C is read as a multiple of the default; confirm in libsvm.SVM.
    clfs += [
        libsvm.SVM(descr="libsvm.LinSVM(C=def)", probability=1),
        libsvm.SVM(
            C=-10.0, descr="libsvm.LinSVM(C=10*def)", probability=1),
        libsvm.SVM(
            C=1.0, descr="libsvm.LinSVM(C=1)", probability=1),
        libsvm.SVM(
            svm_impl='NU_SVC',
            descr="libsvm.LinNuSVM(nu=def)", probability=1),
    ]

    # SVMs with an explicit RBF or polynomial kernel.
    clfs += [
        libsvm.SVM(kernel_type='RBF', descr="libsvm.RbfSVM()"),
        libsvm.SVM(
            kernel_type='RBF', svm_impl='NU_SVC',
            descr="libsvm.RbfNuSVM(nu=def)"),
        libsvm.SVM(
            kernel_type='poly',
            descr='libsvm.PolySVM()', probability=1),
    ]
160
# shogun-backed SVMs, registered only when `externals` reports shogun.
if externals.exists('shogun'):
    from mvpa.clfs import sg

    # Implementations that are never registered.
    # NOTE(review): the reasons for excluding each one are not recorded here.
    bad_classifiers = [
        'mpd',
        'gpbt',
        'gmnp',
        'svrlight',
        'krr',
    ]
    # 'gnpp' is excluded as well unless 'sg_fixedcachesize' is reported.
    if not externals.exists('sg_fixedcachesize'):
        bad_classifiers.append('gnpp')

    # Both registrations below use `impl`, so both belong inside the loop.
    for impl in sg.svm.known_svm_impl:
        if impl in bad_classifiers:
            continue

        # SVMs created without an explicit kernel_type, at three settings
        # of C (default, -10.0 labelled "10*def", and 1.0).
        clfs += [
            sg.SVM(
                descr="sg.LinSVM(C=def)/%s" % impl, svm_impl=impl),
            sg.SVM(
                C=-10.0, descr="sg.LinSVM(C=10*def)/%s" % impl,
                svm_impl=impl),
            sg.SVM(
                C=1.0, descr="sg.LinSVM(C=1)/%s" % impl, svm_impl=impl),
        ]

        # One RBF-kernel SVM per implementation.
        clfs += [
            sg.SVM(
                kernel_type='RBF', descr="sg.RbfSVM()/%s" % impl,
                svm_impl=impl),
        ]
197
198
199
# Import everything from mvpa.clfs.svm, but only when at least one item tagged
# both 'svm' and 'linear' was registered above.
# NOTE(review): presumably this is where LinearCSVMC and RbfCSVMC, used
# further down, come from -- they are not imported anywhere else in view.
if clfs['svm', 'linear']:
    from mvpa.clfs.svm import *
203
204
# One LARS classifier per model type listed by the lars module, registered
# only when `externals` reports lars.
if externals.exists('lars'):
    import mvpa.clfs.lars as lars
    from mvpa.clfs.lars import LARS
    for model in lars.known_models:
        # Kept under its own name so that the `lars` module alias, which the
        # loop iterates from, is not rebound to a classifier instance.
        lars_clf = LARS(descr="LARS(%s)" % model, model_type=model)
        clfs += lars_clf
212
213
214
# Plain kNN with k=5.
clfs += kNN(k=5, descr="kNN(k=5)")

# kNN on the features picked by a RangeElementSelector from the weights of
# an SMLR(lm=1.0); the description calls these the "non-0" ones.
clfs += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        SMLRWeights(SMLR(lm=1.0, implementation="C")),
        RangeElementSelector(mode='select')),
    descr="kNN on SMLR(lm=1) non-0")

# kNN on the upper-tail 0.05 fraction of features ranked by one-way ANOVA.
clfs += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FractionTailSelector(0.05, mode='select', tail='upper')),
    descr="kNN on 5%(ANOVA)")

# kNN on the 50 upper-tail features ranked by one-way ANOVA.
clfs += FeatureSelectionClassifier(
    kNN(),
    SensitivityBasedFeatureSelection(
        OneWayAnova(),
        FixedNElementTailSelector(50, mode='select', tail='upper')),
    descr="kNN on 50(ANOVA)")
240
241
242
# GPR with its default settings.
clfs += GPR(descr="GPR()")
244
245
246
247
# Feature-selection classifiers built on LinearCSVMC / RbfCSVMC, registered
# only when `externals` reports one of the two SVM backends.
# NOTE(review): indentation was lost in the listing this was restored from.
# Every statement down to `rfesvm` is placed under the guard because each one
# needs LinearCSVMC/RbfCSVMC or a registered linear SVM -- confirm upstream.
if externals.exists('shogun') or externals.exists('libsvm'):

    # NOTE(review): this registration and the next are identical (same
    # arguments, same descr); it looks like a copy/paste leftover.  Both
    # are kept so the contents and order of `clfs` do not change.
    clfs += FeatureSelectionClassifier(
        LinearCSVMC(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="LinSVM on SMLR(lm=1) non-0")

    clfs += FeatureSelectionClassifier(
        LinearCSVMC(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="LinSVM on SMLR(lm=1) non-0")

    # Same SMLR-based selection, feeding an RBF SVM.
    clfs += FeatureSelectionClassifier(
        RbfCSVMC(),
        SensitivityBasedFeatureSelection(
            SMLRWeights(SMLR(lm=1.0, implementation="C")),
            RangeElementSelector(mode='select')),
        descr="RbfSVM on SMLR(lm=1) non-0")

    # Linear SVM on the upper-tail 0.05 fraction of ANOVA-ranked features.
    clfs += FeatureSelectionClassifier(
        LinearCSVMC(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="LinSVM on 5%(ANOVA)")

    # Linear SVM on the 50 upper-tail ANOVA-ranked features.
    clfs += FeatureSelectionClassifier(
        LinearCSVMC(),
        SensitivityBasedFeatureSelection(
            OneWayAnova(),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="LinSVM on 50(ANOVA)")

    # First registered item tagged both 'linear' and 'svm'.  The same
    # instance serves below as the classifier and as the source of the
    # sensitivity analyzer used for selection.
    sample_linear_svm = clfs['linear', 'svm'][0]

    clfs += FeatureSelectionClassifier(
        sample_linear_svm,
        SensitivityBasedFeatureSelection(
            sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
            FractionTailSelector(0.05, mode='select', tail='upper')),
        descr="LinSVM on 5%(SVM)")

    clfs += FeatureSelectionClassifier(
        sample_linear_svm,
        SensitivityBasedFeatureSelection(
            sample_linear_svm.getSensitivityAnalyzer(transformer=Absolute),
            FixedNElementTailSelector(50, mode='select', tail='upper')),
        descr="LinSVM on 50(SVM)")

    # Created but not registered anywhere in the visible code.
    # NOTE(review): judging by the names and the unused RFE imports, these
    # look like leftovers of RFE-based classifiers -- confirm upstream.
    rfesvm_split = SplitClassifier(LinearCSVMC())

    rfesvm = LinearCSVMC()
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397