Hidden Markov Model. Structure and Function collection. This class implements a Hidden Markov Model. Several functions for tasks such as training, reading/writing models, reading observations, and calculation of derivatives are supplied.
|
Public Member Functions |
| bool | alloc_state_dependend_arrays () |
| | allocates memory that depends on N
|
| void | free_state_dependend_arrays () |
| | free memory that depends on N
|
| bool | linear_train (bool right_align=false) |
| | estimates linear model from observations.
|
| bool | permutation_entropy (INT window_width, INT sequence_number) |
| | compute permutation entropy
|
| virtual INT | get_num_relevant_model_parameters () |
| virtual DREAL | get_log_likelihood_sample () |
| virtual void | get_log_likelihood (DREAL **dst, INT *num) |
| virtual DREAL | get_model_parameter (INT num_param) |
| virtual DREAL | get_derivative (INT num_param, INT num_example) |
| virtual DREAL | get_likelihood_example (INT num_example) |
| virtual void | set_features (CFeatures *f) |
| virtual CFeatures * | get_features () |
| virtual void | set_pseudo_count (DREAL pseudo) |
| virtual DREAL | get_pseudo_count () |
|
Train definitions. Encapsulates model parameters that are constant/shall be learned. Consists of structures and access functions for learning only defined transitions and constants.
|
| | CHMM (INT N, INT M, CModel *model, DREAL PSEUDO) |
| | CHMM (CStringFeatures< WORD > *obs, INT N, INT M, DREAL PSEUDO) |
| | CHMM (INT N, double *p, double *q, double *a) |
| | CHMM (INT N, double *p, double *q, int num_trans, double *a_trans) |
| | CHMM (FILE *model_file, DREAL PSEUDO) |
| | CHMM (CHMM *h) |
| | Constructor - Clone model h.
|
| virtual | ~CHMM () |
| | Destructor - Cleanup.
|
| virtual bool | train () |
| virtual INT | get_num_model_parameters () |
| virtual DREAL | get_log_model_parameter (INT num_param) |
| virtual DREAL | get_log_derivative (INT num_param, INT num_example) |
| virtual DREAL | get_log_likelihood_example (INT num_example) |
| bool | initialize (CModel *model, DREAL PSEUDO, FILE *model_file=NULL) |
|
forward/backward/viterbi algorithm
|
| DREAL | forward_comp (INT time, INT state, INT dimension) |
| DREAL | forward_comp_old (INT time, INT state, INT dimension) |
| DREAL | backward_comp (INT time, INT state, INT dimension) |
| DREAL | backward_comp_old (INT time, INT state, INT dimension) |
| DREAL | best_path (INT dimension) |
| WORD | get_best_path_state (INT dim, INT t) |
| DREAL | model_probability_comp () |
| DREAL | model_probability (INT dimension=-1) |
| | inline proxy for model probability.
|
| DREAL | linear_model_probability (INT dimension) |
|
| bool | set_iterations (INT num) |
| INT | get_iterations () |
| bool | set_epsilon (DREAL eps) |
| DREAL | get_epsilon () |
| bool | baum_welch_viterbi_train (BaumWelchViterbiType type) |
|
| void | estimate_model_baum_welch (CHMM *train) |
| void | estimate_model_baum_welch_old (CHMM *train) |
| void | estimate_model_baum_welch_trans (CHMM *train) |
| void | estimate_model_baum_welch_defined (CHMM *train) |
| void | estimate_model_viterbi (CHMM *train) |
| void | estimate_model_viterbi_defined (CHMM *train) |
|
| void | output_model (bool verbose=false) |
| void | output_model_defined (bool verbose=false) |
| | performs output_model only for the defined transitions etc
|
|
| void | normalize (bool keep_dead_states=false) |
| | normalize the model to satisfy stochasticity
|
| void | add_states (INT num_states, DREAL default_val=0) |
| bool | append_model (CHMM *append_model, DREAL *cur_out, DREAL *app_out) |
| bool | append_model (CHMM *append_model) |
| void | chop (DREAL value) |
| | set any model parameter with probability smaller than value to ZERO
|
| void | convert_to_log () |
| | convert model to log probabilities
|
| void | init_model_random () |
| | init model with random values
|
| void | init_model_defined () |
| void | clear_model () |
| | initializes model with log(PSEUDO)
|
| void | clear_model_defined () |
| | initializes only parameters in learn_x with log(PSEUDO)
|
| void | copy_model (CHMM *l) |
| | copies the model parameters from l
|
| void | invalidate_model () |
| bool | get_status () const |
| DREAL | get_pseudo () const |
| | returns current pseudo value
|
| void | set_pseudo (DREAL pseudo) |
| | sets current pseudo value
|
|
| void | set_observations (CStringFeatures< WORD > *obs, CHMM *lambda=NULL) |
| void | set_observation_nocache (CStringFeatures< WORD > *obs) |
| CStringFeatures< WORD > * | get_observations () |
| | return observation pointer
|
|
for observations/model/traindefinitions
|
| bool | load_definitions (FILE *file, bool verbose, bool initialize=true) |
| bool | load_model (FILE *file) |
| bool | save_model (FILE *file) |
| bool | save_model_derivatives (FILE *file) |
| bool | save_model_derivatives_bin (FILE *file) |
| bool | save_model_bin (FILE *file) |
| bool | check_model_derivatives () |
| | numerically check whether derivatives were calculated correctly
|
| bool | check_model_derivatives_combined () |
| T_STATES * | get_path (INT dim, DREAL &prob) |
| bool | save_path (FILE *file) |
| bool | save_path_derivatives (FILE *file) |
| bool | save_path_derivatives_bin (FILE *file) |
| bool | save_likelihood_bin (FILE *file) |
| bool | save_likelihood (FILE *file) |
|
for all the arrays a,b,p,q,A,B,psi and scalar model parameters like N,M
|
| T_STATES | get_N () const |
| | access function for number of states N
|
| INT | get_M () const |
| | access function for number of observations M
|
| void | set_q (T_STATES offset, DREAL value) |
| void | set_p (T_STATES offset, DREAL value) |
| void | set_A (T_STATES line_, T_STATES column, DREAL value) |
| void | set_a (T_STATES line_, T_STATES column, DREAL value) |
| void | set_B (T_STATES line_, WORD column, DREAL value) |
| void | set_b (T_STATES line_, WORD column, DREAL value) |
| void | set_psi (INT time, T_STATES state, T_STATES value, INT dimension) |
| DREAL | get_q (T_STATES offset) const |
| DREAL | get_p (T_STATES offset) const |
| DREAL | get_A (T_STATES line_, T_STATES column) const |
| DREAL | get_a (T_STATES line_, T_STATES column) const |
| DREAL | get_B (T_STATES line_, WORD column) const |
| DREAL | get_b (T_STATES line_, WORD column) const |
| T_STATES | get_psi (INT time, T_STATES state, INT dimension) const |
|
management and access functions for observation matrix
|
| DREAL | state_probability (INT time, INT state, INT dimension) |
| | calculates probability of being in state i at time t for dimension
|
| DREAL | transition_probability (INT time, INT state_i, INT state_j, INT dimension) |
| | calculates probability of being in state i at time t and state j at time t+1 for dimension
|
|
computes log dp(lambda)/d lambda_i - Parameters:
-
| dimension | dimension for which derivatives are calculated |
| i,j | parameter specific |
|
| DREAL | linear_model_derivative (T_STATES i, WORD j, INT dimension) |
| DREAL | model_derivative_p (T_STATES i, INT dimension) |
| DREAL | model_derivative_q (T_STATES i, INT dimension) |
| DREAL | model_derivative_a (T_STATES i, T_STATES j, INT dimension) |
| | computes log dp(lambda)/d a_ij.
|
| DREAL | model_derivative_b (T_STATES i, WORD j, INT dimension) |
| | computes log dp(lambda)/d b_ij.
|
|
computes d log p(lambda,best_path)/d lambda_i - Parameters:
-
| dimension | dimension for which derivatives are calculated |
| i,j | parameter specific |
|
| DREAL | path_derivative_p (T_STATES i, INT dimension) |
| | computes d log p(lambda,best_path)/d p_i
|
| DREAL | path_derivative_q (T_STATES i, INT dimension) |
| | computes d log p(lambda,best_path)/d q_i
|
| DREAL | path_derivative_a (T_STATES i, T_STATES j, INT dimension) |
| | computes d log p(lambda,best_path)/d a_ij
|
| DREAL | path_derivative_b (T_STATES i, WORD j, INT dimension) |
| | computes d log p(lambda,best_path)/d b_ij
|
Static Public Attributes |
| static CParallel | parallel |
| static CIO | io |
| static CVersion | version |
Protected Member Functions |
| void | prepare_path_derivative (INT dim) |
| | initialization function that is called before path_derivatives are calculated
|
| DREAL | forward (INT time, INT state, INT dimension) |
| | inline proxies for forward pass
|
| DREAL | backward (INT time, INT state, INT dimension) |
| | inline proxies for backward pass
|
|
for reading model/definition/observation files
|
| bool | get_numbuffer (FILE *file, CHAR *buffer, INT length) |
| | put a sequence of numbers into the buffer
|
| void | open_bracket (FILE *file) |
| | expect open bracket.
|
| void | close_bracket (FILE *file) |
| | expect closing bracket
|
| bool | comma_or_space (FILE *file) |
| | expect comma or space.
|
| void | error (INT p_line, const CHAR *str) |
| | parse error messages
|
Protected Attributes |
| DREAL * | arrayN1 |
| DREAL * | arrayN2 |
| T_ALPHA_BETA | alpha_cache |
| | cache for forward variables; can be terribly huge: O(T*N)
|
| T_ALPHA_BETA | beta_cache |
| | cache for backward variables; can be terribly huge: O(T*N)
|
| T_STATES * | states_per_observation_psi |
| | backtracking table for viterbi; can be terribly huge: O(T*N)
|
| T_STATES * | path |
| | best path (=state sequence) through model
|
| bool | path_prob_updated |
| | true if path probability is up to date
|
| INT | path_prob_dimension |
| | dimension for which path_prob was calculated
|
| CFeatures * | features |
| DREAL | pseudo_count |
|
these are p,q,a,b,N,M etc
|
| INT | M |
| | number of observation symbols eg. ACGT -> 0123
|
| INT | N |
| | number of states
|
| DREAL | PSEUDO |
| | define pseudocounts against overfitting
|
| INT | line |
| CStringFeatures< WORD > * | p_observations |
| | observation matrix
|
| CModel * | model |
| DREAL * | transition_matrix_A |
| | matrix of absolute counts of transitions
|
| DREAL * | observation_matrix_B |
| | matrix of absolute counts of observations within each state
|
| DREAL * | transition_matrix_a |
| | transition matrix
|
| DREAL * | initial_state_distribution_p |
| | initial distribution of states
|
| DREAL * | end_state_distribution_q |
| | distribution of end-states
|
| DREAL * | observation_matrix_b |
| | distribution of observations within each state
|
| INT | iterations |
| | convergence criterion iterations
|
| INT | iteration_count |
| DREAL | epsilon |
| | convergence criterion epsilon
|
| INT | conv_it |
| DREAL | all_pat_prob |
| | probability of best path
|
| DREAL | pat_prob |
| | probability of best path
|
| DREAL | mod_prob |
| | probability of model
|
| bool | mod_prob_updated |
| | true if model probability is up to date
|
| bool | all_path_prob_updated |
| | true if path probability is up to date
|
| INT | path_deriv_dimension |
| | dimension for which path_deriv was calculated
|
| bool | path_deriv_updated |
| | true if path derivative is up to date
|
| bool | loglikelihood |
| bool | status |
| bool | reused_caches |
Static Protected Attributes |
| static const INT | GOTN = (1<<1) |
| static const INT | GOTM = (1<<2) |
| static const INT | GOTO = (1<<3) |
| static const INT | GOTa = (1<<4) |
| static const INT | GOTb = (1<<5) |
| static const INT | GOTp = (1<<6) |
| static const INT | GOTq = (1<<7) |
| static const INT | GOTlearn_a = (1<<1) |
| static const INT | GOTlearn_b = (1<<2) |
| static const INT | GOTlearn_p = (1<<3) |
| static const INT | GOTlearn_q = (1<<4) |
| static const INT | GOTconst_a = (1<<5) |
| static const INT | GOTconst_b = (1<<6) |
| static const INT | GOTconst_p = (1<<7) |
| static const INT | GOTconst_q = (1<<8) |
| bool CHMM::load_definitions |
( |
FILE * |
file, |
|
|
bool |
verbose, |
|
|
bool |
initialize = true | |
|
) |
| | |
read definitions file (learn_x,const_x) used for training. -format specs: definition_file (train.def) % HMM-TRAIN - specification % learn_a - elements in state_transition_matrix to be learned % learn_b - elements in observation_per_state_matrix to be learned % note: each line stands for % state, observation(0), observation(1)...observation(NOW) % learn_p - elements in initial distribution to be learned % learn_q - elements in the end-state distribution to be learned % % const_x - specifies initial values of elements % rest is assumed to be 0.0 % % NOTE: IMPLICIT DEFINES: % define A 0 % define C 1 % define G 2 % define T 3
learn_a=[ [INT,INT]; [INT,INT]; [INT,INT]; ........ [INT,INT]; [-1,-1]; ];
learn_b=[ [INT,INT,INT,...,INT]; [INT,INT,INT,...,INT]; [INT,INT,INT,...,INT]; ........ [INT,INT,INT,...,INT]; [-1,-1]; ];
learn_p= [ INT, ... , INT, -1 ];
learn_q= [ INT, ... , INT, -1 ];
const_a=[ [INT,INT,DREAL]; [INT,INT,DREAL]; [INT,INT,DREAL]; ........ [INT,INT,DREAL]; [-1,-1,-1]; ];
const_b=[ [INT,INT,INT,...,INT,DREAL]; [INT,INT,INT,...,INT,DREAL]; [INT,INT,INT,...,INT,DREAL]; ........ [INT,INT,INT,...,INT,DREAL]; [-1,-1,-1]; ];
const_p[]=[ [INT, DREAL], ... , [INT,DREAL], [-1,-1] ]; const_q[]=[ [INT, DREAL], ... , [INT,DREAL], [-1,-1] ];
- Parameters:
-
| file | filehandle to definitions file |
| verbose | true for verbose messages |
| initialize | true to initialize to underlying HMM |
Definition at line 3460 of file HMM.cpp.