Ripper 分类算法

源代码在线查看: clean-data-main.c

软件大小: 2155 K
上传用户: xufengping716
关键词: Ripper 分类算法
下载地址: 免注册下载 普通下载 VIP

相关代码

				/*****************************************************************************				 * clean-data-main.c --- main routine to remove conflicting examples				 ****************************************************************************/								#include 				#include "ripper.h"				#include "protos.h"				#include "mdb.h"								/******************************************************************************/				char *Program="clean-data";				char *Help_str[] = {				    "syntax: clean-data [options] [stem]",				    "   'clean' dataset by removing examples with inconsistent labels",				    "",				    "options are:",				    "  -c class: assume inconsistent examples are of class c",				    "  -s:       read from stdinput",				    "  -v#:      set verbosity",				    NULL				};								static BOOL equiv_example(example_t *,example_t *);								main(argc,argv)				int argc;				char *argv[];				{				    vec_t *data;				    char *stem;				    int use_stdin;				    int i,j,o;				    example_t *exi,*exj;				    char *def_class_name=NULL;				    symbol_t *def_class=NULL;				    				    /* defaults */				    use_stdin = FALSE;				    set_trace_level(SUMM);								    while ((o=getopt(argc,argv,"stv:hc:"))!=EOF) {					switch (o) {					  case 'c':					    def_class_name = optarg;					    break;					  case 's':					    use_stdin = TRUE;					    break;					  case 'v':					    set_trace_level(atoi(optarg)); 					    break;					  case 'h':					  case '?':					  default: 					    give_help();					    if (o=='h') exit(0);					    else fatal("option not implemented");					}				    }				    if (optind					stem = argv[optind++];					ld_names(add_ext(stem,".names"));					if (use_stdin) data = ld_data(NULL);					else data = ld_data(add_ext(stem,".data"));				    } else {					data = ld_data(NULL);				    }								    if (optind					warning("not all arguments were used: %s ...",argv[optind]);				    }								    if (!data || vmax(data)==0) fatal("no examples");								    if (def_class_name) {					def_class = intern(def_class_name);				    }				    for (i=0; i					exi = vref(example_t,data,i);					for (j=i+1; j					    exj = vref(example_t,data,j);					    if (equiv_example(exi,exj)) {						if (def_class != NULL) {						    exi->lab.nom = def_class;						}						swap_out_example(data,j);					    } else {						j++;					    }					}				    }				    print_data(data);				}								static BOOL equiv_example(example_t *exi,example_t *exj)				{				    int k,m;				    aval_t *aik,*ajk;				    symbol_t *sikm,*sjkm;								    if (vmax(exi->inst)!=vmax(exj->inst)) return FALSE;				    for (k=0; kinst); k++) {					aik = vref(aval_t,exi->inst,k);					ajk = vref(aval_t,exj->inst,k);					if (aik->kind != ajk->kind) return FALSE;					switch (aik->kind) {					  case MISSING_VALUE:					  case IGNORE:					    break;					  case SYMBOL:					    if (aik->u.nom != ajk->u.nom) return FALSE;					    break;					  case CONTINUOUS:					    if (aik->u.num != ajk->u.num) return FALSE;					    break;					  case SET:					    if (vmax(aik->u.set) != vmax(ajk->u.set)) return FALSE;					    for (m=0; mu.set); m++) {						sikm = *vref(symbol_t *,aik->u.set,m);						sjkm = *vref(symbol_t *,ajk->u.set,m);						if (sikm != sjkm) return FALSE;					    }					    break;					}				    }				    return TRUE;				}							

相关资源