Ripper 分类算法
源代码在线查看: clean-data-main.c
/***************************************************************************** * clean-data-main.c --- main routine to remove conflicting examples ****************************************************************************/ #include #include "ripper.h" #include "protos.h" #include "mdb.h" /******************************************************************************/ char *Program="clean-data"; char *Help_str[] = { "syntax: clean-data [options] [stem]", " 'clean' dataset by removing examples with inconsistent labels", "", "options are:", " -c class: assume inconsistent examples are of class c", " -s: read from stdinput", " -v#: set verbosity", NULL }; static BOOL equiv_example(example_t *,example_t *); main(argc,argv) int argc; char *argv[]; { vec_t *data; char *stem; int use_stdin; int i,j,o; example_t *exi,*exj; char *def_class_name=NULL; symbol_t *def_class=NULL; /* defaults */ use_stdin = FALSE; set_trace_level(SUMM); while ((o=getopt(argc,argv,"stv:hc:"))!=EOF) { switch (o) { case 'c': def_class_name = optarg; break; case 's': use_stdin = TRUE; break; case 'v': set_trace_level(atoi(optarg)); break; case 'h': case '?': default: give_help(); if (o=='h') exit(0); else fatal("option not implemented"); } } if (optind stem = argv[optind++]; ld_names(add_ext(stem,".names")); if (use_stdin) data = ld_data(NULL); else data = ld_data(add_ext(stem,".data")); } else { data = ld_data(NULL); } if (optind warning("not all arguments were used: %s ...",argv[optind]); } if (!data || vmax(data)==0) fatal("no examples"); if (def_class_name) { def_class = intern(def_class_name); } for (i=0; i exi = vref(example_t,data,i); for (j=i+1; j exj = vref(example_t,data,j); if (equiv_example(exi,exj)) { if (def_class != NULL) { exi->lab.nom = def_class; } swap_out_example(data,j); } else { j++; } } } print_data(data); } static BOOL equiv_example(example_t *exi,example_t *exj) { int k,m; aval_t *aik,*ajk; symbol_t *sikm,*sjkm; if (vmax(exi->inst)!=vmax(exj->inst)) return FALSE; for (k=0; kinst); k++) { aik = vref(aval_t,exi->inst,k); ajk = vref(aval_t,exj->inst,k); if (aik->kind != ajk->kind) return FALSE; switch (aik->kind) { case MISSING_VALUE: case IGNORE: break; case SYMBOL: if (aik->u.nom != ajk->u.nom) return FALSE; break; case CONTINUOUS: if (aik->u.num != ajk->u.num) return FALSE; break; case SET: if (vmax(aik->u.set) != vmax(ajk->u.set)) return FALSE; for (m=0; mu.set); m++) { sikm = *vref(symbol_t *,aik->u.set,m); sjkm = *vref(symbol_t *,ajk->u.set,m); if (sikm != sjkm) return FALSE; } break; } } return TRUE; }