源码地带 > 电路图 > 电子资料下载 > 其他 >这是一款很好用的工具包 > 查看压缩包源码

这是一款很好用的工具包

源代码在线查看： rank-vocab.gawk

软件大小：	3034 K
上传用户：	wanghaihah
关键词：	工具包
下载地址：	免注册下载 | 普通下载


相关代码
rank-vocab.gawk htklat-vocab.gawk pfsg-vocab.gawk nbest-vocab.gawk compute-oov-rate.gawk rank.c rank.c rank.c

				#!/usr/local/bin/gawk -f
				#
				# rank-vocab --
				#	Given K different rankings of candidate vocabularies, and
				#	a held-out optimization unigram count file, optimize the
				#	combined ranking of words
				#
				# usage: rank-vocab counts words1 words2 ... wordsK
				#
				# Input:
				#	counts		first file argument; field 1 is a word, field 2 its count
				#	words1..wordsK	one candidate word per line (field 1), read in ranked order
				#
				# Output (stdout):
				#	INITIAL OOVS = <total count mass of the counts file>
				#	RANK <r> WORD <w> OOVS <remaining count mass>	(one line per word output)
				#
				# Requires gawk: the per-file rules rely on the gawk-specific ARGIND variable.
				#
				# $Header: /home/srilm/devel/utils/src/RCS/rank-vocab.gawk,v 1.2 2004/11/02 02:00:35 stolcke Exp $
				#

				BEGIN {
					num_sources = 0;
					num_output = 0;
					num_oovs = 0;

					debug = 0;
				}

				# blank lines carry no word; skip them so no empty word is registered
				NF == 0 {
					next;
				}

				# first file: held-out unigram counts
				ARGIND == 1 {
					# accumulate (rather than overwrite) so that a word listed more than
					# once stays consistent with the num_oovs total
					word_count[$1] += $2;

					# before any word is added, every counted token is an OOV
					num_oovs += $2;

					next;
				}

				# remaining files: one ranked candidate word list per source
				ARGIND > 1 {
					k = ARGIND - 1;
					num_sources = k;

					num_words[k] ++;

					word_ranked[k, num_words[k]] = $1;
					next;
				}

				# print the ranked word list of source k (debugging aid)
				# i is a local variable
				function dump_words(k,    i) {
					print "source " k " words:";

					for (i = 1; i <= num_words[k]; i ++) {
						print i, word_ranked[k,i];
					}
				}

				# find the next word from source k that occurs in the test set
				# return 0 if no more words are available
				# j is a local variable
				function find_next(k,    j) {
					for (j = last_chosen[k] + 1; j <= num_words[k]; j ++) {
						if (word_count[word_ranked[k,j]] > 0) {
							if (debug) {
								print "next word rank for source " k ": " j >> "/dev/stderr";
							}

							return j;
						}
					}
					if (debug) {
						print "no more words from source " k >> "/dev/stderr";
					}
					return 0;
				}

				# compute gain (number of OOVs tokens reduced per number of word types added)
				# by adding the next word from source k
				# returns -1 when source k is exhausted
				# g is a local variable
				function compute_gain(k,    g) {
					if (next_word[k] == 0) {
						# no more words in source k, no gain
						return -1;
					} else {
						# all words ranked after the last chosen one, up to and including
						# the next useful one, would have to be added together
						g = word_count[word_ranked[k,next_word[k]]] / (next_word[k] - last_chosen[k]);
						if (debug) {
							# debug output goes to stderr so it does not mix with the ranking
							print "next gain for source " k " = " g >> "/dev/stderr";
						}
						return g;
					}
				}

				END {
				#	for (k = 1; k <= num_sources; k ++) {
				#	    dump_words(k);
				#	}

					for (k = 1; k <= num_sources; k ++) {
						last_chosen[k] = 0;
						next_word[k] = find_next(k);
						gain[k] = compute_gain(k);
					}

					print "INITIAL OOVS = " num_oovs;

					# add words until no more gain possible (i.e., until all source
					# words have been used up)
					#
					# NOTE(review): a word already output through another source still has
					# word_count > 0, so it can still be picked by find_next() and counted
					# in the gain of a later source even though it removes no further OOVs.
					# Left unchanged because it affects the resulting ranking -- confirm
					# whether this is intended.
					while (1) {
						best_gain = -1;
						best_source = 0;

						# find next best source to pick word from
						# (ties are resolved in favor of the lowest-numbered source)
						for (k = 1; k <= num_sources; k ++) {
							if (gain[k] > best_gain) {
								best_source = k;
								best_gain = gain[k];
							}
						}

						# every source is exhausted
						if (best_gain < 0) break;

						# process all the words from source k up to the one chosen
						for (i = last_chosen[best_source] + 1; \
						     i <= next_word[best_source]; \
						     i ++) {
							word_chosen = word_ranked[best_source,i];

							if (debug) {
								print "source = " best_source \
								      " gain = " best_gain \
								      " word = " word_chosen >> "/dev/stderr";
							}

							# output the word if it hasn't been already
							if (!was_output[word_chosen]) {
								num_output ++;

								num_oovs -= word_count[word_chosen];

								print "RANK " num_output " WORD " word_chosen \
								      " OOVS " num_oovs;

								was_output[word_chosen] = 1;
							}
						}

						# update the statistics for the source that was chosen
						last_chosen[best_source] = next_word[best_source];
						next_word[best_source] = find_next(best_source);
						gain[best_source] = compute_gain(best_source);
					}
				}


相关资源
这是一款很好用的工具包 | 这是一款很好用的B/S结构的酒店管理系统简单 | 这是一本很好用的VHDL编程书 | 这是一本很好用的VHDL编程书 | UltraEdit是一款很好用的编辑软件 | 这是一款很好的SQL多用户版程序 | 这是一款简单易用的自动升级及更新软件 | 这是一款很好的登陆软件

这是一款很好用的工具包

源代码在线查看： rank-vocab.gawk

相关代码

相关资源

友情链接