这是一款很好用的工具包

源代码在线查看: make-meta-counts.gawk

软件大小: 3034 K
上传用户: wanghaihah
关键词: 工具包
下载地址: 免注册下载 普通下载 VIP

相关代码

				#!/usr/local/bin/gawk -f
				#
				# make-meta-counts --
				#	Apply N-gram count cut-offs and insert meta-counts (counts-of-counts)
				#
				# $Header: /home/srilm/devel/utils/src/RCS/make-meta-counts.gawk,v 1.2 2002/07/22 21:24:45 stolcke Exp $
				#
				# Input records: the last field is treated as the count and the number of
				# preceding fields as the N-gram order.  Records with fewer than two
				# fields, or of an order above `order`, produce no output.
				#
				# Settable variables:
				#	order		highest N-gram order that is processed
				#	trust_totals	non-zero: only drop below-cutoff N-grams, never
				#			generate meta-counts
				#	metatag		prefix of the generated meta-count word
				#	mincount1 .. mincount9
				#			per-order cut-offs; a count below the cut-off
				#			for its order is treated as below-cutoff
				#
				# NOTE(review): order, trust_totals and metatag are assigned in BEGIN, so
				# a -v assignment of them would be overwritten; var=value operands placed
				# before the input file are processed after BEGIN and do take effect.
				#

				BEGIN {
				    order = 3;

				    # trust_totals=1 means we don't have to generate meta-counts, just
				    # apply the cut-offs (in combination with ngram-count -trust-totals)
				    trust_totals = 0;

				    metatag = "__META__";
				}

				#
				# flush_level --
				#	Print the meta-ngrams buffered for N-gram order `level`, but only if
				#	at least one regular (non-meta) N-gram of that order was printed
				#	since the last flush.  The buffer is discarded either way.
				#
				function flush_level(level) {
				    if (have_counts[level]) {
				        printf "%s", buffer[level];
				        have_counts[level] = 0;
				    }
				    delete buffer[level];
				}

				# Collect the per-order cut-offs into an array.  This runs on the first
				# record rather than in BEGIN, presumably so that mincountN=value operands
				# given on the command line have already been assigned.  The "+ 0" forces
				# a numeric value (an unset variable becomes 0, i.e. no cut-off).
				NR == 1 {
				    mincount[1] = mincount1 + 0;
				    mincount[2] = mincount2 + 0;
				    mincount[3] = mincount3 + 0;
				    mincount[4] = mincount4 + 0;
				    mincount[5] = mincount5 + 0;
				    mincount[6] = mincount6 + 0;
				    mincount[7] = mincount7 + 0;
				    mincount[8] = mincount8 + 0;
				    mincount[9] = mincount9 + 0;
				}

				# N-grams longer than the configured order are dropped.
				NF - 1 > order {
				    next;
				}

				NF >= 2 {
				    n = NF - 1;

				    if (!trust_totals) {
				        # An N-gram of order n ends every pending run of higher order:
				        # emit the meta-ngrams buffered for those orders, highest first.
				        lvl = order;
				        while (lvl > n) {
				            flush_level(lvl);
				            lvl--;
				        }
				    }

				    # Count reaches the cut-off: pass the record through unchanged and
				    # remember that this order has a real count.
				    if (!($NF < mincount[n])) {
				        have_counts[n] = 1;
				        print;
				        next;
				    }

				    # Below the cut-off and totals are trusted: drop the record.
				    if (trust_totals) {
				        next;
				    }

				    # Below the cut-off: turn the record into a meta-ngram.  The last word
				    # becomes metatag followed by the integer part of the old count, and
				    # the count becomes 1.  Assigning to fields makes awk rebuild $0 with
				    # OFS, so the buffered line is re-joined rather than the raw input.
				    $n = metatag int($NF);
				    $NF = 1;

				    # Hold it back until it is known whether this order has real counts.
				    buffer[n] = buffer[n] $0 "\n";
				}

				END {
				    # Emit whatever is still buffered, highest order first.
				    for (lvl = order; lvl >= 1; lvl--) {
				        flush_level(lvl);
				    }
				}

相关资源