这是一款很好用的工具包

源代码在线查看: sort-lm.gawk

软件大小: 3034 K
上传用户: wanghaihah
关键词: 工具包
下载地址: 免注册下载 普通下载 VIP

相关代码

				#!/usr/local/bin/gawk -f
				#
				# sort-lm --
				#	sort the ngrams in an LM in lexicographic order, as required for
				#	some other LM software (notably CMU's).
				#
				# usage: sort-lm lm-file > sorted-lm-file
				#
				# $Header: /home/srilm/devel/utils/src/RCS/sort-lm.gawk,v 1.2 2004/11/02 02:00:35 stolcke Exp $
				#
				# How it works:
				#	Every input line is copied to the output unchanged, except for
				#	lines with more than one field that appear after a "\N-grams:"
				#	section header.  Those are piped through an external sort(1)
				#	command keyed on fields 2 .. N+1; the pipe is closed (which lets
				#	sort emit its output) when the next line starting with a
				#	backslash is seen.
				#

				BEGIN {
					sorter = "";		# current sort command; "" when no pipe is open
					currorder = 0;		# N of the "\N-grams:" section being read, else 0
				}

				# Blank lines pass straight through.
				NF == 0 {
					print;
					next;
				}

				# "ngram N=count" lines pass straight through.
				/^ngram *[0-9][0-9]*=/ {
					# Text of the second field up to the "="; not read anywhere else
					# in this script.
					order = substr($2, 1, index($2, "=") - 1);
					print;
					next;
				}

				# "\N-grams:" header (single-digit N): start a new sorted section.
				/^\\[0-9]-grams:/ {
					# Finish the previous section's sort, if one is still open.
					if (sorter) {
					    close(sorter);
					}

					# substr() returns a string; "+ 0" forces a number so that the loop
					# test below is a numeric comparison rather than a string one
					# (as strings, "10" <= "9" is true and the loop would overrun).
					currorder = substr($0, 2, 1) + 0;
					print;
					# Push the header out of awk's buffer before sort writes anything
					# to the shared standard output.
					fflush();

					# Set up the new sorting pipeline: one key per field from 2 to
					# currorder+1.  "+i -(i+1)" is sort's origin-zero key syntax, i.e.
					# a key made of field i+1 only.
					sorter = "sort";
					for (i = 1; i <= currorder; i++) {
						sorter = sorter " +" i " -" (i+1);
					}
					# print sorter >> "/dev/stderr";
					next;
				}

				# Any other line starting with a backslash ends the current section.
				/^\\/ {
					if (sorter) {
					    close(sorter);
					    sorter = "";
					}
					currorder = 0;
					print; next;
				}

				# Multi-field lines inside a section go to the sort pipeline.
				currorder && NF > 1 {
					print | sorter;
					next;
				}

				# Everything else is copied unchanged.
				{
					print;
				}

相关资源