这是一款很好用的工具包

源代码在线查看: add-pauses-to-pfsg.gawk

软件大小: 3034 K
上传用户: wanghaihah
关键词: 工具包
下载地址: 免注册下载 普通下载 VIP

相关代码

				#!/usr/local/bin/gawk -f
				#
				# add-pauses-to-pfsg --
				#	Modify Decipher PFSG to allow pauses between words
				#
				# $Header: /home/srilm/devel/utils/src/RCS/add-pauses-to-pfsg.gawk,v 1.11 2002/06/29 19:59:12 stolcke Exp $
				#
				# Reads PFSG definitions on input and writes, in order:
				#	1. a TOP_LEVEL model wrapping the first PFSG seen,
				#	2. the input PFSGs, with word nodes renamed to their wrapper models,
				#	3. one pause wrapper model per distinct word,
				#	4. the PAUSE_FILLER model.
				#
				# Variables read by the script (the defaults below are set in BEGIN, so
				# they can be overridden with var=value operands placed before the input
				# file on the command line):
				#	wordwrap	if nonzero, replace word nodes by pause wrappers
				#	pauselast	if nonzero, the pause filler follows the word
				#			instead of preceding it
				#	version		if nonzero, emit a "version" line first
				#	vocab		optional file whose first field on each line is
				#			a word; when given it replaces the lowercase
				#			heuristic in is_word()
				#
				BEGIN {
					pause = "-pau-";
					top_level_name = "TOP_LEVEL";
					pause_filler_name = "PAUSE_FILLER";
					null = "NULL";

					wordwrap = 1;		# wrap pause filler around words
					pauselast = 0;		# make pauses follow wrapped words
					version = 0;		# no "version" line by default

					# portable way to test for lowercase characters
					# check for high-order bit is supposed to catch multibyte characters
					word_pattern = "[[:lower:]\x80-\xFF]";
					# fall back to an explicit range if this awk does not
					# understand the [:lower:] character class
					if ("a" !~ word_pattern) word_pattern = "[a-z\x80-\xFF]";
				}

				#
				# output the TOP_LEVEL model
				#	oldname is the name of the original pfsg
				#
				# The model has four nodes; the original pfsg and the pause filler sit
				# between two NULL nodes, and one extra transition lets the pause
				# filler be skipped.
				#
				function print_top_level(oldname) {
					if (version) {
						print "version " version "\n";
					}
					print "name " top_level_name;
					if (pauselast) {
						print "nodes 4 " null " " pause_filler_name " " oldname " " null;
					} else {
						print "nodes 4 " null " " oldname " " pause_filler_name " " null;
					}
					print "initial 0"
					print "final 3"
					print "transitions 4"
					print "0 1 0"
					print "1 2 0"
					if (pauselast) {
						# pause filler is node 1: allow skipping it
						print "0 2 0"
					} else {
						# pause filler is node 2: allow skipping it
						print "1 3 0"
					}
					print "2 3 0"
					print "";
				}

				#
				# name of the pause wrapper model generated for word
				#
				function word_wrapper_name(word) {
					return "_" word "_PF";
				}

				#
				# output a pause wrapper for word
				#	three nodes, with the pause filler in the middle (node 1) and
				#	a 0 -> 2 transition that bypasses it
				#
				function print_word_wrapper(word) {
					print "name " word_wrapper_name(word);
					if (pauselast) {
						print "nodes 3 " word " " pause_filler_name " " null;
					} else {
						print "nodes 3 " null " " pause_filler_name " " word;
					}
					print "initial 0";
					print "final 2";
					print "transitions 3";
					print "0 1 0";
					print "1 2 0";
					print "0 2 0";
					print "";
				}

				#
				# output the pause filler
				#	node 2 is the pause; the 2 -> 1 transition loops back so the
				#	pause can repeat
				#
				function print_pause_filler() {
					print "name " pause_filler_name;
					print "nodes 4 " null " " null " " pause " " null;
					print "initial 0";
					print "final 3";
					print "transitions 4";
					print "0 1 0";
					print "1 2 0";
					print "2 3 0";
					print "2 1 0";
				}

				#
				# read vocabulary list if supplied
				#	This rule must precede the blank-line rule below: that rule ends
				#	with "next", so a blank first input line would otherwise skip
				#	loading the vocabulary altogether.
				#
				NR == 1 && vocab != "" {
					# getline returns -1 on error, which is "true" in awk, so the
					# result has to be compared against 0 to avoid looping forever
					# on an unreadable file
					while ((vocab_status = (getline line < vocab)) > 0) {
						if (split(line, a)) {
							word_list[a[1]] = 1;
						}
					}
					if (vocab_status < 0) {
						print "add-pauses-to-pfsg: cannot read vocabulary file " vocab > "/dev/stderr";
						vocab_error = 1;
						exit 1;
					}
					close (vocab);
				}

				#
				# pass blank lines through unchanged
				#
				NF == 0 {
					print;
					next;
				}

				#
				# check that a node name is word
				# if a vocabulary was not specified we use the following heuristic:
				# word nodes contain at least one lowercase or non-ascii character and are not
				# surrounded by "*...*" (which indicates a class name).
				#
				function is_word(w) {
					# same test as the vocabulary-loading rule, so that the two
					# always agree on whether a vocabulary is in use
					if (vocab != "") {
						return w in word_list;
					} else {
						return w !~ /^\*.*\*$/ && w ~ word_pattern;
					}
				}

				#
				# first time we see a pfsg name, issue a top-level wrapper for it.
				#
				$1 == "name" && !have_top_level {
					print_top_level($2);
					print;
					have_top_level = 1;
					next;
				}

				#
				# maps word nodes to wrapper nodes
				#	input line format: nodes <count> <name> ...
				#
				$1 == "nodes" {
					numnodes = $2;
					printf "nodes %d", numnodes;

					for (i = 0; i < numnodes; i ++) {
						# node names start at the third field
						node_name = $(i + 3);

						# if it is a word (see is_word) it needs to be wrapped
						if (wordwrap && is_word(node_name)) {
							# remember each distinct word once, in order of
							# first appearance, for the wrappers emitted in END
							if (!(node_name in all_words)) {
								all_words[node_name] = 1;
								words[++num_words] = node_name;
							}
							printf " %s", word_wrapper_name(node_name);
						} else {
							printf " %s", node_name;
						}
					}
					printf "\n";
					next;
				}

				#
				# everything else is copied through unchanged
				#
				{
					print;
				}

				END {
					# vocabulary could not be read: emit nothing further and
					# keep the failure exit status
					if (vocab_error) {
						exit 1;
					}

					#
					# output the word wrappers
					#
					if (wordwrap) {
						# words[] is filled 1..num_words by the "nodes" rule
						for (i = 1; i <= num_words; i ++) {
							print_word_wrapper(words[i]);
						}
					}

					print_pause_filler();
				}

相关资源