这是一款很好用的工具包

源代码在线查看: pfsg-to-fsm.gawk

软件大小: 3034 K
上传用户: wanghaihah
关键词: 工具包
下载地址: 免注册下载 普通下载 VIP

相关代码

				#!/usr/local/bin/gawk -f
				#
				# pfsg-to-fsm --
				#	convert a Decipher PFSG to AT&T FSM format
				#
				# usage: pfsg-to-fsm [symbolfile=SYMFILE] [symbolic=1] [scale=S] file.pfsg > file.fsm
				#
				# symbolic=1		retains output word strings in the fsm file.
				# symbolfile=SYMFILE 	dump output symbol table to SYMFILE
				#			(to be used with fsmcompile|fsmdraw|fsmprint -i SYMFILE)
				# scale=S		set transition weight scaling factor to S
				#			(default -1)
				#
				# The BEGIN block also initializes nofinal (0) and final_output ("");
				# they are read by print_final and by the "final" rule respectively.
				#
				# Input records handled (keyword in field 1):
				#	nodes N out_0 ... out_N-1	node count followed by one output label per node
				#	initial I			index of the initial node (its label must be NULL)
				#	final F				index of the final node
				#	transitions T			followed by T records "from to cost"
				#
				# $Header: /home/srilm/devel/utils/src/RCS/pfsg-to-fsm.gawk,v 1.14 2004/11/02 02:00:35 stolcke Exp $
				#

				# Set up the symbol table, the per-process temp file and option defaults.
				BEGIN {
					# the empty output label always gets symbol index 0
					empty_output = "NULL";
					output_symbols[empty_output] = 0;
					numoutputs = 1;

					# obtain a process id to make the temp file name unique
					if ("pid" in PROCINFO) {
					    pid = PROCINFO["pid"];
					} else {
					    getline pid < "/dev/pid";
					}
					tmpfile = "/tmp/pfsg.tmp" pid;

					# hack to remove tmpfile when killed
					print "" | "trap '/bin/rm -f " tmpfile "' 0 1 2 15 30; cat >/dev/null";

					symbolfile = "";
					symbolic = 0;

					scale = -1;	# scaling of transition weights
					nofinal = 0;	# do output final node definition
					final_output = "";
				}

				# "nodes N label..." -- record each node's output label and assign
				# a fresh symbol index to every label not seen before.
				$1 == "nodes" {
					numnodes = $2;

					for (i = 0; i < numnodes; i++) {
						# labels start at field 3, one per node
						node_output[i] = $(i + 3);

						if (!(node_output[i] in output_symbols)) {
							output_symbols[node_output[i]] = numoutputs++;
						}
					}

					next;
				}

				# "initial I" -- remember the initial node; it must be non-emitting.
				$1 == "initial" {
					initial_node = $2;

					if (node_output[initial_node] != empty_output) {
						print "initial node must be NULL" >> "/dev/stderr";
						exit 1;
					}
					next;
				}

				# "final F" -- remember the final node and, when final_output is set,
				# override its label (registering the label as a symbol if needed).
				$1 == "final" {
					final_node = $2;

					if (final_output) {
						node_output[final_node] = final_output;
						if (!(final_output in output_symbols)) {
							output_symbols[final_output] = numoutputs++;
						}
					}
					next;
				}

				# Print one FSM line for the PFSG transition from_node -> to_node.
				# A transition into a non-emitting final node is written as a
				# two-field line (from_node and its scaled cost) instead of an arc;
				# otherwise an arc is written whose label is the destination node's
				# output (the word itself if symbolic is set, else its symbol index).
				function print_trans(from_node, to_node, cost) {
					if (to_node == final_node && node_output[final_node] == empty_output) {
						print from_node, scale * cost;
					} else {
						# PFSG bytelogs have to be negated to FSM default semiring
						print from_node, to_node, \
							(symbolic ? node_output[to_node] : \
							 output_symbols[node_output[to_node]]), \
							scale * cost;
					}
				}

				# Print the final-state definition line, unless suppressed by nofinal.
				function print_final() {
					# if the final node is non-emitting, we don't need to output it
					# at all (see print_trans above)
					if (!nofinal && node_output[final_node] != empty_output) {
						print final_node, 0;
					}
				}

				# "transitions T" -- consume the following T transition records and
				# emit the FSM arcs.
				$1 == "transitions" {
					num_transitions = $2;

					# start from an empty temp file so that the read-back below never
					# sees a missing file or leftovers from a previous graph
					printf "" > tmpfile;

					# process the transitions and map them to FSM transitions and
					# final states.
					# FSM requires the first transition to be out of the initial state,
					# so we scan the transitions twice.
					# The first time, to find the initial transitions, then
					# to add all the others. Yuck!
					for (k = 1; k <= num_transitions; k++) {
						# stop on premature end of input instead of re-processing
						# the previous record
						if ((getline) <= 0) {
							break;
						}

						from_node = $1;
						to_node = $2;
						cost = $3;

						if (from_node == initial_node) {
							print_trans(from_node, to_node, cost);
						} else {
							# defer: saved verbatim and replayed after the initial arcs
							print > tmpfile;
						}
					}
					close(tmpfile);

					# output definition of the final node
					print_final();

					# now process all the non-initial transitions
					# (getline returns -1 on error, which is "true" in awk, so the
					# result must be compared against 0 explicitly)
					while ((getline < tmpfile) > 0) {
						from_node = $1;
						to_node = $2;
						cost = $3;

						print_trans(from_node, to_node, cost);
					}
					# release the read handle so the file can be rewritten later
					close(tmpfile);

					next;
				}

				# Optionally write the "label index" symbol table.
				END {
					# dump out the symbol table
					if (symbolfile) {
						for (s in output_symbols) {
							print s, output_symbols[s] > symbolfile;
						}
					}
				}

相关资源