这是一款很好用的工具包

源代码在线查看: reverse-lm.gawk

软件大小: 3034 K
上传用户: wanghaihah
关键词: 工具包
下载地址: 免注册下载 普通下载 VIP

相关代码

				#!/usr/local/bin/gawk -f				#				# reverse-lm --				#	reverse N-grams in a backoff LM				#				# usage: reverse-lm lm-file > rev-lm-file				#				# $Header: /home/srilm/devel/utils/src/RCS/reverse-lm.gawk,v 1.2 2004/11/02 02:00:35 stolcke Exp $				#								BEGIN {					start_tag = "";					end_tag = "";									renorm_command = "ngram -debug 1 -order 2 -lm - -renorm -write-lm -";				}				NF==0 {					print | renorm_command;					next;				}				/^ngram *[0-9][0-9]*=/ {					order = substr($2,1,index($2,"=")-1);									if (order > 2) {						print "can handle bigram LMs only" >> "/dev/stderr";						exit(2);					}					print | renorm_command;					next;				}				/^\\[0-9]-grams:/ {					currorder=substr($0,2,1);					print | renorm_command;					next;				}				/^\\/ {					print | renorm_command;					next;				}				currorder == 1 {					# unigrams are copied unchanged					# store probs for later use									prob = $1;					word = $2;					if (word == start_tag) {					    ; # get  unigram prob from 					} else if (word == end_tag) {					    uniprob[start_tag] = uniprob[end_tag] = prob;					} else {					    uniprob[word] = prob;					}									# add dummy backoff weight					$3 = "0";					print | renorm_command;					next;				}								function map_tags(w) {					if (w == start_tag) {					    return end_tag;					} else if (w == end_tag) {					    return start_tag;					} else {					    return w;					}				}								currorder == 2 {					# bigrams are reverse and new probabilities are assigned 					prob = $1;					w1 = map_tags($2);					w2 = map_tags($3);									# p_rev(w1|w2) = p(w1) p(w2|w1) / p(w2)					new_prob = uniprob[w1] + prob - uniprob[w2];									if (new_prob > 0) {						print "warning: p(" w1 "|" w2 ") > 0" >> "/dev/stderr";					}									print new_prob "\t" w2 " " w1 | renorm_command;					next;				}							

相关资源