PostgreSQL7.4.6 for Linux

源代码在线查看: makedict.pl

软件大小: 12280 K
上传用户: zln12345
关键词: PostgreSQL Linux for
下载地址: 免注册下载 普通下载 VIP

相关代码

				#!/usr/bin/perl				use strict;				use Getopt::Std;				use locale;								my %opt; 				getopts('l:he:s:ap:o:m:f', \%opt);								if ( $opt{h} || ! ($opt{e}||$opt{s}) || !$opt{l} ) {					print				Generator of variant of the Lovin's stemmer which 				uses a longest match algorithm.								Author Teodor Sigaev 				Usage:				$0 -l LOCALENAME [ -e FILENAME ] [ -s FILENAME ] [ -p PREFIX ] [ -o FILENAME ] [ -a ] [ -m NUMBER ]				-e FILENAME	- file with endings of word				-s FILENAME	- file with list of stop-word				-o FILENAME	- out file, default STDOUT				-a 		- stop-word are strimmed				-p PREFIX	- prefix of function and etc, default strimmed locale				-m NUMBER	- minimal length of rest after semming, default 3				-l LOCALENAME	- name of locale				-f		- do not call tolower for each char				At least one of -e or -s must be defined				EOT				exit; 				}								if ( ! defined $opt{p} ) {					$opt{p} = $opt{l};					$opt{p}=~s/[^a-zA-Z0-9_]+//g;				}				$opt{m}=3 if ! defined $opt{m};								my ($enddata,$stopdata) = ('','');				my $maxchild = 0;								if ( $opt{e} ) {					my @tree;					buildtree(\@tree,  $opt{e}, 1);					printstruct( \@tree,  0, \$enddata);					undef @tree;				} 									if ( $opt{s} ) {					my @tree;					buildtree(\@tree,  $opt{s}, 0);					printstruct( \@tree,  0, \$stopdata);					undef @tree;				} 													die "No data\n" if ( ! (length $enddata || length $stopdata) );								$enddata = "\t{0,0,0,0}" if ( ! length $enddata );				$stopdata = "\t{0,0,0,0}" if ( ! length $stopdata );								my $fh=\*STDOUT;				if ( $opt{o} ) {					open(OUT,">$opt{o}") || die "Can;t open file '$opt{o}' for writing\n";					$fh = \*OUT;				}								my $linktype = 'uint32';				if ( $maxchild 				        $linktype='uint8';				} elsif (  $maxchild 				        $linktype='uint16';				}								my $wherecheck = ( $opt{a} ) ?						"NULL,\n\t$opt{p}_is_stopword"					: 						"$opt{p}_is_stopword,\n\tNULL";								my ($tolower, $resttolower) = ('','');				if ( ! $opt{f} ) {					$tolower = '*cur = tolower( *cur );';					$resttolower=					while( cur - buf >= 0 ) {						*cur = tolower(*cur);						cur--;					}				EOT				}								print {$fh} 				/*				 * Autogenerated file				 *				 * Variant of the Lovin's stemmer which uses a longest match algorithm.				 * Endings are stored in a suffix tree.				 */								#ifdef DICT_BODY				#include 								typedef struct {					uint8	val;					uint8	flag;					uint8	right;					$linktype	child;				} $opt{p}_NODE;								/* is exists left tree ? */				#define L       0x01				/* finish word flag */				#define F       0x02				#define ISLEFT(x)       ((($opt{p}_NODE*)x)->flag & L)				#define ISFINISH(x)     ((($opt{p}_NODE*)x)->flag & F)								#define MINLENREST	$opt{m}								static $opt{p}_NODE $opt{p}_endstree[]={				$enddata				};								static $opt{p}_NODE $opt{p}_stoptree[]={				$stopdata				};								static char*				$opt{p}_stem( void* obj, char *in, int *len ) {					$opt{p}_NODE    *ptr = $opt{p}_endstree;					int     result = 0;					uint8 *buf = (uint8 *)in;					uint8 *cur = buf + (*len) - 1;									while( cur - buf >= MINLENREST ) {						$tolower						if ( ptr->val == *cur ) {							if ( ISFINISH(ptr) ) result = buf + (*len) - cur;							cur--;							if ( ! ptr->child ) break;							ptr += ptr->child;						} else if ( ptr->val > *cur ) {							if ( ISLEFT(ptr) )								ptr++;							else								break;						} else {							if ( ptr->right )								ptr += ptr->right;							else								break;						}					}					$resttolower					*len -= result;					return in;				}								static int				$opt{p}_is_stopword( void *obj, char *in, int len ) {				        $opt{p}_NODE    *ptr = $opt{p}_stoptree;				        int     result = 0;				        uint8 *buf = (uint8 *)in;				        uint8 *cur = buf;								        while( cur - buf < len ) {						$tolower				                if ( ptr->val == *cur ) {				                        cur++;				                        if ( ISFINISH(ptr) ) result = cur - buf;				                        if ( ! ptr->child ) break;				                        ptr += ptr->child;				                } else if ( ptr->val > *cur ) {				                        if ( ISLEFT(ptr) )				                                ptr++;				                        else				                                break;				                } else {				                        if ( ptr->right )				                                ptr += ptr->right;				                        else				                                break;				                }				        }				        return (result==len) ? 1 : 0;				}								#undef L				#undef F				#undef ISLEFT				#undef ISFINISH				#undef MINLENREST 				#endif /* DICT_BODY */								#ifdef DICT_TABLE				TABLE_DICT_START					\"$opt{l}\",					NULL,					NULL,					$opt{p}_stem,					$wherecheck				TABLE_DICT_END				#endif								EOT								close($fh) if ( $fh != \*STDOUT );												sub buildtree {					my ($reftree,$file, $needreverse) = @_;					open(DATA,$file) || die "Can't open file '$file'\n";					while() {						chomp;						next if ! length $_;						$_ = lc($_) if ! $opt{f};						addtostruct( $reftree, ( $needreverse ) ? scalar(reverse($_)) : $_ );					}					close DATA;				}								sub mkbintree {					my ( $start, $stop, $rprop, $rres) = @_;				        					my $middle = $start + int( ($stop-$start)/2 );									push( @$rres, $rprop->[$middle] );					my $idx = $#$rres;					$rres->[$idx]{right}=0;					$rres->[$idx]{left}=0;					return 1 if ( $start == $stop );									my $leftsize = 0;					if ( $middle!=$start ) {						$rres->[$idx]{left}=1;  						$leftsize = mkbintree( $start, $middle-1, $rprop, $rres );						$rres->[$idx]{right}=$leftsize+1;					} else {						$rres->[$idx]{right} = 1;					}					return 1 + $leftsize + mkbintree( $middle+1, $stop, $rprop, $rres );				}								sub addtostruct {					my $node = shift;					my ($char, $subval) = split('', shift, 2);					$char = ord( $char );					if ( ! defined $node->[$char] ) {						$node->[$char] = {};						$node->[$char]{finish} = length $subval;						$node->[$char]{child} = [];					} elsif ( ! length $subval ) {						$node->[$char]{finish} = 0;					}									addtostruct( $node->[$char]{child}, $subval ) if ( length $subval );				}								sub printstruct {					my ($node, $pre, $refout) = @_;					my $add = 0;					my @prop;					my $outchild;					my $current = 0;					my $poschild=0;					my @tmp;									foreach my $i ( 0..255 ) {						next if ( !defined $node->[ $i ] );						push @prop , { val=>$i,							nchild=>printstruct( $node->[ $i ]{child}, 1, \$outchild ),							poschild=>$poschild };						$poschild += $prop[$#prop]{nchild};					}									return 0 if $#prop < 0;					if ($pre) {						$$refout .= ",\n\n";					}					mkbintree(0,$#prop,\@prop,\@tmp);					@prop = @tmp;										$current=$#prop+1;					foreach my $i ( 0..$#prop ) {						my $flag = ($prop[$i]{left}) ? 'L' : undef;						if ( $node->[ $prop[$i]{val} ]{finish}==0 ) {							$flag .= '|' if defined $flag;							$flag .= 'F';						} elsif ( ! defined $flag ) {							$flag='0';						}						$$refout .= "\t{'".chr( $prop[$i]{val} )."',".								$flag.','.								$prop[$i]{right}.','.								(($prop[$i]{nchild}==0)?0:($prop[$i]{poschild}+$current)).'}'.								(($i==$#prop)? '' : ",\n");										$maxchild = $prop[$i]{poschild}+$current if							( $prop[$i]{nchild} && $prop[$i]{poschild}+$current > $maxchild );						$current--;						$add += $prop[$i]{nchild};					}					$$refout .= $outchild;					return $#prop+1 + $add;				}																			

相关资源