可自动识别自然语言,句子匹配等功能,达到人工智能识别

源代码在线查看: matcher.cpp

软件大小: 20 K
上传用户: renyumeng
关键词: 自动识别 人工智能 语言 识别
下载地址: 免注册下载 普通下载 VIP

相关代码

				/////////////////////////////////////////////////////////////////////////////////
				//
				// Sentence matcher
				// @author: Gonzales Cenelia
				// homepage: www.ai-search.4t.com
				//
				// when applied to strings,the "Levenstein distance" is usualy used 
				// to measure the distances between words but we could also use it to measure 
				// the distance between sentences,the following code is a simple implementation 
				// of an algorithm that trys to measure the distances between two sentences 
				// by using the "Levenstein distance". //********************
				//
				// this code is copyrighted and has limited warranty. //***************
				//////////////////////////////////////////////////////////////////////////
				#include "matcher.h"
				
				unsigned int tokenized_pattern_size;
				unsigned int tokenized_text_size;
				
				// tokenize a string and puts the result into a vector
				void tokenize_string(const char *str, vstring &dest) {
					int len = strlen(str);
					char *szString = new char[len + 1];
					strcpy(szString, str);
					char *token = strtok(szString, seps);
					while(token != NULL) {
						dest.push_back(token);
						token = strtok(NULL, seps);
					}
					delete szString;
				}
				
				// search for a string inside a vector of strings
				int search_string(const char *str, vstring v) {
					int size = v.size();
					for(int i = 0; i < size; ++i) {
						if(v[i] == str) {
							return i;
						}
					}
					return -1;
				}
				
				// compress sentences by replacing each word 
				// by characters of the english alphabet
				void compress_strings(char *pattern, char *text) {
					vstring v1, v2, temp;
					tokenize_string(pattern, v1);
					tokenize_string(text, v2);
					std::stringstream s1, s2;
					int size1 = v1.size();
					int size2 = v2.size();
					tokenized_pattern_size = size1;
					tokenized_text_size = size2;
					for(int i = 0, j = -1, prev_index; i < size1; ++i) {
						if(search_string(v1[i].c_str(), temp) == -1) {
							temp.push_back(v1[i].c_str());
							++j; prev_index = j;
						}
						s1 					}
					int index = prev_index, counter = 0, result;
					for(i = 0; i < size2; ++i) {
						result = search_string(v2[i].c_str(), temp);
						if(result == -1) {
							temp.push_back(v2[i].c_str());
							if(counter == 0) {
								index = prev_index + 1;
								++counter;
							}
							else {
								++index;
							}
						}
						else {
							if(index > prev_index) {
								prev_index = index;
								counter = 0;
							}
							index = result;
						}
						s2 					}
					strcpy(pattern, s1.str().c_str());
					strcpy(text, s2.str().c_str());
				}
				
				// measure the distance between two sentences by using 
				// the "Levenshtein Distance"
				int find_distance(const char *pattern, const char *text) {
					int patLen = strlen(pattern);
					int textLen = strlen(text);
				
					char *pszPattern = new char[patLen + 1];
					char *pszText = new char[textLen + 1];
				
					strcpy(pszPattern, pattern);
					strcpy(pszText, text);
				
					// compress the given sentences
					compress_strings(pszPattern, pszText);
				
					patLen = strlen(pszPattern);
					textLen = strlen(pszText);
					// measure the distances between the new compressed strings
					int retval = LD(pszText, textLen, pszPattern, patLen);
				
					delete pszPattern;
					delete pszText;
					return retval;
				}
				
				// Fuzzy Sentence Match
				// function for matching two sentences
				// returns a value between 0 and 1
				// the closer that value is to 0 the closer the sentences are
				float match(const char *pattern, const char *text) {
					int distance = find_distance(pattern, text);
					float max_size = max(tokenized_pattern_size, tokenized_text_size);
					return distance/max_size;
				}
				
				
				
				
							

相关资源