用httpclient-4.0-alpha2 打造基于http协议的网站分析器

源代码在线查看: javascript.java

软件大小: 2411 K
上传用户: cong328
关键词: httpclient alpha http 4.0
下载地址: 免注册下载 普通下载 VIP

相关代码

				package com.ue.browser;
				
				import java.util.ArrayList;
				import java.util.regex.Matcher;
				import java.util.regex.Pattern;
				
				import com.ue.browser.core.HTMLElement;
				import com.ue.browser.Browser;
				import com.ue.browser.util.*;
				import com.ue.browser.core.Event;
				import com.ue.browser.core.EventCode;
				
				/**
				 * Extracts JavaScript-related information from the parsed HTML elements of a page:
				 * inline event handlers, function names declared in inline script elements, and
				 * function names declared in externally referenced script files.
				 *
				 * <p>All methods are best-effort: failures are printed to standard error and the
				 * results gathered so far are returned.
				 */
				public class Javascript {

					/** Names of the event attributes to look for, as supplied by {@code EventCode}. */
					private static final ArrayList<String> events = EventCode.getEvents();

					/** Events collected by {@link #buildEvents()}. */
					private ArrayList<Event> jevents = new ArrayList<Event>();
					/** The parsed HTML elements of the page being analysed. */
					private ArrayList<HTMLElement> elements;
					private Browser browser;
					/** Optional page URL used as the base for resolving script references. */
					private String firsturl;

					/** Matches {@code function <name>} declarations; group 1 is the function name. */
					static final String regex = "function\\s+([^\\(\\s]+)";
					static final Pattern p = Pattern.compile(regex);

					/**
					 * Creates an analyser for the given elements, associated with a browser.
					 *
					 * @param elements parsed HTML elements of the page
					 * @param browser  browser associated with this analyser
					 */
					public Javascript(ArrayList<HTMLElement> elements, Browser browser) {
						this.setElements(elements);
						this.setBrowser(browser);
					}

					/**
					 * Creates an analyser for the given elements with an explicit page URL.
					 *
					 * @param elements parsed HTML elements of the page
					 * @param firsturl URL of the page; used as the base when resolving script URLs
					 */
					public Javascript(ArrayList<HTMLElement> elements, String firsturl) {
						this.setElements(elements);
						this.firsturl = firsturl;
					}

					public ArrayList<Event> getJevents() {
						return jevents;
					}

					public void setJevents(ArrayList<Event> jevents) {
						this.jevents = jevents;
					}

					public ArrayList<HTMLElement> getElements() {
						return elements;
					}

					public void setElements(ArrayList<HTMLElement> elements) {
						this.elements = elements;
					}

					public Browser getBrowser() {
						return browser;
					}

					public void setBrowser(Browser browser) {
						this.browser = browser;
					}

					/**
					 * Lists the event handlers declared on the page's elements.
					 *
					 * @return one entry per (element, event attribute) pair that is present,
					 *         formatted as {@code "<tag>: <attribute value>"}
					 */
					public ArrayList<String> eventFunction() {
						ArrayList<String> al = new ArrayList<String>();
						try {
							for (HTMLElement he : elements) {
								for (String event : events) {
									String handler = he.getAttribute(event);
									if (handler != null) {
										al.add(he.getTag() + ": " + handler);
									}
								}
							}
						} catch (Exception e) {
							e.printStackTrace();
						}
						return al;
					}

					/**
					 * Attaches an {@code Event} to every element that declares an event-handler
					 * attribute and records a second, separate {@code Event} in {@link #getJevents()}.
					 */
					public void buildEvents() {
						try {
							for (HTMLElement he : elements) {
								for (String event : events) {
									String handler = he.getAttribute(event);
									if (handler != null) {
										int eCode = EventUtil.getEventType(event);
										// Two distinct instances on purpose: the element and the
										// collected list do not share the same Event object.
										he.setEvents(new Event(eCode, handler));
										jevents.add(new Event(eCode, handler));
									}
								}
							}
						} catch (Exception e) {
							e.printStackTrace();
						}
					}

					/**
					 * Collects the names of the functions declared in the page's inline scripts.
					 *
					 * @return function names found in the "daima" attribute of every "Script" element
					 */
					public ArrayList<String> parseInterFunction() {
						ArrayList<String> al = new ArrayList<String>();
						try {
							for (HTMLElement he : elements) {
								if ("Script".equals(he.getTag())) {
									// NOTE(review): "daima" (pinyin for "code") presumably holds the
									// inline script text set by the HTML parser - confirm.
									collectFunctionNames(he.getAttribute("daima"), al);
								}
							}
						} catch (Exception e) {
							e.printStackTrace();
						}
						return al;
					}

					/**
					 * Collects the names of the functions declared in externally referenced scripts.
					 *
					 * <p>Each "Script" element with a {@code src} attribute is resolved against the
					 * page URL, downloaded with a fresh {@code Browser}, and scanned for function
					 * declarations. A failure on one script does not prevent the others from being
					 * processed.
					 *
					 * @return function names found in the downloaded script files
					 */
					public ArrayList<String> parseOuterFunction() {
						// A dedicated Browser performs the downloads; the field is not used here.
						Browser fetcher = new Browser();
						ArrayList<String> al = new ArrayList<String>();
						try {
							for (HTMLElement he : elements) {
								collectOuterFunctions(he, fetcher, al);
							}
						} catch (Exception e) {
							e.printStackTrace();
						}
						return al;
					}

					/** Downloads the script referenced by one element, if any, and records its function names. */
					private void collectOuterFunctions(HTMLElement he, Browser fetcher, ArrayList<String> out) {
						try {
							if (!"Script".equals(he.getTag())) {
								return;
							}
							String src = he.getAttribute("src");
							if (src == null) {
								return;
							}
							String pageUrl = (firsturl != null)
									? firsturl
									: he.getBrowser().getDocument().getUrl();
							String jsurl = resolveScriptUrl(src, pageUrl);
							if (StringUtil.isJsFile(jsurl)) {
								collectFunctionNames(fetcher.JsNavigate(jsurl), out);
							}
						} catch (Exception e) {
							// Best effort: report and let the caller continue with the next element.
							e.printStackTrace();
						}
					}

					/**
					 * Turns a script reference into a URL that can be requested. Exactly one rewrite
					 * is applied, so the result of one rule is never fed into another.
					 */
					private static String resolveScriptUrl(String jsurl, String url) {
						// Protocol-relative reference: reuse the page's protocol.
						if (jsurl.startsWith("//")) {
							return StringUtil.getProtocol(url) + ":" + jsurl;
						}
						// Reference starting with a slash: prepend the page's domain.
						if (StringUtil.isSlashStart(jsurl)) {
							return StringUtil.getDomain(url) + jsurl;
						}
						// Relative reference into the page's directory or below.
						if (StringUtil.isLetterOrNumberStart(jsurl)
								&& !jsurl.startsWith("http:") && !jsurl.startsWith("https:")) {
							return StringUtil.getLastPath(url) + "/" + jsurl;
						}
						// Relative reference into a parent directory.
						if (jsurl.startsWith("..")) {
							int n = StringUtil.DoubleDotNum(jsurl);
							return StringUtil.getTruePath(url, n) + "/" + StringUtil.getAppath(jsurl);
						}
						return jsurl;
					}

					/** Appends every function name declared in {@code code} to {@code out}; null code is ignored. */
					private static void collectFunctionNames(String code, ArrayList<String> out) {
						if (code == null) {
							return;
						}
						Matcher m = p.matcher(code);
						while (m.find()) {
							out.add(m.group(1).trim());
						}
					}

				}
							

相关资源