// Building an HTTP-based website analyzer with httpclient-4.0-alpha2
// Online source view: javascript.java
package com.ue.browser;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.ue.browser.core.HTMLElement;
import com.ue.browser.Browser;
import com.ue.browser.util.*;
import com.ue.browser.core.Event;
import com.ue.browser.core.EventCode;
/**
 * Extracts JavaScript-related information from the parsed HTML elements of a page:
 * inline event handlers attached to elements, and the names of functions declared
 * in inline or externally referenced scripts.
 *
 * <p>All public methods are best-effort: any exception raised while scanning is
 * printed to standard error and whatever was collected up to that point is returned.
 */
public class Javascript {

    /** Event attribute names to probe on every element, as supplied by {@code EventCode.getEvents()}. */
    private static final ArrayList<String> events = EventCode.getEvents();

    /** Events collected by {@link #buildEvents()}. */
    private ArrayList<Event> jevents = new ArrayList<Event>();

    /** Parsed elements of the page being analysed. */
    private ArrayList<HTMLElement> elements;

    /** Browser supplied by the caller; may be {@code null} when the URL-based constructor is used. */
    private Browser browser;

    /** Page URL used to resolve script references; when {@code null} the element's own document URL is used. */
    private String firsturl;

    /**
     * Matches the keyword {@code function} followed by whitespace and captures the
     * following run of characters up to the next {@code (} or whitespace.
     */
    static final String regex = "function\\s+([^\\(\\s]+)";

    /** Compiled form of {@link #regex}; compiled once and shared. */
    static final Pattern p = Pattern.compile(regex);

    /**
     * Creates an analyser for the given elements, associated with a browser.
     *
     * @param elements parsed elements of the page
     * @param browser  browser associated with this analyser
     */
    public Javascript(ArrayList<HTMLElement> elements, Browser browser) {
        this.setElements(elements);
        this.setBrowser(browser);
    }

    /**
     * Creates an analyser for the given elements, resolving script references
     * against an explicit page URL.
     *
     * @param elements parsed elements of the page
     * @param firsturl URL of the page, used as the base for script references
     */
    public Javascript(ArrayList<HTMLElement> elements, String firsturl) {
        this.setElements(elements);
        this.firsturl = firsturl;
    }

    /** @return the events collected so far by {@link #buildEvents()} (the live list, not a copy) */
    public ArrayList<Event> getJevents() {
        return jevents;
    }

    /** @param jevents list that subsequent {@link #buildEvents()} calls append to */
    public void setJevents(ArrayList<Event> jevents) {
        this.jevents = jevents;
    }

    /** @return the elements being analysed (the live list, not a copy) */
    public ArrayList<HTMLElement> getElements() {
        return elements;
    }

    /** @param elements parsed elements of the page to analyse */
    public void setElements(ArrayList<HTMLElement> elements) {
        this.elements = elements;
    }

    /** @return the browser supplied by the caller, or {@code null} if none was set */
    public Browser getBrowser() {
        return browser;
    }

    /** @param browser browser to associate with this analyser */
    public void setBrowser(Browser browser) {
        this.browser = browser;
    }

    /**
     * Lists the event handlers found on the page's elements.
     *
     * @return one entry per (element, event attribute) pair whose attribute is present,
     *         formatted as {@code "<tag>: <attribute value>"}; never {@code null}
     */
    public ArrayList<String> eventFunction() {
        ArrayList<String> al = new ArrayList<String>();
        if (elements == null) {
            return al;
        }
        try {
            for (HTMLElement he : elements) {
                for (String event : events) {
                    String handler = he.getAttribute(event);
                    if (handler != null) {
                        al.add(he.getTag() + ": " + handler);
                    }
                }
            }
        } catch (Exception e) {
            // Best-effort scan: report the problem and return what was collected so far.
            e.printStackTrace();
        }
        return al;
    }

    /**
     * For every element that carries an event-handler attribute, attaches an
     * {@code Event} to the element and records another one in {@link #getJevents()}.
     */
    public void buildEvents() {
        if (elements == null) {
            return;
        }
        try {
            for (HTMLElement he : elements) {
                for (String event : events) {
                    String handler = he.getAttribute(event);
                    if (handler != null) {
                        int eCode = EventUtil.getEventType(event);
                        // Two distinct Event instances on purpose: the element and the
                        // collected list do not share the same object.
                        he.setEvents(new Event(eCode, handler));
                        jevents.add(new Event(eCode, handler));
                    }
                }
            }
        } catch (Exception e) {
            // Best-effort scan: report the problem and keep what was built so far.
            e.printStackTrace();
        }
    }

    /**
     * Collects the names of functions declared in the page's inline scripts.
     *
     * <p>Only elements whose tag is exactly {@code "Script"} and that have a
     * {@code "daima"} attribute are inspected.
     * NOTE(review): "daima" presumably holds the inline script source as stored by
     * the HTML parser - confirm against the parser.
     *
     * @return function names in order of appearance (duplicates kept); never {@code null}
     */
    public ArrayList<String> parseInterFunction() {
        ArrayList<String> al = new ArrayList<String>();
        if (elements == null) {
            return al;
        }
        try {
            for (HTMLElement he : elements) {
                if ("Script".equals(he.getTag())) {
                    String code = he.getAttribute("daima");
                    if (code != null) {
                        collectFunctionNames(code, al);
                    }
                }
            }
        } catch (Exception e) {
            // Best-effort scan: report the problem and return what was collected so far.
            e.printStackTrace();
        }
        return al;
    }

    /**
     * Collects the names of functions declared in scripts referenced through a
     * {@code src} attribute. Each reference is resolved against the page URL,
     * fetched with {@code Browser.JsNavigate}, and scanned with {@link #p}.
     *
     * @return function names in order of appearance (duplicates kept); never {@code null}
     */
    public ArrayList<String> parseOuterFunction() {
        // A fresh Browser is used for the script downloads rather than the one held
        // by this instance (which may be null when the URL-based constructor was used).
        Browser fetcher = new Browser();
        ArrayList<String> al = new ArrayList<String>();
        if (elements == null) {
            return al;
        }
        try {
            for (HTMLElement he : elements) {
                if (!"Script".equals(he.getTag())) {
                    continue;
                }
                String src = he.getAttribute("src");
                if (src == null) {
                    continue;
                }
                // Base URL: the explicit one if given, otherwise the element's document URL.
                String pageUrl = (firsturl != null)
                        ? firsturl
                        : he.getBrowser().getDocument().getUrl();
                String jsurl = resolveScriptUrl(src, pageUrl);
                if (StringUtil.isJsFile(jsurl)) {
                    String jsResult = fetcher.JsNavigate(jsurl);
                    if (jsResult != null) {
                        collectFunctionNames(jsResult, al);
                    }
                }
            }
        } catch (Exception e) {
            // Best-effort scan: report the problem and return what was collected so far.
            e.printStackTrace();
        }
        return al;
    }

    /** Appends to {@code out} every function name that {@link #p} finds in {@code code}. */
    private static void collectFunctionNames(String code, ArrayList<String> out) {
        Matcher m = p.matcher(code);
        while (m.find()) {
            out.add(m.group(1).trim());
        }
    }

    /**
     * Turns a script reference into a URL that can be fetched. Script references
     * come in several forms and must be normalised before they are handed to the
     * HTTP client. The steps run in sequence, each on the result of the previous one.
     *
     * @param src     raw value of the script's {@code src} attribute
     * @param pageUrl URL of the page that contains the script
     * @return the resolved script URL
     */
    private static String resolveScriptUrl(String src, String pageUrl) {
        String domain = StringUtil.getDomain(pageUrl);
        String lastPath = StringUtil.getLastPath(pageUrl);
        String protocol = StringUtil.getProtocol(pageUrl);

        String resolved = src;
        // Protocol-relative reference ("//host/path"): prepend the page's protocol.
        if (resolved.startsWith("//")) {
            resolved = protocol + ":" + resolved;
        }
        // Reference starting with a slash: prepend the page's domain.
        if (StringUtil.isSlashStart(resolved)) {
            resolved = domain + resolved;
        }
        // Relative reference into the current directory or below.
        if (StringUtil.isLetterOrNumberStart(resolved)
                && !resolved.startsWith("http:")
                && !resolved.startsWith("https:")) {
            resolved = lastPath + "/" + resolved;
        }
        // Relative reference into a parent directory ("../").
        if (resolved.startsWith("..")) {
            int n = StringUtil.DoubleDotNum(resolved);
            String truepath = StringUtil.getTruePath(pageUrl, n);
            String filename = StringUtil.getAppath(resolved);
            resolved = truepath + "/" + filename;
        }
        return resolved;
    }
}