Thursday, March 16, 2006

C#的一个URL加载器,能处理编码、相对地址解析、GET/POST、HTML的include、页面重定向

要让机器模拟上网,首要的问题是解决HTTP请求与响应。看我们的Url加载器,功能比较强:它考虑了编码、URL的相对地址解析(见RFC),还可以POST数据,还能处理HTML里的include指令,以及页面内容里的重定向,很好用的。

以下代码随便用吧,咱们已经拿这段代码申请过著作权啦。
/// <summary>
/// The basic URL loading routine; the other LoadUrl overloads all delegate to it.
/// Sends an HTTP GET (or a POST when <paramref name="postdata"/> is non-empty),
/// decodes the response body, optionally expands include directives on the client
/// side, and follows redirections reported by GetRedirection.
/// </summary>
/// <param name="uo">
/// The URL to load. Its Url is overwritten with the final response URI, and the
/// reference itself is replaced when a redirection is followed.
/// </param>
/// <param name="encoding">
/// Name of the encoding used to decode a GET response; UTF-8 is used when the name
/// cannot be resolved. POST responses are always decoded as GB2312.
/// </param>
/// <param name="postdata">Form data to POST; null or empty selects HTTP GET.</param>
/// <param name="include">Whether include files are expanded on the client side.</param>
/// <param name="redirectioncounter">Number of redirections followed so far.</param>
/// <returns>The page text, or an empty string when the request or the read fails.</returns>
public static string LoadUrl(ref UrlOperation uo, string encoding, string postdata, bool include, int redirectioncounter)
{
    // Upper bound on chained redirections, so a redirect loop terminates.
    const int MaxRedirections = 5;

    string str;
    string finalUrl;
    string url = uo.Url;

    // Choose HTTP GET or POST.
    if (postdata == null)
    {
        postdata = "";
    }

    if (postdata.Length == 0) // HTTP GET
    {
        HttpWebRequest request;
        try
        {
            request = (HttpWebRequest)WebRequest.Create(url);
        }
        catch
        {
            // Malformed URL or a non-HTTP scheme: best effort, report "no content".
            return "";
        }

        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";

        Encoding source;
        try
        {
            source = Encoding.GetEncoding(encoding);
        }
        catch
        {
            source = Encoding.UTF8;
        }

        // Timeouts surface here, from GetResponse.
        try
        {
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(stream, source))
            {
                // Capture the URI before the response is disposed.
                finalUrl = response.ResponseUri.ToString();
                str = sr.ReadToEnd();
            }
        }
        catch
        {
            return "";
        }
    }
    else // HTTP POST
    {
        try
        {
            byte[] bytes = Encoding.ASCII.GetBytes(postdata);

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            // Content-Length counts the bytes actually written, not the characters.
            request.ContentLength = bytes.Length;

            using (Stream poststream = request.GetRequestStream())
            {
                poststream.Write(bytes, 0, bytes.Length);
            }

            // NOTE(review): the POST path ignores the encoding parameter and always
            // decodes as GB2312; kept as-is because callers may rely on it - confirm.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader sr = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("GB2312")))
            {
                // Capture the URI before the response is disposed.
                finalUrl = response.ResponseUri.ToString();
                str = sr.ReadToEnd();
            }
        }
        catch
        {
            return "";
        }
    }

    uo.Url = finalUrl;

    // Expand include files on the client side.
    if (include)
    {
        // NOTE(review): the original pattern was lost (it appeared as an empty string,
        // which matches at every position). Reconstructed as a server-side-include
        // directive such as <!--#include file="x.inc"--> - confirm against the real source.
        // RegexOptions.Compiled is omitted because these regexes are rebuilt on every call.
        Regex regex = new Regex(@"<!--\s*#include\b.*?-->", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        MatchCollection mc = regex.Matches(str);
        if (mc.Count > 0)
        {
            // Extracts the text between the first and last double quote of a directive.
            Regex urlregex = new Regex("(?<=\").*(?=\")", RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // The pattern has no capturing groups, so Split yields exactly
            // mc.Count + 1 segments: the text around each directive.
            string[] segments = regex.Split(str);

            StringBuilder sb = new StringBuilder();
            sb.Append(segments[0]);
            for (int i = 1; i <= mc.Count; i++)
            {
                string s = mc[i - 1].Value;
                string newurl = urlregex.Match(s).Value;
                if (newurl.Length > 0)
                {
                    // Resolve the include path relative to the current page and load it
                    // through the four-argument overload declared elsewhere in the class.
                    UrlOperation newuo = uo.Forward(newurl);
                    string included = LoadUrl(ref newuo, encoding, "", true);
                    sb.Append(included);
                }
                else
                {
                    // No quoted path in the directive: leave it untouched.
                    sb.Append(s);
                }
                sb.Append(segments[i]);
            }

            str = sb.ToString();
        }
    }

    // Page redirection.
    string redirection = GetRedirection(str).Trim();
    if (redirection.Length > 0 && redirectioncounter < MaxRedirections)
    {
        uo = uo.Forward(redirection);
        return LoadUrl(ref uo, encoding, postdata, include, redirectioncounter + 1);
    }

    return str;
}

0 Comments:

Post a Comment

Subscribe to Post Comments [Atom]

<< Home