從Internet上抓取指定URL的源碼的方案(C#)
2024-07-21 02:18:22
供稿:網友
調用方式:
#region Test: fetch a remote page
getpagecode fetcher = new getpagecode();

// Target page, and where the result should go. When no output path is
// assigned, the downloaded text is exposed through outstring instead.
fetcher.url = "http://ppcode.com/";
fetcher.outfilepath = filepath;

// Proxy configuration. proxystate: 1 = go through the proxy, 0 = direct.
// The remaining proxy settings are only consulted when proxystate is 1.
fetcher.proxystate = 1;
fetcher.proxyaddress = "http://proxyname.com"; // proxy server address
fetcher.proxyport = "80";                      // proxy server port
fetcher.proxydomain = "bqc";                   // proxy account domain
fetcher.proxyaccount = "proxy";                // proxy account name
fetcher.proxypassword = "password";            // proxy account password

// Perform the download.
fetcher.getsource();

string errorNote = fetcher.notemessage;  // populated when something went wrong
string pageSource = fetcher.outstring;   // the downloaded text
#endregion
類代碼:
using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using System.Web;
namespace test.com
{
/// <summary>
/// Fetches the source text of the page at a URL, optionally through an
/// authenticated proxy, and either keeps the text in memory
/// (<see cref="outstring"/>) or writes it to a file (<see cref="outfilepath"/>).
/// Created: 2004-03-22
/// Author: rexsp msn:[email protected]
/// </summary>
/// <remarks>
/// Public member names are intentionally lowercase; existing callers use them
/// in that form.
/// </remarks>
public class getpagecode
{
    #region Private fields
    /// <summary>Address of the page to download.</summary>
    private string _url = null;
    /// <summary>Proxy switch: 0 = direct connection, 1 = use the proxy.</summary>
    private int _proxystate = 0;
    /// <summary>Proxy server address.</summary>
    private string _proxyaddress = null;
    /// <summary>Proxy server port.</summary>
    private string _proxyport = null;
    /// <summary>Proxy account name.</summary>
    private string _proxyaccount = null;
    /// <summary>Proxy account password.</summary>
    private string _proxypassword = null;
    /// <summary>Proxy account domain.</summary>
    private string _proxydomain = null;
    /// <summary>Path of the output file.</summary>
    private string _outfilepath = null;
    /// <summary>Downloaded text when no output file is used.</summary>
    private string _outstring = null;
    /// <summary>Message describing the last failure.</summary>
    private string _notemessage;
    #endregion

    #region Public properties
    /// <summary>
    /// URL of the page to read.
    /// </summary>
    public string url
    {
        get { return _url; }
        set { _url = value; }
    }

    /// <summary>
    /// Proxy switch. The proxy settings are applied only when this is 1.
    /// </summary>
    public int proxystate
    {
        get { return _proxystate; }
        set { _proxystate = value; }
    }

    /// <summary>
    /// Proxy server address, including the scheme (for example "http://host").
    /// </summary>
    public string proxyaddress
    {
        get { return _proxyaddress; }
        set { _proxyaddress = value; }
    }

    /// <summary>
    /// Proxy server port. <see cref="getsource"/> sets it to "80" when it is
    /// null and the proxy is enabled.
    /// </summary>
    public string proxyport
    {
        get { return _proxyport; }
        set { _proxyport = value; }
    }

    /// <summary>
    /// Proxy account name.
    /// </summary>
    public string proxyaccount
    {
        get { return _proxyaccount; }
        set { _proxyaccount = value; }
    }

    /// <summary>
    /// Proxy account password.
    /// </summary>
    public string proxypassword
    {
        get { return _proxypassword; }
        set { _proxypassword = value; }
    }

    /// <summary>
    /// Proxy account domain.
    /// </summary>
    public string proxydomain
    {
        get { return _proxydomain; }
        set { _proxydomain = value; }
    }

    /// <summary>
    /// Output file path. When null or empty, the downloaded text is stored in
    /// <see cref="outstring"/> instead of being written to disk.
    /// </summary>
    public string outfilepath
    {
        get { return _outfilepath; }
        set { _outfilepath = value; }
    }

    /// <summary>
    /// Text downloaded by the last <see cref="getsource"/> call, or null when
    /// that call failed or wrote its result to a file.
    /// </summary>
    public string outstring
    {
        get { return _outstring; }
    }

    /// <summary>
    /// Failure message from the last <see cref="getsource"/> call, or null
    /// when that call succeeded.
    /// </summary>
    public string notemessage
    {
        get { return _notemessage; }
    }
    #endregion

    #region Constructor
    /// <summary>
    /// Creates an instance with no URL, no proxy and no output file.
    /// </summary>
    public getpagecode()
    {
    }
    #endregion

    #region Public methods
    /// <summary>
    /// Downloads <see cref="url"/> and stores the text in
    /// <see cref="outstring"/>, or writes it to <see cref="outfilepath"/>
    /// (replacing any existing file) when a path is set.
    /// </summary>
    /// <remarks>
    /// Failures while downloading or saving do not throw; they are reported
    /// through <see cref="notemessage"/>. Creating the request and the proxy
    /// happens before that protection, so a null or malformed
    /// <see cref="url"/> or proxy address still throws to the caller.
    /// </remarks>
    public void getsource()
    {
        // Clear the previous call's results so a reused instance never
        // reports a stale page or a stale error.
        _outstring = null;
        _notemessage = null;

        WebRequest request = WebRequest.Create(_url);
        if (_proxystate == 1)
        {
            request.Proxy = BuildProxy();
        }

        try
        {
            string pageSource = Download(request);
            if (_outfilepath == null || _outfilepath.Length == 0)
            {
                _outstring = pageSource;
            }
            else
            {
                Save(pageSource, _outfilepath);
            }
        }
        catch (Exception ex)
        {
            // Best effort by design: report instead of throwing. The original
            // message is kept as the prefix; the underlying cause is appended
            // because a failed file write also ends up here.
            _notemessage = "出錯了,請檢查網絡是否連通;" + ex.Message;
        }
    }
    #endregion

    #region Private helpers
    /// <summary>
    /// Builds a dedicated proxy from the proxy settings, defaulting the port to 80.
    /// </summary>
    private WebProxy BuildProxy()
    {
        if (_proxyport == null)
        {
            _proxyport = "80";
        }

        // A fresh WebProxy is used on purpose: the request's default proxy is
        // not guaranteed to be a WebProxy (so casting it can fail), and
        // editing it would change a shared object.
        WebProxy proxy = new WebProxy();
        proxy.Address = new Uri(_proxyaddress + ":" + _proxyport);
        proxy.Credentials = new NetworkCredential(_proxyaccount, _proxypassword, _proxydomain);
        return proxy;
    }

    /// <summary>
    /// Sends the request and returns the whole response body as text.
    /// </summary>
    private static string Download(WebRequest request)
    {
        // using blocks release the connection, stream and reader even when
        // reading fails part-way.
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.Default))
        {
            // NOTE(review): the body is decoded with Encoding.Default, not the
            // charset declared by the server - confirm this is intended.
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Writes the text to the given path, replacing any existing file.
    /// </summary>
    private static void Save(string text, string path)
    {
        // append = false truncates an existing file, which covers the former
        // delete-then-append sequence in a single step.
        using (StreamWriter writer = new StreamWriter(path, false, Encoding.Default))
        {
            writer.Write(text);
        }
    }
    #endregion
}
}