麻豆小视频在线观看_中文黄色一级片_久久久成人精品_成片免费观看视频大全_午夜精品久久久久久久99热浪潮_成人一区二区三区四区

首頁 > 學院 > 開發設計 > 正文

C#實現網頁爬蟲

2019-11-14 13:30:18
字體:
來源:轉載
供稿:網友

HTTP請求工具類(功能:1、獲取網頁HTML;2、下載網絡圖片):

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace Utils
{
    /// <summary>
    /// HTTP request helper: fetches the HTML of a page and downloads remote files
    /// into a "download" folder next to the executable.
    /// </summary>
    public class HttpRequestUtil
    {
        /// <summary>User-Agent header sent with every request.</summary>
        private const string UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)";

        /// <summary>
        /// Fetches a page with an HTTP GET request and returns its body decoded as UTF-8.
        /// </summary>
        /// <param name="url">Absolute http/https URL of the page.</param>
        /// <returns>The response body as text.</returns>
        /// <exception cref="WebException">The request failed.</exception>
        /// <exception cref="InvalidCastException"><paramref name="url"/> is not an HTTP(S) URL.</exception>
        public static string GetPageHtml(string url)
        {
            // A direct cast fails loudly for non-HTTP schemes instead of producing a null
            // reference a few lines later.
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.UserAgent = UserAgent;

            // Nothing is sent until GetResponse() is called. The using blocks release the
            // connection even when reading the body throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Downloads the resource at <paramref name="url"/> into the "download" folder under
        /// <see cref="Application.StartupPath"/>. The local file name is the part of the URL
        /// after the last '/'. Does nothing when that file already exists.
        /// </summary>
        /// <param name="url">Absolute http/https URL of the file.</param>
        /// <exception cref="ArgumentException">The URL has no file name after the last '/'.</exception>
        /// <exception cref="WebException">The request failed.</exception>
        /// <exception cref="IOException">The local file could not be written.</exception>
        public static void HttpDownloadFile(string url)
        {
            string fileName = url.Substring(url.LastIndexOf('/') + 1);
            if (fileName.Length == 0)
            {
                throw new ArgumentException("The URL does not end with a file name.", "url");
            }

            // CreateDirectory is a no-op when the folder already exists.
            string path = Path.Combine(Application.StartupPath, "download");
            Directory.CreateDirectory(path);

            string filePathName = Path.Combine(path, fileName);
            if (File.Exists(filePathName))
            {
                return;
            }

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.UserAgent = UserAgent;
            request.Proxy = null;

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            {
                // The file is opened outside the try block so that cleanup only ever deletes
                // a file this call created.
                FileStream fileStream = new FileStream(filePathName, FileMode.Create);
                bool completed = false;
                try
                {
                    responseStream.CopyTo(fileStream);
                    completed = true;
                }
                finally
                {
                    fileStream.Dispose();
                    if (!completed)
                    {
                        // A truncated file would be mistaken for a finished download by the
                        // File.Exists check above on the next attempt.
                        TryDelete(filePathName);
                    }
                }
            }
        }

        /// <summary>Deletes a file, ignoring failures so the original error is not masked.</summary>
        private static void TryDelete(string filePathName)
        {
            try
            {
                File.Delete(filePathName);
            }
            catch (IOException)
            {
            }
            catch (UnauthorizedAccessException)
            {
            }
        }
    }
}
View Code

多線程爬取網頁代碼:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Utils;

namespace 爬蟲
{
    /// <summary>
    /// Crawler window. button1 starts a crawl of 100 list pages split across 10 worker
    /// threads, button2 stops it, button3 pauses it and button4 resumes it.
    /// </summary>
    public partial class Form1 : Form
    {
        private const int WorkerCount = 10;
        private const int PagesPerWorker = 10;
        private const string SiteRoot = "http://tt.mop.com";

        // Built once instead of once per page. Group 2 is the href value.
        private static readonly Regex ProfileLinkRegex =
            new Regex("<a[\\s]+class=\"J-userPic([^<>]*?)[\\s]+href=\"([^\"]*?)\"");

        // Group 1 is the src value.
        private static readonly Regex ImageRegex =
            new Regex("<p class=\"tc mb10\"><img[\\s]+src=\"([^\"]*?)\"");

        private readonly List<Thread> threadList = new List<Thread>();

        // Signaled while running, reset while paused. Workers wait on it between requests.
        private readonly ManualResetEvent pauseGate = new ManualResetEvent(true);

        // Cancellation source of the current run; null before the first run.
        private CancellationTokenSource cancellation;

        /// <summary>Counters and cancellation token shared by the workers of one run.</summary>
        private sealed class CrawlRun
        {
            public int PagesDone;
            public int ProfilesDone;
            public int ImagesDone;
            public DateTime StartedUtc;
            public CancellationToken Token;
        }

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>Starts a new crawl.</summary>
        private void button1_Click(object sender, EventArgs e)
        {
            button3.Enabled = true;
            button2.Enabled = true;
            button1.Enabled = false;
            lblPage.Text = "已完成頁數:0";

            // Make sure no worker from an earlier run is still active, then start unpaused.
            StopWorkers();
            cancellation = new CancellationTokenSource();

            CrawlRun run = new CrawlRun();
            run.StartedUtc = DateTime.UtcNow;
            run.Token = cancellation.Token;

            for (int i = 1; i <= WorkerCount; i++)
            {
                // Copy the loop variable so every closure sees its own worker number.
                int workerNumber = i;
                Thread worker = new Thread(() => CrawlPages(workerNumber, run));

                // Background threads cannot keep the process alive after the form closes.
                worker.IsBackground = true;
                worker.Start();
                threadList.Add(worker);
            }
        }

        /// <summary>
        /// Worker body: crawls this worker's slice of list pages and reports progress.
        /// </summary>
        /// <param name="workerNumber">1-based worker number; selects the slice of pages.</param>
        /// <param name="run">Shared state of the run this worker belongs to.</param>
        private void CrawlPages(int workerNumber, CrawlRun run)
        {
            for (int j = 1; j <= PagesPerWorker; j++)
            {
                if (!WaitWhilePaused(run))
                {
                    return;
                }

                try
                {
                    // The page index is local to this worker; sharing it between threads
                    // would let one worker fetch a page number computed by another.
                    int index = (workerNumber - 1) * PagesPerWorker + j;
                    CrawlListPage(index, run);
                }
                catch (Exception ex)
                {
                    // Best effort: a failed page is skipped but still counted as done.
                    System.Diagnostics.Debug.WriteLine("Page crawl failed: " + ex.Message);
                }

                if (run.Token.IsCancellationRequested)
                {
                    return;
                }

                // The atomic increment gives each page a unique count, so exactly one
                // worker observes the final value.
                int pagesDone = Interlocked.Increment(ref run.PagesDone);
                PostToUi(run, () =>
                {
                    lblPage.Text = "已完成頁數:" + Read(ref run.PagesDone).ToString();
                });

                if (pagesDone == WorkerCount * PagesPerWorker)
                {
                    PostToUi(run, () =>
                    {
                        button1.Enabled = true;
                        MessageBox.Show("完成!");
                    });
                }
            }
        }

        /// <summary>
        /// Fetches one list page, follows every site-relative profile link on it and
        /// downloads the matching images found on each profile page.
        /// </summary>
        private void CrawlListPage(int index, CrawlRun run)
        {
            string pageHtml = HttpRequestUtil.GetPageHtml(SiteRoot + "/c44/0/1_" + index.ToString() + ".html");

            foreach (Match profileMatch in ProfileLinkRegex.Matches(pageHtml))
            {
                if (!WaitWhilePaused(run))
                {
                    return;
                }

                string url = profileMatch.Groups[2].Value;
                if (!url.StartsWith("/", StringComparison.Ordinal))
                {
                    continue;
                }

                string imgPageHtml = HttpRequestUtil.GetPageHtml(SiteRoot + url);
                Interlocked.Increment(ref run.ProfilesDone);
                PostToUi(run, () =>
                {
                    lblPerson.Text = "已完成條數:" + Read(ref run.ProfilesDone).ToString();
                });

                foreach (Match imageMatch in ImageRegex.Matches(imgPageHtml))
                {
                    if (!WaitWhilePaused(run))
                    {
                        return;
                    }

                    string imgUrl = imageMatch.Groups[1].Value;
                    if (!imgUrl.StartsWith("http://i1", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    try
                    {
                        HttpRequestUtil.HttpDownloadFile(imgUrl);
                        Interlocked.Increment(ref run.ImagesDone);
                        PostToUi(run, () => ShowDownloadProgress(run));
                    }
                    catch (Exception ex)
                    {
                        // One broken image must not abort the rest of the page.
                        System.Diagnostics.Debug.WriteLine("Image download failed: " + ex.Message);
                    }

                    Thread.Sleep(1);
                }
            }
        }

        /// <summary>Updates the image counter and download speed labels (UI thread only).</summary>
        private void ShowDownloadProgress(CrawlRun run)
        {
            int images = Read(ref run.ImagesDone);
            lblNum.Text = "已下載圖片數" + images.ToString();

            double seconds = DateTime.UtcNow.Subtract(run.StartedUtc).TotalSeconds;
            if (seconds > 0)
            {
                lblSpeed.Text = "速度:" + (images / seconds).ToString("0.0") + "張/秒";
            }
        }

        /// <summary>
        /// Blocks while the crawl is paused.
        /// </summary>
        /// <returns>false when the run was cancelled and the worker should exit.</returns>
        private bool WaitWhilePaused(CrawlRun run)
        {
            pauseGate.WaitOne();
            return !run.Token.IsCancellationRequested;
        }

        /// <summary>Reads a counter that other threads update with Interlocked.</summary>
        private static int Read(ref int value)
        {
            return Interlocked.CompareExchange(ref value, 0, 0);
        }

        /// <summary>
        /// Queues <paramref name="update"/> on the UI thread without blocking the worker.
        /// Updates from a cancelled run, or arriving after the form is gone, are dropped.
        /// </summary>
        private void PostToUi(CrawlRun run, Action update)
        {
            Action guarded = () =>
            {
                if (!run.Token.IsCancellationRequested)
                {
                    update();
                }
            };

            try
            {
                if (IsDisposed || !IsHandleCreated)
                {
                    return;
                }

                BeginInvoke(guarded);
            }
            catch (InvalidOperationException)
            {
                // The form was closed between the check and the call (this also covers
                // ObjectDisposedException); there is nothing left to update.
            }
        }

        /// <summary>
        /// Asks all workers of the current run to exit. Workers stop at their next
        /// checkpoint; a request already in flight is allowed to finish.
        /// </summary>
        private void StopWorkers()
        {
            if (cancellation != null)
            {
                cancellation.Cancel();
            }

            // Open the gate so paused workers wake up and observe the cancellation.
            pauseGate.Set();
            threadList.Clear();
        }

        /// <summary>Stops the crawl.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            StopWorkers();
            button1.Enabled = true;
            button2.Enabled = false;
            button3.Enabled = false;
            button4.Enabled = false;
        }

        private void Form1_FormClosing(object sender, FormClosingEventArgs e)
        {
            StopWorkers();
        }

        /// <summary>Pauses the crawl: workers block at their next checkpoint.</summary>
        private void button3_Click(object sender, EventArgs e)
        {
            pauseGate.Reset();
            button3.Enabled = false;
            button4.Enabled = true;
        }

        /// <summary>Resumes a paused crawl.</summary>
        private void button4_Click(object sender, EventArgs e)
        {
            pauseGate.Set();
            button3.Enabled = true;
            button4.Enabled = false;
        }
    }
}
View Code

截圖:

 


發表評論 共有條評論
用戶名: 密碼:
驗證碼: 匿名發表
主站蜘蛛池模板: 手机国产乱子伦精品视频 | 日韩av在线网址 | 亚洲日本高清 | 一级毛片播放 | 手机黄色小视频 | 久久精品一区二区三区国产主播 | 国产宾馆3p国语对白 | 视频久久免费 | 99精美视频 | 蜜桃视频网站在线观看 | 草莓福利社区在线 | 姑娘第四集免费看视频 | 久久久看 | 欧美激情性色生活片在线观看 | av在线1| 久久国产精品免费视频 | 欧美日韩一区,二区,三区,久久精品 | 一区二区三视频 | 亚洲成人精品一区二区 | 伊人99re| 久久久国产精品电影 | 五月天影院,久久综合, | 男女无遮挡羞羞视频 | 一级黄片毛片免费看 | 日本一区二区不卡高清 | 欧洲精品久久 | 777午夜精品视频在线播放 | 欧美性猛交xxxxx按摩国内 | 色综合欧美 | av在线高清观看 | 色av成人天堂桃色av | 国产精品视频一区二区三区四区五区 | 精品一区二区三区在线观看视频 | 国产成人自拍av | 国色天香综合网 | 免费观看国产视频 | 久久久久久久网站 | 亚洲视频在线网 | 一本精品999爽爽久久久 | 成人在线观看一区 | 国产一级淫片在线观看 |