2023年6月29日发(作者:)
使用C#实现爬虫技术。这是我的第一个爬虫项目,也是我第一次接触C#窗体程序。我的需求:页面中有音频文件,但是它是单个下载的,用户需要一个一个地去点击下载按钮进行下载。我的目的:根据用户的需求筛选出相关的数据,然后拿到页面上用户筛选的数据,实现批量下载,将文件下载并存放到用户本地文件夹中,然后对下载下来的这些文件进行播放。主要用到的插件有:CefSharp、HtmlAgilityPack。将浏览器页面嵌入到winForm中:将web页面嵌入到winForm的界面中。//窗体load时执⾏下⾯⽅法private void Form1_Load(object sender, EventArgs e) { CefSettings settings = new CefSettings(); lize(settings); webbrowser = new ChromiumWebBrowser(“要嵌⼊的web地址”); = ; (webbrowser); oadEnd += Webbrowser_FrameLoadEnd;//注册窗体加载事件onload oadEnd += SetCookie; }下面是获取web页面的url地址并做相应的操作:private void Webbrowser_FrameLoadEnd(object sender, FrameLoadEndEventArgs e) { if () { if ( == "页⾯的url地址(不同的地址处理不同的事情)") { string listPage = "想要跳转的页⾯地址"; string js = "='" + listPage + "';"; eScriptAsync(js);//将这段js添加到web页⾯中,它会执⾏此跳转 return; } if ( == "url1") { string html = ""; rceAsync().ContinueWith(task =>//异步执⾏ { html = ;//抓取到的页⾯,然后分析页⾯的代码结构拿到想要的数据 String filePath = SavaProcess(html); }); return; } if ( == "url2") { rceAsync().ContinueWith(task => { string htmlDom = ; var doc = new HtmlDocument(); ml(htmlDom);//可以将html页⾯,使可以⽤类似于操作dom的⼀些⽅法来操作 //拿到总页数 request requoption = new request(); = "POST"; //下⾯是根据抓取到的实际的页⾯结构,和具体的也去需求,去获取页⾯上的数据 var pageTr = Nodes(@"/html[1]/body[1]/div[3]/table[1]/tbody[1]/tr[@class='forPage']/td[1]/div[1]/div[1]");//选择标签数组
if ( > 0) { var p = pageTr[0]; var spanNodes = pageTr[0].SelectNodes(@".//span");//取到该节点下的所有span节点 } } }); return; }
} }设置cookie⽅法private void SetCookie(object sender, oadEndEventArgs e) { var cookieManager = balCookieManager(); CookieVisitor visitor = new CookieVisitor(); okie += Visitor_SendCookie; llCookies(visitor); }/// /// 将Cookie保存到字典COOKIES中 /// /// private void Visitor_SendCookie( obj) { lock (lockObject) { string key = art('.') + "^" + ; string value = ; if (!nsKey(key)) { (key, value); } else { cookies[key] = value; } } }/// /// 将COOKIES解析成 /// /// private CookieCollection GetCookieCollection() { lock (lockObject) { CookieCollection cookieCollection = new CookieCollection(); foreach (var keyValue in cookies) { cookie = new (); = ('^')[0]; = ('^')[1]; = ; (cookie); } return cookieCollection; } }下⾯是已经拿到⾳频⽂件的地址了,然后请求下载地址下载⽂件/// /// 将⽂件下载到本地 /// public void HttpWebRequestGet(Uri url, string fileName, DataModel data) { try { HttpWebRequest AudioReq = (HttpWebRequest)(url); = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"; ive = true; r = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; ent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"; ("Accept-Encoding", "gzip,deflate"); ("Accept-Language", "zh-CN,zh;q=0.9"); ("Upgrade-Insecure-Requests", "1"); ("Cookie", "JSESSIONID=" + JSESSIONID + ";rememberPass=1;userAccount=" + uid + ";#pwd=" + pwd + ";loginByTwoCode=0"); string responseData = ; = "GET"; tType = "application/x-www-form-urlencoded"; string path = rectory + @"AudioListAMR"; if (!(path)) { Directory(path); } HttpWebResponse rsp = (HttpWebResponse)ponse();//获取回写流 //将⽂件存到本地 var localAmrnb = path + "" + fileName; FileStream fs = new FileStream(localAmrnb, , , ite);//创建本地⽂件写⼊流 ath = localAmrnb; var responseStream = ponseStream(); //创建本地⽂件写⼊流 byte[] bArr = new byte[1024]; int iTotalSize = 0; int size = (bArr, 0, (int)); while (size > 0) { iTotalSize += size; (bArr, 0, size); size = (bArr, 0, (int)); } (); (); (); e(); } catch (Exception ex) { ng(); } 
}C#序列化数据并写入文件:List dataList = new List();Writer file1 = new Writer(DownloadDataPath, false);(new JavaScriptSerializer().Serialize(dataList));();e();从文件中读取数据并反序列化:using (Reader sr = new Reader(DownloadDataPath, 8)){// 从⽂件读取并显⽰⾏,直到⽂件的末尾string line = ne();if (line != null){oldData = line;}}Writer file2 = new Writer(DownloadDataPath, false);List oldDataList = new JavaScriptSerializer().Deserialize(oldData);//反序列化读取到的值ge(oldDataList);//将新的数据添加到之前数据的末尾(new JavaScriptSerializer().Serialize(dataList));();e();下面向窗体中添加mediaPlayer播放器。首先添加引用,如下图所示;其次将mediaPlayer组件添加到工具箱中,菜单栏:工具—>选择工具箱选项,添加如下组件。添加完之后就可以在工具箱中将组件直接拖到界面上了,具体实现播放的代码如下所示: public Boolean getMediaPlayData(){ (); for (int i = 0; i < ; i++) { Item(ia(oldDataList[i].LocalPath));//将所有要播放的⽂件添加到播放列表
} return true;}/// /// 点击查询并播放按钮 /// /// private void button1_Click(object sender, EventArgs e) { if (getMediaPlayData()) { art = true; e("shuffle", false); (); } }private void wmp_PlayStateChange(object sender, AxWMPLib._WMPOCXEvents_PlayStateChangeEvent e){ //如果已播放完毕就播放下⼀个⽂件 if ((yState)te == eady) (); }以上不是完整的代码。总体来说把⼤致的过程和⽤到的⼀些技术记录下来,加深记忆。
发布者:admin,转载请注明出处:http://www.yc00.com/news/1687984648a63796.html
评论列表(0条)