ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

快速排名系统核心代码

2022-05-05 14:31:48  阅读:201  来源: 互联网

标签:string 代码 await 系统核心 排名 var new rsv page


        /// <summary>
        /// Runs one browsing session for <paramref name="link"/>: picks a proxy from
        /// <c>IpHelper.GetAvilableIpList</c>, launches a browser through it, opens the link,
        /// visits a random half of the links returned by <c>GetAllHrefs</c>, then clicks the
        /// next-page control and hands the page over to <c>ChildTaskRun</c>.
        /// </summary>
        /// <param name="link">URL of the page to open first.</param>
        /// <returns>A task that completes when the session (including any retry it started) has finished.</returns>
        /// <remarks>
        /// When navigation fails, no eqid is found, or any other exception is caught while the
        /// browser is still open, the browser is closed and the method calls itself again with a
        /// freshly selected proxy. Retries are awaited rather than blocked on, so no thread is
        /// parked while a nested session runs.
        /// </remarks>
        async static Task TaskRun(string link)
        {
            // One generator per call instead of a new time-seeded instance for every draw.
            var random = new Random();
            string ip = string.Empty;
            string errmsg = "";
            while (string.IsNullOrEmpty(ip))
            {
                // Work on a local snapshot: IpList is shared state and may be replaced by a
                // concurrent TaskRun between the Count() call and the indexer access.
                var candidates = IpHelper.GetAvilableIpList(out errmsg);
                IpList = candidates;
                if (!string.IsNullOrEmpty(errmsg))
                {
                    Console.WriteLine(errmsg);
                }
                else if (candidates == null || candidates.Count() == 0)
                {
                    // Next(0, 0) returns 0, so indexing an empty list would throw here.
                    Console.WriteLine("代理IP列表为空,稍后重试...");
                }
                else
                {
                    int index = random.Next(0, candidates.Count());
                    ip = candidates[index];
                    Console.WriteLine("当前代理IP:" + ip);
                }
                // Random 1-3 s pause after every attempt (kept from the original pacing).
                await Task.Delay(random.Next(1000, 3000));
            }

            LaunchOptions options = new LaunchOptions
            {
                Headless = false,
                Args = new[] {
                    $"--proxy-server={ip}",
                    "--start-maximized", // maximized window
                    "--disable-infobars", // hide the automation info bar
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--ignore-certificate-errors",
                    "--app=https://www.baidu.com/"
                },
                IgnoreHTTPSErrors = true
            };
            var extra = new PuppeteerExtra();
            extra.Use(new StealthPlugin());
            using (var browser = await extra.LaunchAsync(options))
            {
                using (var page = await browser.NewPageAsync())
                {
                    string userAgent = UAList[random.Next(0, UAList.Count())];
                    await page.SetUserAgentAsync(userAgent);
                    ViewPortOptions vOptions = new ViewPortOptions
                    {
                        Width = 1920,
                        Height = 1080
                    };
                    await page.SetViewportAsync(vOptions);
                    Dictionary<string, string> dicHeader = new Dictionary<string, string>
                    {
                        { "referer", "https://www.baidu.com/s?ie=utf-8&f=3&rsv_bp=1&tn=baidu&wd=c%23%20htmlagility&oq=%25E5%25BE%25AE%25E8%25B0%25B1%25E6%25A3%2580%25E6%25B5%258B%25E6%2590%259C%25E4%25BA%2586%25E7%25BD%2591&rsv_pq=eb9ff0ce00008fdb&rsv_t=5794Qmog%2FW4kfXpoYcJXzzRk4iN0Dx7vYa8xiv%2Fhej8i69AmoTGkqlME680&rqlang=cn&rsv_dl=ts_2&rsv_enter=1&rsv_sug3=10&rsv_sug1=3&rsv_sug7=100&rsv_sug2=1&rsv_btype=t&prefixsug=%2526lt%253B%2523%2520htmla&rsp=2&inputT=6556&rsv_sug4=8091" }
                    };
                    await page.SetExtraHttpHeadersAsync(dicHeader);
                    try
                    {
                        try
                        {
                            await page.GoToAsync(link, WaitUntilNavigation.Networkidle2);
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine("启动浏览器异常:" + ex.Message);
                            await browser.CloseAsync();
                            // Start over with a new proxy; awaited so no thread is blocked.
                            await TaskRun(link);
                            return;
                        }

                        string eqid = await Geteqid(page);
                        if (string.IsNullOrEmpty(eqid))
                        {
                            await browser.CloseAsync();
                            // No eqid on this page: start over with a new proxy.
                            await TaskRun(link);
                            return;
                        }

                        string pagesource = await page.GetContentAsync();
                        // All links on the page that should be visited, then a random half of them.
                        List<LinkModel> linkList = GetAllHrefs(pagesource, eqid);
                        linkList = GetListRandomItems(linkList, linkList.Count / 2);

                        // Visit each selected link in its own tab; the tab is disposed by the using block.
                        foreach (var href in linkList)
                        {
                            using (var newPage = await browser.NewPageAsync())
                            {
                                await newPage.SetUserAgentAsync(userAgent);
                                await newPage.SetViewportAsync(vOptions);
                                try
                                {
                                    await newPage.GoToAsync(href.link, WaitUntilNavigation.DOMContentLoaded);
                                    // NOTE(review): this waits for a further navigation after GoToAsync has
                                    // already completed; if none occurs it times out after 15 s and
                                    // ScrollPage is skipped. Presumably meant for a follow-up redirect — confirm.
                                    await newPage.WaitForNavigationAsync(new NavigationOptions { Timeout = 15000 });
                                    await ScrollPage(newPage, 300, 700, 6, 500, 800);
                                }
                                catch (Exception ex)
                                {
                                    Console.WriteLine("当前打开页面链接异常:" + ex.Message);
                                }
                            }
                        }

                        // NOTE(review): "> 1" treats a single visited link like "no target links"; confirm intent.
                        if (linkList.Count > 1)
                        {
                            Console.WriteLine("休息6秒钟...");
                            await Task.Delay(6000);
                        }
                        else
                        {
                            Console.WriteLine("当前结果页面暂无目标链接...");
                        }

                        await page.ClickAsync(".page-inner_2jZi2>a:last-child"); // click "next page"
                        await page.ReloadAsync();
                        if (page.Url.Contains("pass.baidu.com"))
                        {
                            // Caught by the outer catch below, which closes the browser and retries.
                            throw new Exception("安全验证");
                        }
                        await Task.Run(() => ChildTaskRun(browser, page, userAgent, vOptions));
                        await browser.CloseAsync();
                        Console.WriteLine("全部任务已完成....");
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("global:" + ex.Message);
                        // Retry only when the browser is still open; a closed browser means an
                        // earlier branch already shut the session down.
                        if (browser != null && !browser.IsClosed)
                        {
                            await browser.CloseAsync();
                            await TaskRun(link);
                        }
                    }
                }
            }
        }

 

标签:string,代码,await,系统核心,排名,var,new,rsv,page
来源: https://www.cnblogs.com/wzf-Learning/p/16224627.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有