ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

通过 PhantomJS 抓取仁医在线的练习题

2022-06-02 09:02:34  阅读:100  来源: 互联网

标签:练习题 function console log phantomjs pageHtml var return 仁医


先模拟登录,再按指定课程逐个抓取练习题。目前还不够完善,可能会抓到重复的题目。

// Single PhantomJS web page instance reused for every request in this script.
var page = require('webpage').create();

// User-agent string sent with each request made through `page`.
page.settings.userAgent = 'chrome';

// Encoding used for PhantomJS console output.
// NOTE(review): 'gbk' presumably targets a Chinese-locale Windows console — confirm.
phantom.outputEncoding = 'gbk';

// Relay console messages emitted inside the loaded page to the PhantomJS console.
page.onConsoleMessage = function(text) {
    console.log(text);
};

/**
 * Returns the part of `targetString` delimited by `beginString` and `endString`.
 *
 * - Falsy `targetString`, or both delimiters falsy: `targetString` is returned as is.
 * - Only `endString` given: everything before its first occurrence, or '' if absent.
 * - Only `beginString` given: everything after its first occurrence, or '' if absent.
 * - Both given: the text between the first `beginString` and the first `endString`
 *   that follows it, or null if either delimiter cannot be found.
 *
 * @param {string} targetString - Text to search in.
 * @param {string} beginString - Delimiter that precedes the wanted text (may be empty).
 * @param {string} endString - Delimiter that follows the wanted text (may be empty).
 * @returns {?string} The extracted text, '' or null as described above.
 */
function getBetween(targetString, beginString, endString) {
    if (!targetString || (!beginString && !endString)) {
        return targetString;
    }

    // No opening delimiter: take the prefix up to the closing delimiter.
    if (!beginString) {
        var endAt = targetString.indexOf(endString);
        return endAt < 0 ? '' : targetString.substring(0, endAt);
    }

    var beginAt = targetString.indexOf(beginString);
    var contentStart = beginAt + beginString.length;

    // No closing delimiter: take the suffix after the opening delimiter.
    if (!endString) {
        return beginAt < 0 ? '' : targetString.substring(contentStart);
    }

    // Both delimiters requested: a miss on either one yields null (not '').
    if (beginAt < 0) {
        return null;
    }
    var contentEnd = targetString.indexOf(endString, contentStart);
    if (contentEnd < 0) {
        return null;
    }
    return targetString.substring(contentStart, contentEnd);
}

var fs = require('fs');

// Flat list of pairs, one pair per line:
//   [2 * k]     value sent as the `q_id` query parameter of the practice pages
//   [2 * k + 1] number of questions to capture for that q_id before moving on
var entryList = [
    2684, 230,
    2685, 145,
    2686, 235,
    2687, 237,
    2688, 224,
    2689, 117,
    2690, 120,
    2691, 79,
    2692, 80,
    2693, 40,
    2694, 70,
    2695, 80,
    2696, 40,
    2697, 38,
    2698, 90
];

// Value sent as the `SelQuesetions` query parameter on every practice request.
var sId = 2683;

/**
 * Extracts one question from the HTML of a practice page.
 *
 * @param {string} pageHtml - innerHTML of the practice page body.
 * @returns {{title: ?string, options: string[], answerCode: string, categoryName: ?string}}
 *   `title` is the trimmed question text (null/'' when it cannot be located),
 *   `options` the option labels in page order, `answerCode` the first character
 *   of the option whose id equals the hidden answer id ('' when none matches),
 *   and `categoryName` the text of the bold grey span (null when absent).
 */
function parseQuestion(pageHtml) {
    var answerId = getBetween(pageHtml, 'id="Hid_Answer" value="', '"');

    var title = getBetween(pageHtml, 'id="Hid_Choose" value="0">', '</div>');
    if (title) {
        title = title.trim();
    }

    var answerCode = '';
    var options = [];
    // match() returns null when nothing matches; fall back to an empty list.
    var optionRefs = pageHtml.match(new RegExp('Rad_T_A_Id_[\\d]+', 'g')) || [];
    // Step by two: presumably every option id occurs twice in the markup
    // (e.g. on the input and on its label) — TODO confirm against the page.
    for (var i = 0; i < optionRefs.length; i += 2) {
        var optionId = optionRefs[i].replace('Rad_T_A_Id_', '');
        var optionTitle = getBetween(pageHtml, optionRefs[i] + '">', '</label>');
        if (optionTitle) {
            options.push(optionTitle);
            if (answerId === optionId) {
                // The answer is recorded as the first character of the option label.
                answerCode = optionTitle.charAt(0);
            }
        }
    }

    var categoryName = getBetween(pageHtml, '<span style="font-weight:bold;color:#808080;">', '</span>');

    return {
        title: title,
        options: options,
        answerCode: answerCode,
        categoryName: categoryName
    };
}

// Entry point: log in, then capture questions for every (q_id, count) pair of
// entryList, appending each question to d:\<category>.txt.
// Failure paths exit with status 1; normal completion exits with status 0.
page.open('http://www.renyiwang.net/Mobile/Login.aspx', function(status) {
    if (status !== 'success') {
        console.log('Unable to access Login Page, status is ' + status + '!');
        phantom.exit(1);
        return;
    }
    console.log('success to open Login Page, status is ' + status + '!');

    // Fill in and submit the login form inside the page context.
    // NOTE(review): the two values look like placeholders for real credentials.
    page.evaluate(function() {
        document.getElementById('TstNumber').value = "用户名";
        document.getElementById('TstPassword').value = "密码";
        document.getElementById('But_Login').click();
    });

    // Give the login request 5 seconds to complete before inspecting the page.
    setTimeout(function() {
        var loginHtml = page.evaluate(function() {
            return document.body.innerHTML;
        });
        // The page is treated as "logged in" when it contains this marker text.
        if (!loginHtml || loginHtml.indexOf("三基培训") === -1) {
            console.log(loginHtml);
            phantom.exit(1);
            return;
        }
        console.log('success to login');

        /**
         * Opens the practice page for entry `entryIndex`, records the question
         * shown there, then schedules the next capture.
         *
         * @param {number} entryIndex - Index of the (q_id, count) pair in entryList.
         * @param {number} qIndex - Number of questions already captured for this entry.
         */
        var captureQuestion = function(entryIndex, qIndex) {
            qIndex++;
            var questionSetId = entryList[entryIndex * 2];
            page.open('http://www.renyiwang.net/Mobile/Practice.aspx?o_id=6&SelQuesetions=' + sId + '&q_id=' + questionSetId + '&class=0', function(status) {
                if (status !== 'success') {
                    console.log('Unable to access Practice Page, status is ' + status + '!');
                    phantom.exit(1);
                    return;
                }

                var pageHtml = page.evaluate(function() {
                    return document.body.innerHTML;
                });
                if (!pageHtml || pageHtml.indexOf('Rad_T_A_Id') === -1) {
                    console.log(pageHtml);
                    console.log('fail to open pratice page!');
                    phantom.exit(1);
                    return;
                }

                var question = parseQuestion(pageHtml);
                if (!question.title) {
                    // Dump the page so the unparsable markup can be inspected.
                    console.log("ERROR: " + pageHtml);
                }

                var info = '第' + qIndex + '题:' + question.title + '\n' + question.options.join('\n') + '\n' + '答案:' + question.answerCode + '\n\n';
                console.log(info);

                // Fall back to a q_id-based file name instead of writing "null.txt"
                // when the category name cannot be found in the page.
                var categoryName = question.categoryName || ('q_id_' + questionSetId);
                fs.write('d:\\' + categoryName + '.txt', info, 'a');

                var maxCount = entryList[entryIndex * 2 + 1];
                if (qIndex < maxCount) {
                    // More questions remain for this entry.
                    setTimeout(function() {
                        captureQuestion(entryIndex, qIndex);
                    }, 1000);
                    return;
                }

                var nextEntryIndex = entryIndex + 1;
                if (nextEntryIndex * 2 >= entryList.length) {
                    console.log('finished!');
                    phantom.exit();
                    return;
                }

                // Call PracticeClear for the next q_id before starting to capture it.
                page.open('http://www.renyiwang.net/Mobile/PracticeClear.aspx?o_id=6&SelQuesetions=' + sId + '&q_id=' + entryList[nextEntryIndex * 2], 'post', {}, function(clearStatus) {
                    if (clearStatus !== 'success') {
                        // Best effort: report the problem but keep going.
                        console.log('WARNING: PracticeClear request status is ' + clearStatus + '!');
                    }
                    console.log('PracticeClear ' + categoryName);
                    setTimeout(function() {
                        captureQuestion(nextEntryIndex, 0);
                    }, 5000);
                });
            });
        };

        captureQuestion(0, 0);
    }, 5000);
});

 

标签:练习题,function,console,log,phantomjs,pageHtml,var,return,仁医
来源: https://www.cnblogs.com/lavezhang/p/16336504.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有