PhantomJS - CasperJS adds "file://" to every URL
This is the function that collects the URLs on the page.
/**
 * Collects link targets from the result list of the current page.
 *
 * Intended to run inside the page context (e.g. via casper.evaluate), where
 * jQuery (`$`) and lodash (`_`) have been injected as client scripts.
 *
 * @returns {string[]} One entry per `.resultitem div a` anchor that has an
 *   `href` attribute, with the first two characters of the raw value removed.
 */
function gettitlelinks() {
  var anchors = $('.resultitem div a');

  // DOM method names are case-sensitive: it is getAttribute, not getattribute.
  var hrefs = _.map(anchors, function (anchor) {
    return anchor.getAttribute('href');
  });

  var links = [];
  for (var idx = 0; idx < hrefs.length; idx++) {
    // getAttribute returns null for anchors without an href; skip those
    // instead of throwing on null.substring.
    if (typeof hrefs[idx] === 'string') {
      // Drops the first two characters of the raw attribute value.
      // NOTE(review): presumably this strips a leading "//" or "./" prefix;
      // verify against the page markup. A result that does not start with
      // "http://" or "https://" may be treated as a local path when opened.
      links.push(hrefs[idx].substring(2));
    }
  }
  return links;
}
It returns an array of URLs, but when I try to loop through them and open each one, the URLs are formatted this way:
file:///root/desktop/scrapper/http://....
It adds "file://", and I have no idea where it came from. When I try to echo the array in a loop, it outputs the URLs correctly, without "file://..."
I think it is a bug. Though I'm a beginner, I have built a couple of scrapers with CasperJS, and this is the first time I have seen this error.
the full code:
// CasperJS scraper: walks 100 listing pages, collects the result links on
// each page, and opens every collected link in turn.
// Written in ES5 because it runs under PhantomJS.

var x = require('casper').selectXPath;
var fs = require('fs');
var link = [];

//----------------------------------------------------------
// CasperJS option names are camelCase and case-sensitive; lower-cased keys
// such as "clientscripts" are not recognised.
var casper = require('casper').create({
  clientScripts: [
    'includes/jquery-3.2.0.min.js',
    'includes/lodash.js'
  ],
  pageSettings: {
    loadImages: true,
    loadPlugins: false,
    userAgent: "mozilla/5.0 (windows nt 5.1; rv:52.0) gecko/20100101 firefox/52.0"
  },
  viewportSize: {
    width: 1600,
    height: 950
  },
  logLevel: "error",
  verbose: true
});

/**
 * Collects link targets from the result list of the current page.
 * Runs in the page context, where the injected jQuery (`$`) and lodash (`_`)
 * client scripts are available.
 *
 * @returns {string[]} One entry per `.resultitem div a` anchor that has an
 *   `href`, with the first two characters of the raw value removed.
 */
function gettitlelinks() {
  var anchors = $('.resultitem div a');
  var hrefs = _.map(anchors, function (anchor) {
    return anchor.getAttribute('href');
  });

  var links = [];
  for (var idx = 0; idx < hrefs.length; idx++) {
    // Anchors without an href yield null; skip them rather than throw.
    if (typeof hrefs[idx] === 'string') {
      // NOTE(review): presumably strips a leading "//" or "./" prefix; verify
      // against the page markup. A value that does not start with "http://"
      // or "https://" may be treated as a local path when opened.
      links.push(hrefs[idx].substring(2));
    }
  }
  return links;
}

/**
 * Reads the text of the listing-author elements on the current page.
 * Runs in the page context. Not called by the crawl below at the moment.
 *
 * @returns {string[]} The textContent of each matched element.
 */
function contacteval() {
  var listingauthor = $('.listing-properties > div:nth-child(2) > span.last');
  return _.map(listingauthor, function (e) {
    return e.textContent;
  });
}

casper.start();

casper.then(function () {
  for (var p = 1; p <= 100; p++) {
    this.thenOpen('https://blahblah' + p, function () {
      this.echo(this.getCurrentUrl());

      // evaluate() yields null when the page function fails; fall back to an
      // empty list so eachThen always receives an array.
      link = this.evaluate(gettitlelinks) || [];

      // eachThen passes the current array item as response.data, so no
      // manual index counter is needed.
      this.eachThen(link, function (response) {
        this.thenOpen(response.data, function () {
          this.echo(this.getCurrentUrl());
          // To print the listing authors of the opened page:
          //   this.echo(this.evaluate(contacteval));
        });
      });
    });
  }
});

//run
casper.run(function () {
  this.echo('fucking finish').exit();
});
Comments
Post a Comment