diff --git a/crawler.js b/crawler.js index 4d59616..570bfcb 100644 --- a/crawler.js +++ b/crawler.js @@ -1,6 +1,7 @@ var request = require('request'); var cheerio = require('cheerio'); var URL = require('url-parse'); +var fs = require('fs'); var plsStop = 0; var pagesVisited = {}; @@ -21,7 +22,6 @@ crawl(); // function declarations: function crawl() { -console.log(plsStop); if (plsStop === 1) { crawl(); } @@ -48,7 +48,16 @@ console.log("Visiting page " + url); // Parse the document body var $ = cheerio.load(body); console.log("Page title: " + $('title').text()); -// savePage($,crawl); + + // get real document + var token = $("[name='__RequestVerificationToken']").attr('value'); + console.log(token); + + // save page + innerhtml = $('div.kapitel'); +// console.log("Content: " + innerhtml); + fs.appendFileSync('beckOK.html', innerhtml + '\n'); + // prepare next page var nextPage = $('#next').attr('href'); console.log("Next up: " + nextPage);