Compare commits

...

2 Commits

Author SHA1 Message Date
Wurzelkoch 61bbd5cd1f Verification needed
But how?
2020-10-10 16:48:00 +02:00
Wurzelkoch 2fddbd00f3 update metadata 2020-10-10 16:06:05 +02:00
2 changed files with 14 additions and 5 deletions

View File

@@ -1,6 +1,7 @@
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
var fs = require('fs');
var plsStop = 0;
var pagesVisited = {};
@@ -21,7 +22,6 @@ crawl();
// function declarations:
function crawl() {
console.log(plsStop);
if (plsStop === 1) {
crawl();
}
@@ -48,7 +48,16 @@ console.log("Visiting page " + url);
// Parse the document body
var $ = cheerio.load(body);
console.log("Page title: " + $('title').text());
// savePage($,crawl);
// get real document
var token = $("[name='__RequestVerificationToken']").attr('value');
console.log(token);
// save page
innerhtml = $('div.kapitel');
// console.log("Content: " + innerhtml);
fs.appendFileSync('beckOK.html', innerhtml + '\n');
// prepare next page
var nextPage = $('#next').attr('href');
console.log("Next up: " + nextPage);

View File

@@ -1,9 +1,9 @@
{
"name": "simple-webcrawler-javascript",
"name": "BeckOK-scrawler-javascript",
"version": "0.0.0",
"description": "A simple webcrawler written in JavaScript to learn it.",
"description": "A webcrawler written in JavaScript to get BeckOK law books.",
"main": "crawler.js",
"author": "Stephen",
"author": "Gandalf",
"license": "ISC",
"dependencies": {
"cheerio": "^0.19.0",