Compare commits

...

2 commits

Author SHA1 Message Date
Wurzelkoch e55cacd224 Ich glaub ich krieg die Authentifizierung nicht hin 2020-10-10 20:00:22 +02:00
Wurzelkoch 4e0e931839 First guess verification
Warum läuft jetzt der Call Stack über?
2020-10-10 19:27:59 +02:00

View file

@ -5,6 +5,7 @@ var fs = require('fs');
var plsStop = 0; var plsStop = 0;
var pagesVisited = {}; var pagesVisited = {};
var printOptions = [];
// set a default starting page (which is the title of Grundgesetz for arbitrary reasons) but take input from CLI // set a default starting page (which is the title of Grundgesetz for arbitrary reasons) but take input from CLI
var argv = process.argv.slice(2); var argv = process.argv.slice(2);
@ -23,11 +24,11 @@ crawl();
function crawl() { function crawl() {
if (plsStop === 1) { if (plsStop === 1) {
crawl(); return;
} }
if (pageToVisit in pagesVisited) { if (pageToVisit in pagesVisited) {
// We've already visited this page, so repeat the crawl // We've already visited this page, so repeat the crawl
crawl(); //continue;
} else { } else {
// New page we haven't visited // New page we haven't visited
visitPage(pageToVisit, crawl); visitPage(pageToVisit, crawl);
@ -55,14 +56,25 @@ console.log("Visiting page " + url);
// save page // save page
innerhtml = $('div.kapitel'); innerhtml = $('div.kapitel');
// console.log("Content: " + innerhtml); // download
fs.appendFileSync('beckOK.html', innerhtml + '\n'); var options = {
url: baseUrl + "/Print/CurrentDoc" + url + "&printdialogmode=ParentChapter&actionname=&gesamtversionpath=&exportFormat=print",
headers: {
"__RequestVerificationToken": token
}
};
printOptions.push(options);
// prepare next page // prepare next page
var nextPage = $('#next').attr('href'); var nextPage = $('#next').attr('href');
console.log("Next up: " + nextPage); console.log("Next up: " + nextPage);
if (nextPage){ if (nextPage){
pageToVisit = baseUrl + nextPage; pageToVisit = {
url: baseUrl + nextPage,
headers: {
"__RequestVerificationToken": token
}
};
callback(); callback();
} else { } else {
plsStop = 1; plsStop = 1;
@ -71,8 +83,18 @@ console.log("Visiting page " + url);
}); });
} }
/**
 * Download every queued print request and append each page body to beckOK.html.
 *
 * Drains the shared `printOptions` queue one request at a time: the next
 * download is only started after the previous response has been handled.
 * Entries are taken from the end of the queue with pop(), i.e. the entry
 * queued last is downloaded first.
 * NOTE(review): if the output file should follow crawl order, use shift()
 * instead of pop() - confirm the intended order.
 *
 * @param {Function} [callback] - called once, without arguments, after the
 *   queue has been emptied. Optional.
 */
function savePage(callback){
  function next() {
    // An array is always truthy, so the queue has to be tested by length;
    // `while(printOptions)` can never terminate.
    if (printOptions.length === 0) {
      if (typeof callback === 'function') {
        callback();
      }
      return;
    }
    var options = printOptions.pop();
    request(options, function(error, response, body){
      if (error || !response) {
        // Without a response there is no statusCode to read; report the
        // failure and keep going with the remaining entries.
        console.log("Error: " + error);
      } else if (response.statusCode === 200) {
        // Parse the document body
        var $ = cheerio.load(body);
        console.log("Page title: " + $('title').text());
        // Local variable on purpose: avoids creating an implicit global.
        var innerhtml = $('body');
        fs.appendFileSync('beckOK.html', innerhtml + '\n');
      }
      next();
    });
  }
  next();
}