BeckOK/crawler.js
2020-10-10 12:16:28 +02:00

40 lines
1.1 KiB
JavaScript

var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
// Start page of the crawl: fetch it once, report the HTTP status, and on
// success parse the HTML and summarise the links it contains.
var pageToVisit = "https://beck-online.beck.de/?vpath=bibdata/komm/BeckOKGG_44/cont/BECKOKGG.glUD1.htm";
console.log("Visiting page " + pageToVisit);
request(pageToVisit, function(error, response, body) {
  if (error) {
    console.log("Error: " + error);
    // When the request itself fails there is no response object to inspect,
    // so stop here instead of crashing on `response.statusCode` below.
    return;
  }
  // Check status code (200 is HTTP OK)
  console.log("Status code: " + response.statusCode);
  if (response.statusCode === 200) {
    // Parse the document body
    var $ = cheerio.load(body);
    // NOTE(review): despite the "Page title" label, the value printed is the
    // href attribute of the element with id "dk2next" — confirm the intended label.
    console.log("Page title: " + $('#dk2next').attr('href'));
    collectInternalLinks($);
  }
});
/**
 * Logs how many anchors on the parsed page have an href starting with "/"
 * (reported as relative links) and how many have an href starting with
 * "http" (reported as absolute links).
 *
 * The hrefs are gathered into local arrays that are used only for counting;
 * nothing is returned.
 *
 * @param {Function} $ - Selector function for the parsed document (the value
 *   produced by `cheerio.load` at the call site in this file).
 */
function collectInternalLinks($) {
  // Gather the href attribute of every element matched by `selector`.
  function hrefsMatching(selector) {
    var hrefs = [];
    $(selector).each(function(index, anchor) {
      hrefs.push($(anchor).attr('href'));
    });
    return hrefs;
  }

  var rootRelativeHrefs = hrefsMatching("a[href^='/']");
  var absoluteHrefs = hrefsMatching("a[href^='http']");

  console.log("Found " + rootRelativeHrefs.length + " relative links");
  console.log("Found " + absoluteHrefs.length + " absolute links");
}