Script returns the href of the next button
parent
9413ba6269
commit
b396572b7c
|
@ -0,0 +1,39 @@
|
|||
var request = require('request');
|
||||
var cheerio = require('cheerio');
|
||||
var URL = require('url-parse');
|
||||
|
||||
// Entry page of the crawl.
var pageToVisit = "https://beck-online.beck.de/?vpath=bibdata/komm/BeckOKGG_44/cont/BECKOKGG.glUD1.htm";
console.log("Visiting page " + pageToVisit);

// Fetch the page, print the href of the element with id "dk2next" and
// report how many relative and absolute links the document contains.
request(pageToVisit, function(error, response, body) {
  if (error) {
    console.log("Error: " + error);
    // When the request itself fails there is no usable response object,
    // so stop here instead of throwing on `response.statusCode` below.
    return;
  }

  // Check status code (200 is HTTP OK)
  console.log("Status code: " + response.statusCode);
  if (response.statusCode !== 200) {
    return;
  }

  // Parse the document body
  var $ = cheerio.load(body);
  // NOTE(review): the label says "Page title", but the value printed is the
  // href attribute of #dk2next; the label is kept as-is to not change output.
  console.log("Page title: " + $('#dk2next').attr('href'));
  collectInternalLinks($);
});
|
||||
|
||||
/**
 * Collects the href values of the relative and absolute anchors of a loaded
 * document and logs how many of each were found.
 *
 * @param {Function} $ - cheerio-style selector function for the loaded page.
 * @returns {{relative: string[], absolute: string[]}} The collected hrefs:
 *   `relative` holds hrefs starting with a single "/", `absolute` holds
 *   hrefs starting with "http".
 */
function collectInternalLinks($) {
  var allRelativeLinks = [];
  var allAbsoluteLinks = [];

  $("a[href^='/']").each(function() {
    var href = $(this).attr('href');
    // The selector also matches protocol-relative URLs ("//host/path").
    // Those name another host, so they are not site-internal relative links.
    if (href.charAt(1) !== '/') {
      allRelativeLinks.push(href);
    }
  });

  $("a[href^='http']").each(function() {
    allAbsoluteLinks.push($(this).attr('href'));
  });

  console.log("Found " + allRelativeLinks.length + " relative links");
  console.log("Found " + allAbsoluteLinks.length + " absolute links");

  // Hand the links back so callers can use them, not just read the counts.
  return { relative: allRelativeLinks, absolute: allAbsoluteLinks };
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"name": "simple-webcrawler-javascript",
|
||||
"version": "0.0.0",
|
||||
"description": "A simple webcrawler written in JavaScript to learn it.",
|
||||
"main": "crawler.js",
|
||||
"author": "Stephen",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"cheerio": "^0.19.0",
|
||||
"url-parse": "^1.0.5",
|
||||
"request": "^2.65.0"
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue