-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.js
More file actions
117 lines (75 loc) · 4.42 KB
/
server.js
File metadata and controls
117 lines (75 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
//This is the server that makes requests to the external website
var express = require('express'); //Express is a minimal and flexible Node.js web application framework that provides a robust set of features for web and mobile applications.
var fs = require('fs'); //fs library gives access to the computer's file system so files can be written to disk.
var request = require('request'); //simplest way possible to make http calls. It supports HTTPS and follows redirects by default.
var cheerio = require('cheerio'); //jQuery designed specifically for the server.
var app = express();
/**
 * Pulls the movie title, release year and rating out of a loaded IMDb title page.
 *
 * @param {Function} $ - cheerio instance returned by `cheerio.load(html)`.
 * @returns {{title: string, release: string, rating: string}} The scraped fields;
 *     a field is an empty string when its selector matches nothing.
 */
function extractMovieInfo($) {
    // The <h1> contains the title text followed by a child <span> holding the year.
    // Clone it and strip the children so only the bare title text remains.
    var title = $('.title_wrapper h1').first().clone().children().remove().end().text().trim();

    // The release year is the text of the link nested inside that same <h1>.
    var release = $('.title_block .title_wrapper h1 a').text();

    // The rating is carried in the `content` attribute of the first <meta> under `.subtext`.
    var rating = $('.subtext meta').first().attr('content');

    // Older page layout from the tutorial: when a `.star-box-giga-star` element is present,
    // its text takes precedence over the value read above (the last match wins).
    $('.star-box-giga-star').each(function() {
        rating = $(this).text();
    });

    // `.attr()` yields undefined when nothing matched, and JSON.stringify silently drops
    // undefined properties; fall back to '' so the output always has all three keys.
    if (rating === undefined) {
        rating = '';
    }

    return {
        title: title,
        release: release,
        rating: rating
    };
}

/**
 * GET /scrape
 *
 * Fetches the IMDb page for Anchorman 2, extracts title/release/rating with cheerio,
 * writes them to `output.json` as pretty-printed JSON, and replies with a short
 * plain-text message (this app has no UI).
 */
app.get('/scrape', function(req, res) {
    // Declared locally so the handler does not leak an implicit global.
    var url = 'http://www.imdb.com/title/tt1229340/';

    // The callback receives an error (if any), the response object and the page HTML.
    request(url, function(error, response, html) {
        if (error) {
            console.error('Request to ' + url + ' failed:', error);
            // Same body as before, but flagged as an upstream failure instead of a 200.
            res.status(502).send('there was an error');
            return;
        }

        var json = extractMovieInfo(cheerio.load(html));

        // Write the result to disk; 4-space indentation keeps the file human-readable.
        fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err) {
            if (err) {
                // Surface write failures instead of swallowing them.
                console.error('Could not write output.json:', err);
                return;
            }
            console.log('File successfully written - check output.json in the project directory.');
        });

        // The response is sent without waiting for the write to finish.
        res.send('Something here, Dave. Check your console and project output file!');
    });
});
// Start the HTTP server on port 8081.
app.listen(8081);

// Expose the Express app to other modules (e.g. for tests). Assign straight to
// module.exports rather than through an undeclared intermediate name, which would
// create an implicit global and throw a ReferenceError under strict mode.
module.exports = app;
// NOTE: IMDb's title wrapper markup has changed since the tutorial was written, so the
// selectors must be adapted to match the current page. Current markup:
//
//   <h1 itemprop="name" class="">Anchorman 2: The Legend Continues <span id="titleYear">
//   (<a href="/year/2013/?ref_=tt_ov_inf">2013</a>)</span> </h1>
//
// .filter() may not work against this markup.
// IDEA: Use a regular expression to extract the strings from the HTML elements.