嗨,我终于能够设置我的webscraper,并将数据导入到我的网页中:)
但是我的网页运行在端口3001上,而网页抓取器(scraper)运行在端口8080上。我有点困惑:怎样设置一个定时器,让抓取器在后台定期更新数据?
var scraper = require(__dirname + '/scripts/scraper.js');
var express = require('express');
var path = require('path');
var app = express();
var MongoClient = require('mongodb').MongoClient;
var url = "mongodb://127.0.0.1:27017/test";
// Serve static assets from ./public.
app.use(express.static(__dirname + '/public'));

// set the view engine to ejs
app.set('view engine', 'ejs');

/**
 * Index page: reads every document of the "customers" collection in the
 * "mydb" database and passes them to the pages/index template as `result`.
 *
 * Database failures are logged and answered with HTTP 500. Throwing from
 * inside these asynchronous callbacks would surface as an uncaught exception,
 * taking the whole server down and leaving the request unanswered.
 */
app.get('/', function (req, res) {
  MongoClient.connect(url, function (connectErr, db) {
    if (connectErr) {
      console.error('MongoDB connection failed:', connectErr);
      res.status(500).send('Database unavailable');
      return;
    }

    const dbo = db.db("mydb");
    dbo.collection("customers").find({}).toArray(function (queryErr, result) {
      // The documents are already in memory here, so the connection can be
      // released on both the success and the failure path.
      db.close();

      if (queryErr) {
        console.error('MongoDB query failed:', queryErr);
        res.status(500).send('Database query failed');
        return;
      }

      res.render('pages/index', {
        result: result,
      });
    });
  });
});

app.listen(3001);
console.log('navigate to: http://178.62.253.206:3001');
Scraper.js
var http = require('http');
var request = require('request');
var cheerio = require('cheerio');
var MongoClient = require('mongodb').MongoClient
// Verdicts a row filter can return to collectRows().
const TAKE_ROW = 'take';
const SKIP_ROW = 'skip';
const STOP_SCAN = 'stop';

/** Row filter: accept every row (used for the live-match rows). */
function takeEveryRow() {
  return TAKE_ROW;
}

/**
 * Row filter: accept rows whose data-statustype is "sched" and stop the scan
 * at the first row that is not.
 * NOTE(review): stopping (rather than skipping) is what the previous code did
 * via `return false`; presumably the page lists scheduled rows first - confirm.
 */
function takeWhileScheduled(row) {
  return row.attr('data-statustype') === 'sched' ? TAKE_ROW : STOP_SCAN;
}

/**
 * Row filter: accept rows that are not "sched" or that carry
 * data-ftr="true"; skip the others and keep scanning.
 */
function takeFinished(row) {
  const isFinished = row.attr('data-statustype') !== 'sched' ||
    row.attr('data-ftr') === 'true';
  return isFinished ? TAKE_ROW : SKIP_ROW;
}

/**
 * Builds the record stored for one match row.
 *
 * @param {object} row - cheerio selection wrapping a single match row.
 * @param {number} position - display position stored as `j`.
 * @returns {object} record with the keys hteam, ateam, j, statustype,
 *   country, league and Kickoff (names are lower-cased, then word-capitalised;
 *   the kick-off time is shifted back one hour).
 */
function readMatchRow(row, position) {
  return {
    hteam: soccer(ucwords(row.attr('data-home-team').toLowerCase())),
    ateam: soccer(ucwords(row.attr('data-away-team').toLowerCase())),
    j: position,
    statustype: row.attr('data-statustype'),
    country: ucwords(row.attr('data-country-name').toLowerCase()),
    league: ucwords(row.attr('data-league-name').toLowerCase()),
    Kickoff: subtracthour(row.attr('data-ko'))
  };
}

/**
 * Appends one record per accepted row to `matches`, numbering the accepted
 * rows firstPosition, firstPosition + 2, firstPosition + 4, ...
 *
 * @param {Function} $ - cheerio instance loaded with the page.
 * @param {string} selector - CSS selector of the rows to scan.
 * @param {number} firstPosition - position given to the first accepted row.
 * @param {Array<object>} matches - output array, appended to in place.
 * @param {Function} decide - row filter returning TAKE_ROW, SKIP_ROW or STOP_SCAN.
 */
function collectRows($, selector, firstPosition, matches, decide) {
  let position = firstPosition;
  $(selector).each(function (index, element) {
    const row = $(element);
    const verdict = decide(row);
    if (verdict === STOP_SCAN) {
      return false; // cheerio ends the .each() loop when the callback returns false
    }
    if (verdict === TAKE_ROW) {
      matches.push(readMatchRow(row, position));
      position += 2;
    }
    return undefined;
  });
}

/**
 * Extracts all matches from the loaded page in three passes (live, scheduled,
 * finished). Rows are selected separately by their e_true / o_true class and
 * interleaved again by giving one class the odd and the other the even
 * positions, then sorting on `j`.
 *
 * @param {Function} $ - cheerio instance loaded with the page.
 * @returns {Array<object>} records sorted by `j`.
 */
function extractMatches($) {
  const matches = [];
  const byPosition = function (a, b) {
    return a.j - b.j;
  };
  let evenRowStart;
  let oddRowStart;

  // Step 1: live matches. e_true rows get 1, 3, 5, ...; o_true rows 2, 4, 6, ...
  collectRows($, 'div.match_line.score_row.live_match.e_true ', 1, matches, takeEveryRow);
  collectRows($, 'div.match_line.score_row.live_match.o_true ', 2, matches, takeEveryRow);
  matches.sort(byPosition);

  // Step 2: scheduled matches, numbered on from the current record count.
  if (isEven(matches.length)) {
    evenRowStart = matches.length + 1;
    oddRowStart = matches.length + 2;
  } else {
    evenRowStart = matches.length + 2;
    oddRowStart = matches.length + 1;
  }
  collectRows($, 'div.match_line.score_row.other_match.e_true', evenRowStart, matches, takeWhileScheduled);
  collectRows($, 'div.match_line.score_row.other_match.o_true', oddRowStart, matches, takeWhileScheduled);
  matches.sort(byPosition);

  // Step 3: finished matches.
  // NOTE(review): the parity rule here is the inverse of the one used before
  // step 2. It is kept exactly as it was; confirm it is intentional.
  if (isEven(matches.length)) {
    evenRowStart = matches.length + 2;
    oddRowStart = matches.length + 1;
  } else {
    evenRowStart = matches.length + 1;
    oddRowStart = matches.length + 2;
  }
  collectRows($, 'div.match_line.score_row.other_match.e_true', evenRowStart, matches, takeFinished);
  collectRows($, 'div.match_line.score_row.other_match.o_true', oddRowStart, matches, takeFinished);
  matches.sort(byPosition);

  return matches;
}

/**
 * Replaces the contents of the "customers" collection in "mydb" with `matches`.
 *
 * The insert is started only after the drop has completed; running both at
 * once lets the drop remove freshly inserted documents. A missing collection
 * (first run) is not treated as an error, and an empty `matches` array skips
 * the insert because insertMany rejects an empty batch.
 *
 * @param {Array<object>} matches - records to store.
 * @param {Function} done - called with an error, or null on success.
 */
function saveMatches(matches, done) {
  MongoClient.connect('mongodb://127.0.0.1:27017/test', function (connectErr, db) {
    if (connectErr) {
      done(connectErr);
      return;
    }

    const dbo = db.db("mydb");
    const finish = function (err) {
      db.close();
      done(err);
    };

    // Delete old records
    dbo.dropCollection("customers", function (dropErr, delOK) {
      // Server error code 26 (NamespaceNotFound, message "ns not found")
      // only means there was nothing to drop.
      const nothingToDrop = Boolean(dropErr) &&
        (dropErr.code === 26 || dropErr.message === 'ns not found');
      if (dropErr && !nothingToDrop) {
        finish(dropErr);
        return;
      }
      if (delOK) console.log("Collection deleted");

      if (matches.length === 0) {
        finish(null);
        return;
      }

      // Add new records
      dbo.collection("customers").insertMany(matches, function (insertErr) {
        if (insertErr) {
          finish(insertErr);
          return;
        }
        console.log("Collection created!");
        finish(null);
      });
    });
  });
}

/** Ends the HTTP response with a plain-text status message. */
function sendText(res, statusCode, message) {
  res.writeHead(statusCode, { 'Content-Type': 'text/plain' });
  res.end(message);
}

/**
 * Every request to this server triggers one scrape of xscores.com and a
 * refresh of the stored matches. The request is always answered (previously
 * it was left hanging), and failures are reported as 5xx responses instead of
 * being thrown from asynchronous callbacks, which would stop the process.
 */
http.createServer(function (req, res) {
  request('http://www.xscores.com/soccer', function (error, response, html) {
    if (error || response.statusCode !== 200) {
      console.error('Scrape request failed:', error || ('HTTP ' + response.statusCode));
      sendText(res, 502, 'Scrape request failed');
      return;
    }

    let matches;
    try {
      matches = extractMatches(cheerio.load(html));
    } catch (parseErr) {
      // A row without one of the expected data-* attributes ends up here.
      console.error('Could not parse scraped page:', parseErr);
      sendText(res, 500, 'Could not parse scraped page');
      return;
    }

    saveMatches(matches, function (saveErr) {
      if (saveErr) {
        console.error('Could not store matches:', saveErr);
        sendText(res, 500, 'Could not store matches');
        return;
      }
      sendText(res, 200, 'Stored ' + matches.length + ' matches');
    });
  });
}).listen(8080);
console.log('Server is running at http://178.62.253.206:8080/');
/**
 * Tells whether a value is an even number.
 *
 * @param {*} n - value to check; numeric strings are accepted.
 * @returns {boolean|undefined} true when n % 2 is zero (or NaN), false
 *   otherwise; undefined when n does not loosely equal parseFloat(n),
 *   i.e. it is not numeric.
 */
function isEven(n) {
  const numeric = parseFloat(n);
  // Loose comparison on purpose: "6" counts as numeric, "6px" and NaN do not.
  if (n != numeric) {
    return undefined;
  }
  const remainder = n % 2;
  return remainder ? false : true;
}
/**
 * Upper-cases the first letter of each whitespace-separated word.
 * Only the ASCII letters a-z are affected; everything else is left as is.
 *
 * @param {*} str - value to convert; it is coerced to a string first.
 * @returns {string} the capitalised text.
 */
function ucwords(str) {
  // Matches a lower-case letter at the very start, or a run of whitespace
  // followed by a lower-case letter.
  const wordStart = /^([a-z])|\s+([a-z])/g;
  const text = str + '';
  return text.replace(wordStart, (hit) => hit.toUpperCase());
}
/**
 * Shifts a kick-off time back by one hour, wrapping around midnight.
 *
 * The hour and minute are parsed rather than cut out at fixed offsets, so a
 * single-digit hour such as "9:05" is handled ("08:05") instead of producing
 * "NaN:5".
 *
 * @param {string} str - time starting with H:MM or HH:MM (any single
 *   non-digit separator is accepted; trailing text such as seconds is ignored).
 * @returns {string} the shifted time as "HH:MM". Input that does not start
 *   with a time is returned unchanged.
 */
function subtracthour(str) {
  var parts = /^\s*(\d{1,2})\D(\d{2})/.exec(String(str));
  if (!parts) {
    return str;
  }
  // Adding 23 modulo 24 is "minus one hour" with 00 wrapping to 23.
  var hour = (Number(parts[1]) + 23) % 24;
  var paddedHour = hour < 10 ? '0' + hour : String(hour);
  return paddedHour + ':' + parts[2];
}
/**
 * Rewrites the first " Ff" in a team name to " FF".
 * Presumably this restores a club suffix that ucwords() turned into "Ff" -
 * confirm against the scraped data.
 *
 * @param {string} str - team name.
 * @returns {string} the name with the first " Ff" replaced.
 */
function soccer(str) {
  // A string pattern replaces the first occurrence only.
  return str.replace(" Ff", " FF");
}
弗雷德里克
我看到三种选择:
基于回合制游戏服务器的websockets与长轮询的区别
// Once per second: fetch fresh data and emit it as a 'webscraper data' event
// through io.sockets.
// NOTE(review): `updateData` and `io` are not defined in this snippet;
// presumably they are the scraper entry point and a socket.io server - confirm.
setInterval(function () {
  // Declared locally: a bare `data = ...` would create an implicit global
  // (and is a ReferenceError in strict mode).
  const data = updateData();
  io.sockets.emit('webscraper data', data);
}, 1000);
// Listener for the 'webscraper data' event - the same event name the
// setInterval example emits. `data` is the payload delivered with the event.
// NOTE(review): `socket` is not defined in this snippet; presumably it is the
// socket.io client connection - confirm.
socket.on('webscraper data', function(data){
$('#data').... // placeholder from the answer, not runnable as written: update the DOM here
});
下面是一个使用express EventEmitter:node.js:长轮询的示例:使用EventEmitter和Express4.x捕获请求关闭的长轮询
我在下面提供了一些示例,但是我需要清除大部分可用的数据元素,所以不是寻找复制和粘贴的代码,而是寻找最好的方法。见下文。 链接:https://boardgamegeek.com/boardgame/63888/innovation 我试图从中提取的HTML示例。Span没有返回html_nodes,所以我无法从那里开始。 我假设这是JSON?有没有一种方法来解析html_text输出,或者另一种方
初学者。我想提取巴克莱的所有作业(https://search.jobs.Barclays/search-jobs) 我通过刮了第一页,但挣扎着去下一页,因为url没有改变。我试着在下一页按钮上刮url,但那个href把我带回主页。 这是否意味着所有的职务数据实际上都存储在原始HTML中?如果是,我该如何提取? 谢谢!
Spring Web应用程序只是一个使用servlet API 3.0的普通Web应用程序。 在servlet API 3.0中web.xml文件是可选的(大多数时候)。我试图在我的Spring应用程序中不包含web.xml,但不知何故,即使使用Servlet 3.0,它似乎也是必需的 Spring应用程序和我在没有编写web.xml文件的情况下成功运行的Web应用程序之间的唯一区别是,在Spri
我正试图从whoscored.com(下面的变量根链接)中抓取指向EPL所有球员的链接,这里是代码: 如果你进入这个页面,你会看到一个球员列表和一个“下一页”按钮,用来显示接下来的10名球员(共284名球员,分29页)。我想要的输出:保存每页10名球员个人资料的链接,然后跳到下一页处理接下来的10名球员,直到全部完成
引用脚本的内容: /*更改壁纸 by Ansifa*/ Name "更改壁纸" OutFile "更改壁纸.exe" Caption "更改壁纸" SetCompressor /FINAL /SOLID lzma !include UsefulLib.nsh page instfiles Var HWND Section ${GetWallpaper} "$r0" ;获得当前壁纸路径 ${Get
实现一个刮刮乐的刮奖效果。即用手指滑动刮奖区,会刮去上面一层视图,显示下面一层视图的内容。 [Code4App.com]