Puppeteer 是一个 npm 库,它提供了高级 Node.js API 来控制 Chrome。Puppeteer 默认以无头(无界面)方式运行,但也可以配置为运行有界面的 Chrome。Puppeteer 提供了一系列 API,通过 Chrome DevTools Protocol 控制 Chromium/Chrome 浏览器的行为。其对应的 Python 版本为 pyppeteer。
https://github.com/puppeteer/puppeteer
npm install puppeteer --save
Puppeteer具有以下特性:
// Page whose date-filter form is submitted once per day being scraped.
const url = 'http://61.163.88.227:8006/hwsq.aspx?sr=0nkRxv6s9CTRMlwRgmfFF6jTpJPtAv87';

/*
 * Crawl script: walks backwards one day at a time from today, submits each
 * date through the page's form, extracts the result with `getInfo`, and
 * writes it to ./data/<yyyy-MM-dd>.txt.
 *
 * Relies on `puppeteer`, `fs` and `getInfo` being in scope in this file.
 */
(async () => {
  const DATE_INPUT_SELECTOR = '#ContentLeft_menuDate1_TextBox11';
  const SUBMIT_SELECTOR = '#ContentLeft_Button1';
  const OUTPUT_DIR = './data/';
  const DAYS_TO_FETCH = 8280;
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const SETTLE_DELAY_MS = 3000;

  // Plain timer instead of `page.waitFor(ms)`, which is deprecated and has
  // been removed from newer Puppeteer releases.
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Local-time yyyy-MM-dd formatter; `Date.prototype.format` is not a
  // standard method, so it must not be relied on here.
  const formatDate = (d) => {
    const year = String(d.getFullYear()).padStart(4, '0');
    const month = String(d.getMonth() + 1).padStart(2, '0');
    const day = String(d.getDate()).padStart(2, '0');
    return `${year}-${month}-${day}`;
  };

  // 1. Open browser (pass `{ headless: false }` to watch it work).
  const browser = await puppeteer.launch({});
  try {
    // 2. Create a new page
    const page = await browser.newPage();
    // 3. Go to the target website
    await page.goto(url, { waitUntil: 'networkidle2' });

    // Make sure the output directory exists before the first write.
    fs.mkdirSync(OUTPUT_DIR, { recursive: true });

    // 4. Get data, one day per iteration, newest first.
    const now = Date.now();
    for (let i = 0; i < DAYS_TO_FETCH; i++) {
      const date = formatDate(new Date(now - MS_PER_DAY * i));

      // Look the elements up by selector on every iteration: handles obtained
      // before a form submit may point at nodes of a page that no longer
      // exists once the submit has reloaded it.
      await page.$eval(
        DATE_INPUT_SELECTOR,
        (el, value) => el.setAttribute('value', value),
        date,
      );
      // Await the click so a failure surfaces here instead of as a floating
      // promise rejection.
      await page.click(SUBMIT_SELECTOR);

      // Give the submit time to take effect, then make sure the form is back.
      await sleep(SETTLE_DELAY_MS);
      await page.waitForSelector(DATE_INPUT_SELECTOR);

      const data = await page.evaluate(getInfo);
      console.log(`${date}...`);
      // `getInfo` yields an array, which `fs.writeFileSync` rejects on current
      // Node versions; serialise it explicitly (comma-joined, matching the
      // implicit Array-to-string coercion older Node versions applied).
      fs.writeFileSync(OUTPUT_DIR + date + '.txt', data.join(','));
    }
  } finally {
    // 5. Close browser, even when a step above failed, so no Chromium
    // process is left behind.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure and signal it through the exit code.
  console.error(err);
  process.exitCode = 1;
});
/**
 * Extracts the tab-separated cells of the first `.mainTxt` element.
 *
 * Reads the global `document`, so it is meant to run in the page context
 * (this file passes it to `page.evaluate`).
 *
 * @returns {Promise<string[]>} The element's `innerText` split on tab characters.
 * @throws {TypeError} (as a rejection) when no `.mainTxt` element exists.
 */
async function getInfo() {
  // Only the first match is used; any further `.mainTxt` elements are ignored.
  const [firstMatch] = document.querySelectorAll('.mainTxt');
  return firstMatch.innerText.split(/\t/);
}