Skip to content

Commit 78cd546

Browse files
authored
Cloudflare captcha solving (#66)
* install puppeteer-captcha-plugin * added cloudflare plugin and created action * documentation * export * route * add router * version update and variables names * readme * yarn.lock * Merged 2 endpoints into one. Deprecated recaptcha_solver endpoint * README, version update and move closeOnEmpty stuff * delete debug logs * CloudflareCaptchaSolverPlugin
1 parent 1ad52e1 commit 78cd546

File tree

11 files changed

+471
-201
lines changed

11 files changed

+471
-201
lines changed

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,29 @@ Example request body:
149149
}
150150
```
151151

152+
### **/captcha_solver**
153+
154+
This POST method solves captchas on the page.
155+
Before using it, enable the corresponding puppeteer-extra plugin and provide the `TOKEN_2CAPTCHA` environment variable.
156+
Currently available captcha types:
157+
* Recaptcha
158+
* Cloudflare
159+
160+
Example request body:
161+
```json5
162+
{
163+
"solveCloudflareCaptcha": true,
164+
"solveRecaptcha": true,
165+
"closeOnEmpty": false,
166+
"waitOptions": { // selector, xpath or timeout, same as in the goto method
167+
"timeout": 5000, //default timeout is 1000ms
168+
},
169+
"navigationOptions": { // use if solving the captcha triggers navigation to another page; same as in goXXX methods
170+
"waitUntil": "domcontentloaded",
171+
}
172+
}
173+
```
174+
152175
### **/compose**
153176

154177
This POST method allows to combine several puppeteer actions into one.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
2+
* The function solves cloudflare captcha on the page.
3+
* If there is no cloudflare captcha on the page nothing will happen.
4+
* If there is a cloudflare captcha the function solves it and then inserts the special code
5+
* into the page automatically.
6+
*
7+
* Returns useful information about solving cloudflare captcha.
8+
* For more information about return value visit
9+
* https://www.npmjs.com/package/puppeteer-captcha-plugin
10+
*/
11+
exports.cloudflareCaptchaSolver = async function cloudflareCaptchaSolver(page, request) {
12+
let cloudflareCaptchaData;
13+
14+
console.log("cloudflareCaptchaSolver");
15+
if (request.body.solveCloudflareCaptcha) {
16+
cloudflareCaptchaData = await page.solveCloudflareCaptcha();
17+
} else {
18+
cloudflareCaptchaData = await page.findCloudflareCaptcha();
19+
}
20+
21+
if (request.body.navigationOptions) {
22+
await page.waitForNavigation(request.body.navigationOptions);
23+
}
24+
25+
return {
26+
cloudflareCaptchaData: cloudflareCaptchaData,
27+
}
28+
}

actions/captcha_solver/index.js

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
const {recaptchaSolver} = require("./recaptcha_solver");
2+
const {cloudflareCaptchaSolver} = require("./cloudflare_captcha_solver");
3+
const utils = require("../../helpers/utils");
4+
5+
6+
DEFAULT_TIMEOUT = 1000;
7+
8+
/**
9+
* The function solves captchas on the page.
10+
* Currently available captcha types: Recaptcha, Cloudflare.
11+
* Only work with enabled PuppeteerExtra.
12+
*/
13+
exports.captchaSolver = async function captchaSolver(page, request) {
14+
let responseData = {}
15+
if ("solveCloudflareCaptcha" in request.body) {
16+
Object.assign(responseData, await cloudflareCaptchaSolver(page, request));
17+
} else {
18+
console.log("No Solve CloudflareCaptcha");
19+
}
20+
if ("solveRecaptcha" in request.body) {
21+
Object.assign(responseData, await recaptchaSolver(page, request));
22+
}
23+
24+
if (request.body.navigationOptions) {
25+
await page.waitForNavigation(request.body.navigationOptions);
26+
}
27+
28+
const waitOptions = request.body.waitOptions || {timeout: DEFAULT_TIMEOUT};
29+
const contents = await utils.getContents(page, waitOptions);
30+
31+
if (request.query.closePage ||
32+
(request.body.closeOnEmpty && !responseData.recaptchaData?.captchas && !responseData.cloudflareCaptchaData?.data)) {
33+
await page.close();
34+
}
35+
36+
return {
37+
...contents,
38+
...responseData,
39+
};
40+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
* The function solves recaptchas on the page.
3+
* If there is no recaptcha on the page nothing will happen.
4+
* If there is a recaptcha the function solves it and then inserts the special code
5+
* into the page automatically.
6+
*
7+
* Returns useful information about recaptcha_solving.
8+
* For more information about return value visit
9+
* https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-recaptcha#result-object
10+
*/
11+
exports.recaptchaSolver = async function recaptchaSolver(page, request) {
12+
let recaptchaData;
13+
14+
if (request.body.solveRecaptcha) {
15+
recaptchaData = await page.solveRecaptchas();
16+
} else {
17+
recaptchaData = await page.findRecaptchas();
18+
}
19+
20+
if (request.body.navigationOptions) {
21+
await page.waitForNavigation(request.body.navigationOptions);
22+
}
23+
24+
return {
25+
recaptchaData: recaptchaData,
26+
}
27+
}

actions/compose.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ const exceptions = require("../helpers/exceptions");
33
endpoint2action = {
44
action: require("./action").action,
55
click: require("./click").click,
6+
captcha_solver: require("./captcha_solver").captchaSolver,
67
fill_form: require("./fill_form").fillForm,
78
back: require("./goback").goBack,
89
forward: require("./goforward").goForward,

actions/recaptcha_solver.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
const utils = require('../helpers/utils')
1+
const utils = require('../helpers/utils');
22
const exceptions = require("../helpers/exceptions");
33

44
const DEFAULT_TIMEOUT = 1000; // 1 second
@@ -12,6 +12,8 @@ const DEFAULT_TIMEOUT = 1000; // 1 second
1212
* Returns useful information about recaptcha_solving.
1313
* For more information about return value visit
1414
* https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-recaptcha#result-object
15+
*
16+
* @deprecated Since version 0.4.0. Use captchaSolver router instead.
1517
*/
1618
exports.recaptchaSolver = async function recaptchaSolver(page, request) {
1719
if (!("solve_recaptcha" in request.body)) {

app.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@ const express = require('express');
44
const puppeteer = require('puppeteer-extra')
55

66
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha')
7+
const CaptchaPlugin = require('puppeteer-captcha-plugin').CloudflareCaptchaSolverPlugin
78
const StealthPlugin = require('puppeteer-extra-plugin-stealth')
89
const cookieParser = require('cookie-parser');
910
const bodyParser = require('body-parser');
1011
const AsyncLock = require('async-lock');
1112

1213
const indexRouter = require('./routes/index');
1314
const composeRouter = require('./routes/compose');
15+
const captchaRouter = require("./routes/captcha_solver");
1416
const healthCheckRouter = require('./routes/health_check');
1517
const gotoRouter = require('./routes/goto');
1618
const backRouter = require('./routes/goback');
@@ -56,7 +58,12 @@ async function setupBrowser() {
5658
token: TOKEN_2CAPTCHA
5759
}
5860
})
59-
)
61+
);
62+
puppeteer.use(
63+
new CaptchaPlugin({
64+
token: TOKEN_2CAPTCHA,
65+
})
66+
);
6067
}
6168
} catch (error) {
6269
console.error('Failed to proceed 2captcha token:', error);
@@ -113,6 +120,7 @@ app.use(cookieParser());
113120

114121
app.use('/', indexRouter);
115122
app.use('/compose', composeRouter);
123+
app.use('/captcha_solver', captchaRouter);
116124
app.use('/health_check', healthCheckRouter);
117125
app.use('/goto', gotoRouter);
118126
app.use('/back', backRouter);

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "scrapy-puppeteer-service",
3-
"version": "0.3.13",
3+
"version": "0.4.0",
44
"private": true,
55
"scripts": {
66
"start": "node ./bin/www"
@@ -21,6 +21,7 @@
2121
"morgan": "~1.10.0",
2222
"npm-run-all": "^4.1.5",
2323
"puppeteer": "^23.4.0",
24+
"puppeteer-captcha-plugin": "^0.1.0",
2425
"puppeteer-extra": "^3.3.6",
2526
"puppeteer-extra-plugin-recaptcha": "^3.6.8",
2627
"puppeteer-extra-plugin-stealth": "^2.11.2",

routes/captcha_solver.js

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
const express = require('express');
2+
const router = express.Router();
3+
4+
const {captchaSolver} = require('../actions/captcha_solver');
5+
const utils = require('../helpers/utils');
6+
7+
/**
 * POST handler for the captcha solver endpoint.
 *
 * Responds with 501 when the `TOKEN_2CAPTCHA` environment variable is not set.
 * Otherwise runs `captchaSolver` through `utils.performAction`, exposes the resulting
 * `contextId` in the `scrapy-puppeteer-service-context-id` header and sends the
 * action result as the response body. Errors are forwarded to the error middleware.
 */
router.post('/', async function (req, res, next) {
    if (!process.env.TOKEN_2CAPTCHA) {
        // The response is complete at this point, so the middleware chain must stop
        // here: calling next() after send() would hand an already-answered request
        // to the following handlers.
        res.status(501).send("TOKEN_2CAPTCHA is not provided!");
        return;
    }

    try {
        const response = await utils.performAction(req, captchaSolver);
        res.header('scrapy-puppeteer-service-context-id', response.contextId);
        res.send(response);
    } catch (e) {
        next(e);
    }
});
23+
24+
module.exports = router;

routes/recaptcha_solver.js

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1-
const express = require('express')
2-
const router = express.Router()
1+
const express = require('express');
2+
const router = express.Router();
33

44
const {recaptchaSolver} = require('../actions/recaptcha_solver');
5-
const utils = require('../helpers/utils')
6-
const exceptions = require('../helpers/exceptions');
5+
const utils = require('../helpers/utils');
76

7+
/**
8+
* @deprecated Since version 0.4.0. Use captchaSolver router instead.
9+
*/
810
router.post('/', async function (req, res, next) {
911
if (!process.env.TOKEN_2CAPTCHA) {
1012
res.status(501);

0 commit comments

Comments
 (0)