Browse Source

Suggestion for scraping with JSON

pull/2810/head
Hugo Persson 2 years ago
committed by Thomas Kaul
parent
commit
1e45de1d1c
  1. 4
      apps/api/src/app/admin/admin.controller.ts
  2. 38
      apps/api/src/services/data-provider/manual/manual.service.ts
  3. 3
      apps/api/src/services/symbol-profile/symbol-profile.service.ts
  4. 3
      libs/common/src/lib/helper.ts
  5. 1
      libs/common/src/lib/interfaces/scraper-configuration.interface.ts
  6. 1
      package.json
  7. 42
      yarn.lock

4
apps/api/src/app/admin/admin.controller.ts

@ -227,8 +227,8 @@ export class AdminController {
@Param('symbol') symbol: string
): Promise<{ price: number }> {
try {
const { headers, selector, url } = JSON.parse(data.scraperConfiguration);
const price = await this.manualService.test({ headers, selector, url });
const config = JSON.parse(data.scraperConfiguration);
const price = await this.manualService.test(config);
if (price) {
return { price };

38
apps/api/src/services/data-provider/manual/manual.service.ts

@ -19,6 +19,8 @@ import * as cheerio from 'cheerio';
import { isUUID } from 'class-validator';
import { addDays, format, isBefore } from 'date-fns';
import got, { Headers } from 'got';
import { ScraperConfiguration } from '@ghostfolio/common/interfaces';
import jsonpath from "jsonpath";
@Injectable()
export class ManualService implements DataProviderInterface {
@ -97,7 +99,7 @@ export class ManualService implements DataProviderInterface {
return {};
}
const value = await this.scrape({ headers, selector, url });
const value = await this.scrape(symbolProfile.scraperConfiguration);
return {
[symbol]: {
@ -220,23 +222,11 @@ export class ManualService implements DataProviderInterface {
return { items };
}
public async test(params: any) {
return this.scrape({
headers: params.headers,
selector: params.selector,
url: params.url
});
public async test(config: ScraperConfiguration) {
return this.scrape(config);
}
private async scrape({
headers = {},
selector,
url
}: {
headers?: Headers;
selector: string;
url: string;
}): Promise<number> {
private async scrape(config: ScraperConfiguration): Promise<number> {
try {
const abortController = new AbortController();
@ -244,15 +234,23 @@ export class ManualService implements DataProviderInterface {
abortController.abort();
}, this.configurationService.get('REQUEST_TIMEOUT'));
const { body } = await got(url, {
headers,
const { body } = await got(config.url, {
headers: config.headers as Headers,
// @ts-ignore
signal: abortController.signal
});
if(config.type === 'json') {
const data = JSON.parse(body);
const field = String(jsonpath.query(data, config.selector)[0]);
return extractNumberFromString(field);
}
else{
const $ = cheerio.load(body);
const $ = cheerio.load(body);
return extractNumberFromString($(config.selector).first().text());
}
return extractNumberFromString($(selector).first().text());
} catch (error) {
throw error;
}

3
apps/api/src/services/symbol-profile/symbol-profile.service.ts

@ -203,7 +203,8 @@ export class SymbolProfileService {
headers:
scraperConfiguration.headers as ScraperConfiguration['headers'],
selector: scraperConfiguration.selector as string,
url: scraperConfiguration.url as string
url: scraperConfiguration.url as string,
type: (scraperConfiguration.type ?? "html") as ScraperConfiguration['type']
};
}

3
libs/common/src/lib/helper.ts

@ -124,7 +124,8 @@ export function extractNumberFromString(aString: string): number {
try {
const [numberString] = aString.match(NUMERIC_REGEXP);
return parseFloat(numberString.trim());
} catch {
} catch (err){
console.error(err);
return undefined;
}
}

1
libs/common/src/lib/interfaces/scraper-configuration.interface.ts

@ -3,4 +3,5 @@ export interface ScraperConfiguration {
headers?: { [key: string]: string };
selector: string;
url: string;
type: 'html' | 'json';
}

1
package.json

@ -110,6 +110,7 @@
"helmet": "7.0.0",
"http-status-codes": "2.3.0",
"ionicons": "7.1.0",
"jsonpath": "^1.1.1",
"lodash": "4.17.21",
"marked": "9.1.6",
"ms": "3.0.0-canary.1",

42
yarn.lock

@ -10959,6 +10959,18 @@ escape-string-regexp@^5.0.0:
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz#4683126b500b61762f2dbebace1806e8be31b1c8"
integrity sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==
escodegen@^1.8.1:
version "1.14.3"
resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-1.14.3.tgz#4e7b81fba61581dc97582ed78cab7f0e8d63f503"
integrity sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==
dependencies:
esprima "^4.0.1"
estraverse "^4.2.0"
esutils "^2.0.2"
optionator "^0.8.1"
optionalDependencies:
source-map "~0.6.1"
escodegen@^2.0.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-2.1.0.tgz#ba93bbb7a43986d29d6041f99f5262da773e2e17"
@ -11187,6 +11199,11 @@ espree@^9.0.0, espree@^9.4.0:
acorn-jsx "^5.3.2"
eslint-visitor-keys "^3.4.1"
esprima@1.2.2:
version "1.2.2"
resolved "https://registry.yarnpkg.com/esprima/-/esprima-1.2.2.tgz#76a0fd66fcfe154fd292667dc264019750b1657b"
integrity sha512-+JpPZam9w5DuJ3Q67SqsMGtiHKENSMRVoxvArfJZK01/BfLEObtZ6orJa/MtoGNR/rfMgp5837T41PAmTwAv/A==
esprima@^4.0.0, esprima@^4.0.1, esprima@~4.0.0:
version "4.0.1"
resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71"
@ -11206,7 +11223,7 @@ esrecurse@^4.1.0, esrecurse@^4.3.0:
dependencies:
estraverse "^5.2.0"
estraverse@^4.1.1:
estraverse@^4.1.1, estraverse@^4.2.0:
version "4.3.0"
resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-4.3.0.tgz#398ad3f3c5a24948be7725e83d11a7de28cdbd1d"
integrity sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==
@ -14245,6 +14262,15 @@ jsonparse@^1.3.1:
resolved "https://registry.yarnpkg.com/jsonparse/-/jsonparse-1.3.1.tgz#3f4dae4a91fac315f71062f8521cc239f1366280"
integrity sha512-POQXvpdL69+CluYsillJ7SUhKvytYjW9vG/GKpnf+xP8UWgYEM/RaMzHHofbALDiKbbP1W8UEYmgGl39WkPZsg==
jsonpath@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/jsonpath/-/jsonpath-1.1.1.tgz#0ca1ed8fb65bb3309248cc9d5466d12d5b0b9901"
integrity sha512-l6Cg7jRpixfbgoWgkrl77dgEj8RPvND0wMH6TwQmi9Qs4TFfS9u5cUFnbeKTwj5ga5Y3BTGGNI28k117LJ009w==
dependencies:
esprima "1.2.2"
static-eval "2.0.2"
underscore "1.12.1"
jsonwebtoken@9.0.0:
version "9.0.0"
resolved "https://registry.yarnpkg.com/jsonwebtoken/-/jsonwebtoken-9.0.0.tgz#d0faf9ba1cc3a56255fe49c0961a67e520c1926d"
@ -16051,7 +16077,7 @@ opn@5.3.0:
dependencies:
is-wsl "^1.1.0"
optionator@^0.8.2:
optionator@^0.8.1, optionator@^0.8.2:
version "0.8.3"
resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495"
integrity sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==
@ -18446,6 +18472,13 @@ standard-as-callback@^2.1.0:
resolved "https://registry.yarnpkg.com/standard-as-callback/-/standard-as-callback-2.1.0.tgz#8953fc05359868a77b5b9739a665c5977bb7df45"
integrity sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A==
static-eval@2.0.2:
version "2.0.2"
resolved "https://registry.yarnpkg.com/static-eval/-/static-eval-2.0.2.tgz#2d1759306b1befa688938454c546b7871f806a42"
integrity sha512-N/D219Hcr2bPjLxPiV+TQE++Tsmrady7TqAJugLy7Xk1EumfDWS/f5dtBbkRCGE7wKKXuYockQoj8Rm2/pVKyg==
dependencies:
escodegen "^1.8.1"
static-extend@^0.1.1:
version "0.1.2"
resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6"
@ -19380,6 +19413,11 @@ unbox-primitive@^1.0.2:
has-symbols "^1.0.3"
which-boxed-primitive "^1.0.2"
underscore@1.12.1:
version "1.12.1"
resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.12.1.tgz#7bb8cc9b3d397e201cf8553336d262544ead829e"
integrity sha512-hEQt0+ZLDVUMhebKxL4x1BTtDY7bavVofhZ9KZ4aI26X9SRaE+Y3m83XUL1UP2jn8ynjndwCCpEHdUG+9pP1Tw==
undici-types@~5.26.4:
version "5.26.5"
resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617"

Loading…
Cancel
Save