Browse Source

fix: trackinsight data gathering

Make the requests to trackinsight via puppeteer or curl-impersonate. Each option has its own trade-offs:

- Puppeteer: This method requires executing Chromium,
  which introduces significant overhead. However, it works 100% of
  the time.

- curl-impersonate: This is more lightweight and works efficiently for most cases.
  The main issue arises when AWS WAF detects excessive requests from the same IP, triggering a JavaScript challenge.
  If this happens, you're either forced to wait for a timeout or change your IP address.
  This method may still be suitable for users with a small number of ETFs, as they are less likely to trigger WAF detection.
pull/6357/head
Pablo Fraile Alonso 1 month ago
parent
commit
9e2b67e789
No known key found for this signature in database GPG Key ID: ACAB771EDAA2DA2C
  1. 28
      Dockerfile
  2. 5
      apps/api/src/services/configuration/configuration.service.ts
  3. 93
      apps/api/src/services/data-provider/data-enhancer/trackinsight/trackinsight.service.ts
  4. 3
      apps/api/src/services/interfaces/environment.interface.ts
  5. 1697
      package-lock.json
  6. 2
      package.json

28
Dockerfile

@ -53,10 +53,30 @@ FROM node:22-slim
LABEL org.opencontainers.image.source="https://github.com/ghostfolio/ghostfolio"
ENV NODE_ENV=production
RUN apt-get update && apt-get install -y --no-install-suggests \
curl \
openssl \
&& rm -rf /var/lib/apt/lists/*
# install Chrome + puppeteer deps
RUN apt-get update && apt-get install -y --no-install-recommends \
chromium \
fonts-liberation \
libasound2 \
libatk-bridge2.0-0 \
libatk1.0-0 \
libcups2 \
libdbus-1-3 \
libdrm2 \
libgbm1 \
libgtk-3-0 \
libnspr4 \
libnss3 \
libx11-xcb1 \
libxcomposite1 \
libxdamage1 \
libxrandr2 \
xdg-utils \
curl \
openssl \
&& apt-get clean && rm -rf /var/lib/apt/lists/*
RUN npx puppeteer browsers install chrome
COPY --chown=node:node --from=builder /ghostfolio/dist/apps /ghostfolio/apps/
COPY --chown=node:node ./docker/entrypoint.sh /ghostfolio/

5
apps/api/src/services/configuration/configuration.service.ts

@ -106,7 +106,10 @@ export class ConfigurationService {
TWITTER_ACCESS_TOKEN: str({ default: 'dummyAccessToken' }),
TWITTER_ACCESS_TOKEN_SECRET: str({ default: 'dummyAccessTokenSecret' }),
TWITTER_API_KEY: str({ default: 'dummyApiKey' }),
TWITTER_API_SECRET: str({ default: 'dummyApiSecret' })
TWITTER_API_SECRET: str({ default: 'dummyApiSecret' }),
TRACK_INSIGHT_TRY_CURL_IMPERSONATE: bool({ default: false }),
TRACK_INSIGHT_TRY_PUPPETEER: bool({ default: true }),
TRACK_INSIGHT_CHROMIUM_PATH: str({ default: '/usr/bin/chromium' })
});
}

93
apps/api/src/services/data-provider/data-enhancer/trackinsight/trackinsight.service.ts

@ -7,12 +7,16 @@ import { Sector } from '@ghostfolio/common/interfaces/sector.interface';
import { Injectable, Logger } from '@nestjs/common';
import { SymbolProfile } from '@prisma/client';
import { countries } from 'countries-list';
import { Browser, impersonate } from 'node-libcurl-ja3';
import { launch } from 'puppeteer';
@Injectable()
export class TrackinsightDataEnhancerService implements DataEnhancerInterface {
private static baseUrl = 'https://www.trackinsight.com/data-api';
private static countriesMapping = {
'Russian Federation': 'Russia'
'Russian Federation': 'Russia',
USA: 'United States',
'Republic of Korea': 'South Korea'
};
private static holdingsWeightTreshold = 0.85;
private static sectorsMapping = {
@ -21,11 +25,54 @@ export class TrackinsightDataEnhancerService implements DataEnhancerInterface {
'Health Care': 'Healthcare',
'Information Technology': 'Technology'
};
private static curly = impersonate(Browser.Chrome);
public constructor(
private readonly configurationService: ConfigurationService
) {}
/**
 * Fetches a Trackinsight URL and returns its JSON payload.
 *
 * The transport is chosen from configuration, in this order:
 * 1. `TRACK_INSIGHT_TRY_PUPPETEER`: load the URL in a Chromium instance
 *    (binary taken from `TRACK_INSIGHT_CHROMIUM_PATH`) and parse the text
 *    of the rendered page body as JSON.
 * 2. `TRACK_INSIGHT_TRY_CURL_IMPERSONATE`: request the URL through the
 *    shared Chrome-impersonating curl client.
 * 3. Otherwise: plain `fetch`, aborted after `requestTimeout` milliseconds.
 *
 * @param url Absolute URL of the Trackinsight endpoint.
 * @param requestTimeout Abort timeout in milliseconds for the plain `fetch`
 *   transport. Defaults to the `REQUEST_TIMEOUT` setting so that callers
 *   which omit it do not end up calling `AbortSignal.timeout(undefined)`.
 * @returns The response payload (parsed JSON for the Puppeteer and `fetch`
 *   transports, `res.data` for the curl transport).
 * @throws Whatever the selected transport or `JSON.parse` throws; callers
 *   are expected to handle rejections.
 */
private async fetchFromTrackInsight({
  url,
  requestTimeout = this.configurationService.get('REQUEST_TIMEOUT')
}: {
  url: string;
  requestTimeout?: number;
}): Promise<any> {
  if (this.configurationService.get('TRACK_INSIGHT_TRY_PUPPETEER')) {
    // NOTE(review): requestTimeout is not applied to this transport; only
    // Puppeteer's own navigation timeout bounds page.goto().
    const browser = await launch({
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
      executablePath: this.configurationService.get(
        'TRACK_INSIGHT_CHROMIUM_PATH'
      )
    });

    let data: string;

    try {
      const page = await browser.newPage();
      await page.setJavaScriptEnabled(true);
      await page.goto(url, {
        waitUntil: 'networkidle0'
      });

      data = await page.evaluate(() => {
        return document.body.innerText;
      });
    } finally {
      // Always shut the browser down, even when navigation or evaluation
      // fails, so a failed request does not leave a Chromium process behind.
      await browser.close();
    }

    return JSON.parse(data);
  }

  if (this.configurationService.get('TRACK_INSIGHT_TRY_CURL_IMPERSONATE')) {
    const res = await TrackinsightDataEnhancerService.curly.get(url);

    return res.data;
  }

  const res = await fetch(url, {
    signal: AbortSignal.timeout(requestTimeout)
  });

  return res.json();
}
public async enhance({
requestTimeout = this.configurationService.get('REQUEST_TIMEOUT'),
response,
@ -60,16 +107,11 @@ export class TrackinsightDataEnhancerService implements DataEnhancerInterface {
return response;
}
const profile = await fetch(
`${TrackinsightDataEnhancerService.baseUrl}/funds/${trackinsightSymbol}.json`,
{
signal: AbortSignal.timeout(requestTimeout)
}
)
.then((res) => res.json())
.catch(() => {
return {};
});
const profile = await this.fetchFromTrackInsight({
url: `${TrackinsightDataEnhancerService.baseUrl}/funds/${trackinsightSymbol}.json`
}).catch(() => {
return {};
});
const cusip = profile?.cusip;
@ -83,16 +125,11 @@ export class TrackinsightDataEnhancerService implements DataEnhancerInterface {
response.isin = isin;
}
const holdings = await fetch(
`${TrackinsightDataEnhancerService.baseUrl}/holdings/${trackinsightSymbol}.json`,
{
signal: AbortSignal.timeout(requestTimeout)
}
)
.then((res) => res.json())
.catch(() => {
return {};
});
const holdings = await this.fetchFromTrackInsight({
url: `${TrackinsightDataEnhancerService.baseUrl}/holdings/${trackinsightSymbol}.json`
}).catch(() => {
return {};
});
if (
holdings?.weight < TrackinsightDataEnhancerService.holdingsWeightTreshold
@ -182,19 +219,14 @@ export class TrackinsightDataEnhancerService implements DataEnhancerInterface {
requestTimeout: number;
symbol: string;
}) {
return fetch(
`https://www.trackinsight.com/search-api/search_v2/${symbol}/_/ticker/default/0/3`,
{
signal: AbortSignal.timeout(requestTimeout)
}
)
.then((res) => res.json())
return await this.fetchFromTrackInsight({
url: `https://www.trackinsight.com/search-api/search_v2/${symbol}/_/ticker/default/0/3`,
requestTimeout
})
.then((jsonRes) => {
if (
jsonRes['results']?.['count'] === 1 ||
// Allow exact match
jsonRes['results']?.['docs']?.[0]?.['ticker'] === symbol ||
// Allow EXCHANGE:SYMBOL
jsonRes['results']?.['docs']?.[0]?.['ticker']?.endsWith(`:${symbol}`)
) {
return jsonRes['results']['docs'][0]['ticker'];
@ -207,7 +239,6 @@ export class TrackinsightDataEnhancerService implements DataEnhancerInterface {
`Failed to search Trackinsight symbol for ${symbol} (${message})`,
'TrackinsightDataEnhancerService'
);
return undefined;
});
}

3
apps/api/src/services/interfaces/environment.interface.ts

@ -53,6 +53,9 @@ export interface Environment extends CleanedEnvAccessors {
REQUEST_TIMEOUT: number;
ROOT_URL: string;
STRIPE_SECRET_KEY: string;
TRACK_INSIGHT_TRY_CURL_IMPERSONATE: boolean;
TRACK_INSIGHT_TRY_PUPPETEER: boolean;
TRACK_INSIGHT_CHROMIUM_PATH: string;
TWITTER_ACCESS_TOKEN: string;
TWITTER_ACCESS_TOKEN_SECRET: string;
TWITTER_API_KEY: string;

1697
package-lock.json

File diff suppressed because it is too large

2
package.json

@ -121,6 +121,7 @@
"ngx-device-detector": "11.0.0",
"ngx-markdown": "21.1.0",
"ngx-skeleton-loader": "12.0.0",
"node-libcurl-ja3": "^5.2.2",
"open-color": "1.9.1",
"papaparse": "5.3.1",
"passport": "0.7.0",
@ -128,6 +129,7 @@
"passport-headerapikey": "1.2.2",
"passport-jwt": "4.0.1",
"passport-openidconnect": "0.1.2",
"puppeteer": "^24.37.4",
"reflect-metadata": "0.2.2",
"rxjs": "7.8.1",
"stripe": "20.3.0",

Loading…
Cancel
Save