Browse Source

Improve extractNumberFromString() for international number formats (#2843)

* Set up test

* Add support for international formatted numbers

* Expose locale in scraper configuration

* Update changelog
pull/2865/head
Thomas Kaul 1 year ago
committed by GitHub
parent
commit
005890d785
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      CHANGELOG.md
  2. 6
      apps/api/src/app/info/info.service.ts
  3. 16
      apps/api/src/services/data-provider/manual/manual.service.ts
  4. 1
      apps/api/src/services/symbol-profile/symbol-profile.service.ts
  5. 39
      libs/common/src/lib/helper.spec.ts
  6. 19
      libs/common/src/lib/helper.ts
  7. 1
      libs/common/src/lib/interfaces/scraper-configuration.interface.ts
  8. 5
      package.json
  9. 14
      yarn.lock

2
CHANGELOG.md

@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Broken down the performance into asset and currency on the analysis page (experimental)
- Added support for internationally formatted numbers in the scraper configuration
- Added the `locale` attribute to the scraper configuration to control how the scraped number is parsed
### Changed

6
apps/api/src/app/info/info.service.ts

@ -195,11 +195,11 @@ export class InfoService {
const $ = cheerio.load(body);
return extractNumberFromString(
$(
return extractNumberFromString({
value: $(
`a[href="/ghostfolio/ghostfolio/graphs/contributors"] .Counter`
).text()
);
});
} catch (error) {
Logger.error(error, 'InfoService - GitHub');

16
apps/api/src/services/data-provider/manual/manual.service.ts

@ -236,6 +236,7 @@ export class ManualService implements DataProviderInterface {
abortController.abort();
}, this.configurationService.get('REQUEST_TIMEOUT'));
let locale = scraperConfiguration.locale;
const { body, headers } = await got(scraperConfiguration.url, {
headers: scraperConfiguration.headers as Headers,
// @ts-ignore
@ -248,13 +249,20 @@ export class ManualService implements DataProviderInterface {
jsonpath.query(data, scraperConfiguration.selector)[0]
);
return extractNumberFromString(value);
return extractNumberFromString({ locale, value });
} else {
const $ = cheerio.load(body);
return extractNumberFromString(
$(scraperConfiguration.selector).first().text()
);
if (!locale) {
try {
locale = $('html').attr('lang');
} catch {}
}
return extractNumberFromString({
locale,
value: $(scraperConfiguration.selector).first().text()
});
}
} catch (error) {
throw error;

1
apps/api/src/services/symbol-profile/symbol-profile.service.ts

@ -202,6 +202,7 @@ export class SymbolProfileService {
defaultMarketPrice: scraperConfiguration.defaultMarketPrice as number,
headers:
scraperConfiguration.headers as ScraperConfiguration['headers'],
locale: scraperConfiguration.locale as string,
selector: scraperConfiguration.selector as string,
url: scraperConfiguration.url as string
};

39
libs/common/src/lib/helper.spec.ts

@ -0,0 +1,39 @@
import { extractNumberFromString } from '@ghostfolio/common/helper';
/**
 * Unit tests for `extractNumberFromString()`.
 *
 * The numeric cases are table-driven: each row holds the test title, the
 * argument object handed to the helper and the number it is expected to
 * return. The callbacks are synchronous because the helper returns its
 * result directly and nothing is awaited.
 */
describe('Helper', () => {
  describe('Extract number from string', () => {
    // [test title, argument for extractNumberFromString(), expected result]
    const cases: [string, { locale?: string; value: string }, number][] = [
      // Rows without a `locale` exercise the helper's default locale
      ['Get decimal number', { value: '999.99' }, 999.99],
      ['Get decimal number (with spaces)', { value: ' 999.99 ' }, 999.99],
      ['Get decimal number (with currency)', { value: '999.99 CHF' }, 999.99],
      // Rows with an explicit `locale` cover other decimal and group separators
      [
        'Get decimal number (comma notation)',
        { locale: 'de-DE', value: '999,99' },
        999.99
      ],
      [
        'Get decimal number with group (dot notation)',
        { locale: 'de-CH', value: '99’999.99' },
        99999.99
      ],
      [
        'Get decimal number with group (comma notation)',
        { locale: 'de-DE', value: '99.999,99' },
        99999.99
      ]
    ];

    // '%s' is replaced by the first element of each row, i.e. the test title
    it.each(cases)('%s', (_title, input, expected) => {
      expect(extractNumberFromString(input)).toEqual(expected);
    });

    it('Not a number', () => {
      // Input without any digit is expected to yield NaN
      expect(extractNumberFromString({ value: 'X' })).toBeNaN();
    });
  });
});

19
libs/common/src/lib/helper.ts

@ -1,4 +1,5 @@
import * as currencies from '@dinero.js/currencies';
import { NumberParser } from '@internationalized/number';
import { DataSource, MarketData } from '@prisma/client';
import Big from 'big.js';
import {
@ -20,8 +21,6 @@ export const DATE_FORMAT = 'yyyy-MM-dd';
export const DATE_FORMAT_MONTHLY = 'MMMM yyyy';
export const DATE_FORMAT_YEARLY = 'yyyy';
const NUMERIC_REGEXP = /[-]{0,1}[\d]*[.,]{0,1}[\d]+/g;
export function calculateBenchmarkTrend({
days,
historicalData
@ -120,10 +119,20 @@ export function encodeDataSource(aDataSource: DataSource) {
return undefined;
}
export function extractNumberFromString(aString: string): number {
export function extractNumberFromString({
locale = 'en-US',
value
}: {
locale?: string;
value: string;
}): number {
try {
const [numberString] = aString.match(NUMERIC_REGEXP);
return parseFloat(numberString.trim());
// Remove non-numeric characters (excluding international formatting characters)
const numericValue = value.replace(/[^\d.,'’\s]/g, '');
let parser = new NumberParser(locale);
return parser.parse(numericValue);
} catch {
return undefined;
}

1
libs/common/src/lib/interfaces/scraper-configuration.interface.ts

@ -1,6 +1,7 @@
/**
 * Settings that describe where a value is scraped from and how the scraped
 * text is turned into a number.
 */
export interface ScraperConfiguration {
  /**
   * NOTE(review): presumably a fallback market price used when scraping
   * yields no value — confirm against the callers.
   */
  defaultMarketPrice?: number;

  /** Header names mapped to header values for the request to `url`. */
  headers?: Record<string, string>;

  /** Locale (e.g. `de-DE`) whose number format is used to parse the scraped text. */
  locale?: string;

  /** Selector that locates the value inside the fetched response. */
  selector: string;

  /** Address of the resource to fetch. */
  url: string;
}

5
package.json

@ -44,7 +44,9 @@
"start:production": "yarn database:migrate && yarn database:seed && node main",
"start:server": "nx run api:serve --watch",
"start:storybook": "nx run ui:storybook",
"test": "npx dotenv-cli -e .env.example -- nx test",
"test": "yarn test:api && yarn test:common",
"test:api": "npx dotenv-cli -e .env.example -- nx test api",
"test:common": "npx dotenv-cli -e .env.example -- nx test common",
"test:single": "nx run api:test --test-file portfolio-calculator-novn-buy-and-sell.spec.ts",
"ts-node": "ts-node",
"update": "nx migrate latest",
@ -71,6 +73,7 @@
"@dfinity/identity": "0.15.7",
"@dfinity/principal": "0.15.7",
"@dinero.js/currencies": "2.0.0-alpha.8",
"@internationalized/number": "3.5.0",
"@nestjs/bull": "10.0.1",
"@nestjs/cache-manager": "2.1.0",
"@nestjs/common": "10.1.3",

14
yarn.lock

@ -3063,6 +3063,13 @@
resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.1.tgz#e5211452df060fa8522b55c7b3c0c4d1981cb044"
integrity sha512-dvuCeX5fC9dXgJn9t+X5atfmgQAzUOWqS1254Gh0m6i8wKd10ebXkfNKiRK+1GWi/yTvvLDHpoxLr0xxxeslWw==
"@internationalized/number@3.5.0":
version "3.5.0"
resolved "https://registry.yarnpkg.com/@internationalized/number/-/number-3.5.0.tgz#9de6018424b441a6545f209afa286ad7df4a2906"
integrity sha512-ZY1BW8HT9WKYvaubbuqXbbDdHhOUMfE2zHHFJeTppid0S+pc8HtdIxFxaYMsGjCb4UsF+MEJ4n2TfU7iHnUK8w==
dependencies:
"@swc/helpers" "^0.5.0"
"@ioredis/commands@^1.1.1":
version "1.2.0"
resolved "https://registry.yarnpkg.com/@ioredis/commands/-/commands-1.2.0.tgz#6d61b3097470af1fdbbe622795b8921d42018e11"
@ -6201,6 +6208,13 @@
resolved "https://registry.yarnpkg.com/@swc/counter/-/counter-0.1.2.tgz#bf06d0770e47c6f1102270b744e17b934586985e"
integrity sha512-9F4ys4C74eSTEUNndnER3VJ15oru2NumfQxS8geE+f3eB5xvfxpWyqE5XlVnxb/R14uoXi6SLbBwwiDSkv+XEw==
"@swc/helpers@^0.5.0":
version "0.5.3"
resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.3.tgz#98c6da1e196f5f08f977658b80d6bd941b5f294f"
integrity sha512-FaruWX6KdudYloq1AHD/4nU+UsMTdNE8CKyrseXWEcgjDAbvkwJg2QGPAnfIJLIWsjZOSPLOAykK6fuYp4vp4A==
dependencies:
tslib "^2.4.0"
"@swc/types@^0.1.5":
version "0.1.5"
resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.5.tgz#043b731d4f56a79b4897a3de1af35e75d56bc63a"

Loading…
Cancel
Save