Reputation: 3159
I'm trying to write a node script that identifies unused translation strings in my React project.
First, I want to get a list of all the translations that are used. To do this, I am getting a list of each JS file in my /src/components
folder and then reading the file.
My translation strings look like this: t('some.translation.key'), so basically, I want to identify each instance of t('...') using RegEx and then get the key in between those parentheses (i.e. "some.translation.key"). From there, I should be able to compare the keys to the ones in my translation JSON file and remove the ones that aren't being used.
unused.js
const path = require('path');
const fs = require('fs');
/**
 * Recursively collects the JavaScript source files (`.js` / `.jsx`) below a directory.
 *
 * The file extension is compared exactly, so `.json` files and files that merely
 * live under a directory whose name contains ".js" are not picked up.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [found=[]] - Accumulator used by the recursion; callers normally
 *   omit it, which gives every top-level call its own fresh result array.
 * @returns {string[]} Paths of all matching files, each built with `path.join` from `dir`.
 */
// https://stackoverflow.com/a/63111390/2262604
function getFiles(dir, found = []) {
  for (const entry of fs.readdirSync(dir)) {
    const absolute = path.join(dir, entry);
    if (fs.statSync(absolute).isDirectory()) {
      // descend into sub-directories, sharing the same accumulator
      getFiles(absolute, found);
      continue;
    }
    const extension = path.extname(absolute);
    if (extension === '.js' || extension === '.jsx') {
      found.push(absolute);
    }
  }
  return found;
}
/**
 * Reads every JS file found under the components directory, as the first step
 * towards collecting the translation keys passed to t('...').
 * The key extraction itself is still to be written, so nothing is returned yet.
 */
function getTranslations() {
  // components directory, resolved relative to this script
  const componentsDir = path.join(__dirname, '../../src/components');
  // all js files in the components directory
  const jsFiles = getFiles(componentsDir);
  const translationKeys = [];
  for (const file of jsFiles) {
    // read contents of file
    const contents = fs.readFileSync(file).toString();
    // search contents for all instances of t('...')
    // and get the key between the parentheses
  }
}
getTranslations();
How can I use RegEx to find all instances of t('...') in contents and then extract the ... string between the parentheses?
Upvotes: 1
Views: 920
Reputation: 29062
Yes, you could use a regular expression:
// Matches calls such as t('key'), t("key") or t('key', options):
//   \bt\(                        the identifier `t` followed by "("
//   \s*(['"])                    optional whitespace, then the opening quote (group 1)
//   ((?:\\.|(?!\1)[^\\\r\n])*)   the key (group 2): escape pairs, or any character that is
//                                not the opening quote, a backslash or a line break
//   \1\s*[,)]                    the same quote again, then "," (more arguments) or ")"
// Requiring the closing quote to equal the opening one keeps concatenations like
// t('a' + 'b') from being captured as a single bogus key.
// The key is captured exactly as written in the source; escape sequences are not decoded.
const translationCall = /\bt\(\s*(['"])((?:\\.|(?!\1)[^\\\r\n])*)\1\s*[,)]/g;
for (const [, , str] of contents.matchAll(translationCall)) {
  console.log('t called with string argument:', str);
}
However, the problem with regular expressions is that they don't understand the code: they tend to have trouble with strings that themselves contain ( ) or \', with concatenated strings or extra whitespace, etc., and you also get the contents literally, as written in the source, including possible escape sequences.
A more robust way would be to create an AST (abstract syntax tree) from the code and look for calls to t in it.
A popular AST parser would be acorn. There is also the supplementary module acorn-walk that helps walking through the whole syntax tree without building your own recursive algorithm.
// Recent releases of acorn and acorn-walk ship ES module builds with named
// exports only (no default export), so both are imported as namespaces.
import * as acorn from 'acorn';
import * as walk from 'acorn-walk';

// Example
const contents = "function a () { if (123) { t('hello') } return t('world') }";

// The arguments to acorn.parse would have to be adjusted based
// on what kind of syntax your files can use.
const result = acorn.parse(contents, { ecmaVersion: 2020 });

// Visit every node of the syntax tree and report each call whose callee is
// the bare identifier `t`.
walk.full(result, (node) => {
  const isCallToT =
    node.type === 'CallExpression' &&
    node.callee.type === 'Identifier' &&
    node.callee.name === 't';
  if (!isCallToT) {
    return;
  }

  const [firstArg] = node.arguments;
  if (node.arguments.length === 1 && firstArg.type === 'Literal' && typeof firstArg.value === 'string') {
    // This is for the case `t` is called with a single string
    // literal as argument.
    console.log('t called with string argument:', firstArg.value);
  } else {
    // In case you have things like template literals as well,
    // or multiple arguments, you'd need to handle them here too.
    console.log('t called with unknown arguments:', node.arguments);
  }
});

// Will output:
// t called with string argument: hello
// t called with string argument: world
Upvotes: 2