Reputation: 6302
In a project we have text files looking like this:
mv A, R3
mv R2, B
mv R1, R3
mv B, R4
add A, R1
add B, R1
add R1, R2
add R3, R3
add R21, X
add R12, Y
mv X, R2
I need to replace the strings according to the following, but I am looking for a more general solution.
R1 => R2
R2 => R3
R3 => R1
R12 => R21
R21 => R12
I know I could do it in Perl, the replace() function in the following code, but the real application is written in Java, so the solution needs to be in Java as well.
#!/usr/bin/perl
use strict;
use warnings;
use File::Slurp qw(read_file write_file);
my %map = (
R1 => 'R2',
R2 => 'R3',
R3 => 'R1',
R12 => 'R21',
R21 => 'R12',
);
replace(\%map, \@ARGV);
sub replace {
my ($map, $files) = @_;
# Create R12|R21|R1|R2|R3
# making sure R12 is before R1
my $regex = join "|",
sort { length($b) <=> length($a) }
keys %$map;
my $ts = time;
foreach my $file (@$files) {
my $data = read_file($file);
$data =~ s/\b($regex)\b/$map{$1}/g;
rename $file, "$file.$ts"; # backup with current timestamp
write_file( $file, $data);
}
}
Your help for the Java implementation would be appreciated.
Upvotes: 5
Views: 2598
Reputation: 990
My suggestion would be replacing the strings while reading from the file itself You can use RandomAccessFile. While reading from the file character by character, You can actually check for the pattern and then do the replacement then and there itself. And then you can write all the content at once into the file. I think this will save you more time.
Upvotes: 0
Reputation: 104178
You can use a HashMap:
Map<String, String> map = new HashMap<String, String>();
map.put("R1", "R2");
map.put("R2", "R3");
for(String key: map.keySet()) {
str.replaceAll(key, map.get(key));
}
replaceAll also handles regular expressions.
EDIT: The above solution, as many have pointed out, doesn't work because it doesn't handle cyclic replacements. So this is my second approach:
public class Replacement {
private String newS;
private String old;
public Replacement(String old, String newS) {
this.newS = newS;
this.old = old;
}
public String getOld() {
return old;
}
public String getNew() {
return newS;
}
}
SortedMap<Integer, Replacement> map = new TreeMap<Integer, Replacement>();
map.put(new Integer(1), new Replacement("R2", "R3"));
map.put(new Integer(2), new Replacement("R1", "R2"));
for(Integer key: map.keySet()) {
str.replaceAll(map.get(key).getOld(), map.get(key).getNew());
}
This works provided that you order the replacements properly and that you guard yourself against cyclic replacements. Some replacements are impossible:
R1 -> R2
R2 -> R3
R3 -> R1
You must use some 'temp' variables for these:
R1 -> R@1
R2 -> R@3
R3 -> R1
R@(\d{1}) -> R\1
You could write a library that it would do all these for you.
Upvotes: -2
Reputation: 75222
Here's a less verbose way to do this in one pass, using Matcher's lower-level API: appendReplacement()
and appendTail()
.
import java.util.*;
import java.util.regex.*;
public class Test
{
public static void main(String[] args) throws Exception
{
String inputString
= "mv A, R3\n"
+ "mv R2, B\n"
+ "mv R1, R3\n"
+ "mv B, R4\n"
+ "add A, R1\n"
+ "add B, R1\n"
+ "add R1, R2\n"
+ "add R3, R3\n"
+ "add R21, X\n"
+ "add R12, Y\n"
+ "mv X, R2"
;
System.out.println(inputString);
System.out.println();
System.out.println(doReplace(inputString));
}
public static String doReplace(String str)
{
Map<String, String> map = new HashMap<String, String>()
{{
put("R1", "R2");
put("R2", "R3");
put("R3", "R1");
put("R12", "R21");
put("R21", "R12");
}};
Pattern p = Pattern.compile("\\bR\\d\\d?\\b");
Matcher m = p.matcher(str);
StringBuffer sb = new StringBuffer();
while (m.find())
{
String repl = map.get(m.group());
if (repl != null)
{
m.appendReplacement(sb, "");
sb.append(repl);
}
}
m.appendTail(sb);
return sb.toString();
}
}
Note that appendReplacement()
processes the replacement string to replace $n sequences with text from capture groups, which we don't want in this case. To avoid that, I pass it an empty string, then use StringBuffer's append()
method instead.
Elliott Hughes has published a pre-packaged implementation of this technique here. (He tends to throw in references to other utility classes he's written, so you may want to delete the tests in his main()
method before you compile it.)
Upvotes: 0
Reputation: 29854
I've actually had to use this sort of algorithm several times in the past two weeks. So here it is the world's second-most verbose language...
import java.util.HashMap;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
/*
R1 => R2
R2 => R3
R3 => R1
R12 => R21
R21 => R12
*/
String inputString
= "mv A, R3\n"
+ "mv R2, B\n"
+ "mv R1, R3\n"
+ "mv B, R4\n"
+ "add A, R1\n"
+ "add B, R1\n"
+ "add R1, R2\n"
+ "add R3, R3\n"
+ "add R21, X\n"
+ "add R12, Y\n"
+ "mv X, R2"
;
System.out.println( "inputString = \"" + inputString + "\"" );
HashMap h = new HashMap();
h.put( "R1", "R2" );
h.put( "R2", "R3" );
h.put( "R3", "R1" );
h.put( "R12", "R21" );
h.put( "R21", "R12" );
Pattern p = Pattern.compile( "\\b(R(?:12?|21?|3))\\b");
Matcher m = p.matcher( inputString );
StringBuffer sbuff = new StringBuffer();
int lastEnd = 0;
while ( m.find()) {
int mstart = m.start();
if ( lastEnd < mstart ) {
sbuff.append( inputString.substring( lastEnd, mstart ));
}
String key = m.group( 1 );
String value = (String)h.get( key );
sbuff.append( value );
lastEnd = m.end();
}
if ( lastEnd < inputString.length() ) {
sbuff.append( inputString.substring( lastEnd ));
}
System.out.println( "sbuff = \"" + sbuff + "\"" );
This can be Java-ified by these classes:
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
interface StringReplacer {
public CharSequence getReplacement( Matcher matcher );
}
class Replacementifier {
static Comparator keyComparator = new Comparator() {
public int compare( Object o1, Object o2 ) {
String s1 = (String)o1;
String s2 = (String)o2;
int diff = s1.length() - s2.length();
return diff != 0 ? diff : s1.compareTo( s2 );
}
};
Map replaceMap = null;
public Replacementifier( Map aMap ) {
if ( aMap != null ) {
setReplacements( aMap );
}
}
public setReplacements( Map aMap ) {
replaceMap = aMap;
}
private static String createKeyExpression( Map m ) {
Set set = new TreeSet( keyComparator );
set.addAll( m.keySet());
Iterator sit = set.iterator();
StringBuffer sb = new StringBuffer( "(" + sit.next());
while ( sit.hasNext()) {
sb.append( "|" ).append( sit.next());
}
sb.append( ")" );
return sb.toString();
}
public String replace( Pattern pattern, CharSequence input, StringReplacer replaceFilter ) {
StringBuffer output = new StringBuffer();
Matcher matcher = pattern.matcher( inputString );
int lastEnd = 0;
while ( matcher.find()) {
int mstart = matcher.start();
if ( lastEnd < mstart ) {
output.append( inputString.substring( lastEnd, mstart ));
}
CharSequence cs = replaceFilter.getReplacement( matcher );
if ( cs != null ) {
output.append( cs );
}
lastEnd = matcher.end();
}
if ( lastEnd < inputString.length() ) {
sbuff.append( inputString.substring( lastEnd ));
}
}
public String replace( Map rMap, CharSequence input ) {
// pre-condition
if ( rMap == null && replaceMap == null ) return input;
Map repMap = rMap != null ? rMap : replaceMap;
Pattern pattern
= Pattern.compile( createKeyExpression( repMap ))
;
StringReplacer replacer = new StringReplacer() {
public CharSequence getReplacement( Matcher matcher ) {
String key = matcher.group( 1 );
return (String)repMap.get( key );
}
};
return replace( pattern, input, replacer );
}
}
Upvotes: 5
Reputation:
The perl solution has an advantage of replacing all strings in one shot, sort of "transactionally". If you don't have the same option in Java (and I can't think of a way make it happen), you need to be careful of replacing R1=>R2, then R2=>R3. In that case, both R1 and R2 end up being replaced with R3.
Upvotes: 2