#! /usr/bin/perl
use strict;
use warnings;
use autodie;
use Getopt::Std;
use File::Find::Rule;
# Script-wide settings. All of them start out empty and are filled in from the
# command-line options by process_args().
my $_DIR  = q{};    # value of -d
my $_EXT  = q{};    # value of -t
my $_FILE = q{};    # value of -f
my $_IN   = q{};    # value of -i (text to search for)
my $_OUT  = q{};    # value of -o (replacement text)

# Border pattern placed on both sides of the search text. The default is an
# empty capture group, i.e. no constraint on what surrounds a match.
my $_REGEX_BORDER = q{()};

# Border pattern used instead of the default when -B is given: a match must be
# delimited by something that is not a letter, a digit or an underscore. The
# start and the end of the line count as valid delimiters as well.
my $_REGEX_ALPHANUM_BORDER = q{(^|[^a-zA-Z0-9_]|$)};
# Brief: Prepare a file for being rewritten "in place".
#
# Param: path of the file to rewrite.
#
# Return: a list ($in, $out). $in reads the original contents of the file,
# $out writes to a brand new file created under the same path.
#
# Failures of open/unlink are fatal through the file-wide "use autodie".
sub rewrite_file {
    my $path = shift;

    open my $in, "<", $path;

    # Remember the permission bits of the original file: the replacement file
    # created below would otherwise only get the default mode (umask), which
    # e.g. silently drops the executable bit of a rewritten script.
    my $mode = (stat $in)[2];

    # You can still read from $in after the unlink: the underlying data
    # remains available until the filehandle is closed. The unlink ensures
    # that $in and $out point at different data.
    unlink $path;

    # This creates a new file with the same name, but pointing at different
    # (initially empty) data.
    open my $out, ">", $path;

    # Restoring the mode is best effort: CORE::chmod bypasses autodie, so a
    # failure here is reported but does not abort a rewrite in progress.
    if (defined $mode) {
        CORE::chmod($mode & 07777, $path)
            or warn "WARNING: could not restore permissions of $path: $!\n";
    }

    return ($in, $out);
}
# Brief: Validate the combination of command-line options. Return 0 if
# everything is ok, return 1 otherwise (after printing the reason).
#
# Param: reference to a hash table of options. For an option like "-i foo",
# the hash contains a key 'i' with corresponding value 'foo'.
sub check_args {
    my ($params) = @_;

    # Which of the relevant options were given at all?
    my %given = map { $_ => exists $params->{$_} } qw(i o f d t);

    # Each rule is a pair [violated?, message]. The rules are checked in
    # order, and only the first violation is reported.
    my @rules = (
        [
            !($given{i} && $given{o}),
            "Text to be searched (-i) and replaced (-o) are mandatory. See -h for help.\n",
        ],
        [
            !$given{f} && !$given{d},
            "Either a file name (-f) or a directory path (-d) must be provided. See -h for help.\n",
        ],
        [
            $given{f} && $given{d},
            "File name (-f) and a directory path (-d) cannot be provided simultaneously.\n See -h for help.\n",
        ],
        [
            $given{d} && !$given{t},
            "With -d option, the extension of files to be searched/replaced\n in the given directory is mandatory.\n See -h for help.\n",
        ],
        [
            $given{f} && $given{t},
            "Options for file (-f) and file extension (-t) cannot be given simultaneously.\n See -h for help.\n",
        ],
    );

    for my $rule (@rules) {
        my ($violated, $message) = @{$rule};
        next if not $violated;
        print $message;
        return 1;
    }

    return 0;
}
# Brief: Copy the command-line options into the script-wide settings.
#
# Param: reference to the hash table of options (same layout as the one
# given to check_args()).
#
# The script exits with status 1 if either -i or -o is missing.
sub process_args {
    my ($params) = @_;

    # -B: only match the search text when it is delimited as a separate word.
    $_REGEX_BORDER = $_REGEX_ALPHANUM_BORDER if exists $params->{'B'};

    # Search text (mandatory).
    if (not exists $params->{'i'}) {
        print "ERROR: Argument of -i should exist, but does not...\n";
        exit(1);
    }
    $_IN = $params->{'i'};

    # Replacement text (mandatory).
    if (not exists $params->{'o'}) {
        print "ERROR: Argument of -o should exist, but does not...\n";
        exit(1);
    }
    $_OUT = $params->{'o'};

    # File name, file extension and directory are only stored when given.
    $_FILE = $params->{'f'} if exists $params->{'f'};
    $_EXT  = $params->{'t'} if exists $params->{'t'};
    $_DIR  = $params->{'d'} if exists $params->{'d'};
}
# Brief: Search and replace in the single file named by $_FILE (option -f).
#
# The file must exist and be both readable and writable, otherwise the script
# exits with status 1. The number of lines about to change is shown, and
# nothing is modified before the user confirms with <Enter>.
sub process_file {
    if (not -e $_FILE) {
        print "ERROR: file $_FILE does not exist!\n";
        exit(1);
    }
    if (not -r $_FILE) {
        print "ERROR: file $_FILE is not readable!\n";
        exit(1);
    }
    if (not -w $_FILE) {
        print "ERROR: file $_FILE is not writable!\n";
        exit(1);
    }

    my $count = process_file_do_count($_FILE);
    print "WARNING: about to change $count line(s) in 1 file. Hit <Enter> to continue... (Ctrl-C to cancel)";

    # Read the confirmation explicitly from STDIN. The bare <> operator would
    # first try to read from files named by any leftover command-line
    # argument, which would skip the confirmation altogether.
    <STDIN>;

    # Without any matching line there is nothing to replace, so leave the
    # file completely untouched instead of unlinking and re-creating it.
    process_file_inner($_FILE) if $count > 0;
}
# Brief: Count the lines of a file that contain the search text.
#
# Param: path of the file to inspect. The file is only read, never modified.
#
# Return: number of lines matching the search text $_IN, delimited on both
# sides by the current $_REGEX_BORDER.
sub process_file_do_count {
    my ($path) = @_;

    # Same pattern as the one matched in process_file_inner(): the search
    # text is taken literally (\Q...\E), surrounded by the border pattern.
    my $pattern = qr/$_REGEX_BORDER\Q$_IN\E$_REGEX_BORDER/;

    open my $reader, '<', $path;

    my $matching = 0;
    LINE:
    while (defined(my $text = readline $reader)) {
        next LINE unless $text =~ $pattern;
        $matching += 1;
    }

    return $matching;
}
# Brief: Actually do the content replacing in the file.
#
# Param: path of the file to modify.
#
# Every occurrence of $_IN is replaced with $_OUT, line by line. When at least
# one line changed, the numbers of the modified lines are printed.
sub process_file_inner {
    my $file = shift;
    my ($in, $out) = rewrite_file($file);

    # The \Q...\E means the string $_IN is interpreted literally (regex
    # metacharacters in it have no special meaning).
    # $_REGEX_BORDER is set to $_REGEX_ALPHANUM_BORDER when -B is given (cf.
    # help()). In this case, replacing "bar" with "baz" will do so only when
    # "bar" is a separate word -- so a word like "foobar" will NOT be
    # modified. Without -B, $_REGEX_BORDER is an empty group, and "bar" is
    # replaced with "baz" even inside other words like "foobar".
    #
    # The trailing border is only looked at (lookahead), not consumed. If it
    # were part of the match, the delimiter following one occurrence could no
    # longer serve as the leading delimiter of the next one, and in a line
    # like "bar bar" only the first "bar" would be replaced.
    my $search = qr/$_REGEX_BORDER\Q$_IN\E(?=$_REGEX_BORDER)/;

    my @modifiedLines = ();
    my $lineNum = 0;
    while (my $line = <$in>) {
        $lineNum++;
        my $lineOrig = $line;

        # $1 is the leading border (possibly empty), which must be kept.
        $line =~ s/$search/$1$_OUT/g;

        push(@modifiedLines, $lineNum) if $line ne $lineOrig;
        print {$out} $line;
    }

    # Close explicitly: buffered write errors only surface at close time, and
    # autodie turns them into a fatal error instead of silent data loss.
    close $out;
    close $in;

    if (@modifiedLines == 1) {
        print "File $file: modified line $modifiedLines[0].\n";
    } elsif (@modifiedLines > 1) {
        print "File $file: modified lines " . join(", ", @modifiedLines) . ".\n";
    }
}
# Brief: Search and replace in all files found under $_DIR (option -d) whose
# name ends in ".$_EXT" (option -t).
#
# The directory must exist and be readable, writable and enterable, and every
# candidate file must exist and be readable and writable; otherwise the script
# exits with status 1 before anything is modified. The total number of lines
# and files about to change is shown, and nothing is modified before the user
# confirms with <Enter>.
sub process_dir {
    if (not -e $_DIR) {
        print "ERROR: directory $_DIR does not exist!\n";
        exit(1);
    }
    if (not -r $_DIR) {
        print "ERROR: directory $_DIR cannot be read! This means its contents cannot be accessed...\n";
        exit(1);
    }
    if (not -w $_DIR) {
        print "ERROR: directory $_DIR cannot be written! This means its contents cannot be modified...\n";
        exit(1);
    }
    if (not -x $_DIR) {
        print "ERROR: directory $_DIR cannot be entered into! This means its contents cannot be modified...\n";
        exit(1);
    }

    my @files = File::Find::Rule->file()->name( "*.$_EXT" )->in( $_DIR );

    # Only files with at least one matching line are rewritten later on, so
    # that unaffected files are left completely untouched.
    my @files_to_change = ();
    my $count_changes = 0;
    my $errors = 0;
    foreach my $file (@files) {
        if (not -e $file) {
            print "ERROR: file $file does not exist!\n";
            $errors = 1;
            last;
        }
        if (not -r $file) {
            print "ERROR: file $file is not readable!\n";
            $errors = 1;
            last;
        }
        if (not -w $file) {
            print "ERROR: file $file is not writable!\n";
            $errors = 1;
            last;
        }
        my $number_of_lines_to_be_changed = process_file_do_count($file);
        if ($number_of_lines_to_be_changed != 0) {
            push(@files_to_change, $file);
            $count_changes += $number_of_lines_to_be_changed;
        }
    }
    if ($errors != 0) {
        print "Exiting due to previous errors...\n";
        exit(1);
    }

    my $count_files = scalar @files_to_change;
    print "WARNING: about to change $count_changes line(s) in $count_files file(s). Hit <Enter> to continue... (Ctrl-C to cancel)";

    # Read the confirmation explicitly from STDIN. The bare <> operator would
    # first try to read from files named by any leftover command-line
    # argument, which would skip the confirmation altogether.
    <STDIN>;

    foreach my $file (@files_to_change) {
        process_file_inner($file);
    }
}
# Brief: Print the usage instructions of the script to standard output.
sub help {
    my @paragraphs = (
        # The two supported invocations: single file, or directory + extension.
        "Script can be used in two different ways:\n\n\$ /path/to/script -i '<original text>' -o '<replacement text>' -f <file name>\n",
        "\$ /path/to/script -i '<original text>' -o '<replacement text>' -d <dir name> -t <file extension>\n\n",
        # Quoting advice and the confirmation step.
        "To avoid involuntary Shell escaping, ALWAYS ENCLOSE ORIGINAL AND REPLACEMENT TEXTS IN SINGLE QUOTES!! That way, both search and replacement texts can be entered verbatim, even if they contain spaces! Furthermore, before changing anything, a message will be shown asking for confirmation.\n\n",
        # Argument order, the -B flag and where to find more information.
        "Arguments can be given in any order. If the -B flag is present, then -i 'foo' -o 'fii' will replace 'foo' with 'fii', but only when 'foo' is a separate word -- 'foobar' will NOT be replaced with 'fiibar'!\n\nFor more information, see https://randomwalk.eu/software/search-and-replace/\n\n",
    );
    print @paragraphs;
}
# Brief: Entry point. Parse the command line, validate it, and run the
# search-and-replace either on a single file (-f) or on a directory (-d).
#
# Exit status: 0 on success (including -h), 1 on any usage error.
sub main {
    my %opts;
    getopts('i:o:t:f:d:Bh', \%opts);

    if (exists $opts{'h'}) {
        help();
        exit(0);
    }

    # check_args() has already printed the reason; make sure the failure is
    # also visible in the exit status instead of falling through to a
    # successful exit.
    if (check_args(\%opts) != 0) {
        exit(1);
    }
    process_args(\%opts);

    if ($_FILE ne "") {
        process_file();
        exit(0);
    } elsif ($_DIR ne "") {
        if ($_EXT eq "") {
            print "ERROR: -d given without -t (extension)!\n";
            exit(1);
        }
        process_dir();
        exit(0);
    }

    # Only reachable when -f or -d was given with an empty value: the option
    # exists, but there is nothing to work on.
    print "ERROR: the argument of -f or -d must not be empty. See -h for help.\n";
    exit(1);
}
main();