lineend
A hardcore line ending adjustment script. If you’ve ever seen extraneous ^M characters at the ends of lines, or ^M characters where newlines should be, that’s a line ending mismatch.
#!/usr/bin/perl
#############################################################################
# lineend.pl - Adjust line endings between the various system formats
# The latest version of this can be found at:
# http://encryptio.com/code/lineend
#############################################################################
# Copyright (c) 2007-2008 Chris Kastorff
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * The name Chris Kastorff may not be used to endorse or promote
# products derived from this software without specific prior written
# permission.
###############################################################################
use warnings;
use strict;
our $VERSION = 0.11;
$| = 1;
# Usage text. $0 is interpolated, so the message names the script the way
# it was invoked.
my $help = <<USAGE;
Usage: $0 target file [ file ... ]
where "target" is the target line ending type, and is one of the following:
crlf, dos, win, windows
lf, unix
mac, cr
lfcr
This program automatically figures out the line ending of a file and
replaces the file's line endings with the type given.
USAGE

# Print the usage text to standard output and terminate with exit status 1.
# Never returns.
sub help {
    print $help;
    exit 1;
}
# A target plus at least one file name is required.
help() if @ARGV < 2;

# The first argument names the target line ending; it is matched
# case-insensitively by lower-casing it first.
my $dir = lc shift @ARGV;

# Byte sequence for every accepted spelling of the target.
my %ending_for = (
    dos     => "\x0d\x0a",
    win     => "\x0d\x0a",
    crlf    => "\x0d\x0a",
    windows => "\x0d\x0a",
    unix    => "\x0a",
    lf      => "\x0a",
    mac     => "\x0d",
    cr      => "\x0d",
    lfcr    => "\x0a\x0d",
);

my $le = $ending_for{$dir}; # target line ending

# An unrecognised target gets the usage text (help exits).
help() unless defined $le;
# Convert each file named on the command line to the target line ending $le.
#
# For every file: sample the start to detect the existing line ending, skip
# the file when it is a directory, too short, has no detectable ending, or
# already uses the target ending; otherwise write a converted copy to a
# temporary file next to it and rename that copy over the original.
# Progress is reported on standard output as "name: FOUND -> TARGET".
for my $filename ( @ARGV ) {
    print $filename.": ";

    if ( -d $filename ) {
        print "Directory, skipping.\n";
        next;
    }

    open my $lf, "<", $filename
        or die "Couldn't open: $!\n";
    binmode $lf; # ugh, windows

    # read up to 8k to look for a line ending
    my $buf = '';
    my $got = read($lf, $buf, 8192);
    # read returns undef on error; report that instead of "too short"
    die "Couldn't read: $!\n" if ! defined $got;
    if ( $got <= 4 ) {
        # 4 bytes or shorter
        print "Too short, skipping.\n";
        next;
    }

    # If the sample stops on a CR or LF, pull in one more byte so that a
    # two-byte ending straddling the sample boundary is not mistaken for a
    # one-byte ending. At end of file this reads nothing.
    if ( $buf =~ /[\x0d\x0a]\z/ ) {
        defined read($lf, $buf, 1, length $buf)
            or die "Couldn't read: $!\n";
    }

    # the first line ending we see, CRLF, LFCR, LF, or CR, will be used
    # as the line ending for all of the file.
    my ($initial) = ($buf =~ /(\x0d\x0a|\x0a\x0d|\x0a|\x0d)/);
    if ( ! defined $initial ) {
        print "Couldn't figure out the line ending, skipping.\n";
        next;
    }

    # tell the user what we found.
    print format_ending($initial)." ";

    # don't rewrite the file if the line ending is the same
    if ( $initial eq $le ) {
        print "- same as target, skipping\n";
        next;
    }

    # back to the start
    seek $lf, 0, 0
        or die "Couldn't rewind: $!\n";

    # remember the permission bits so the replacement can keep them
    my $mode = (stat $lf)[2];

    # find an unused temporary filename
    my $temp;
    do {
        $temp = "$filename.lineend.$$.".int(rand 100000);
    } while -e $temp;

    # open the temporary file for writing
    open my $sf, ">", $temp
        or die "Couldn't open $temp for writing: $!\n";
    binmode $sf;

    # Copy line by line. Any write failure is recorded (with $! captured
    # immediately) so the original is never replaced by a partial copy.
    my $failure;
    {
        # we read lines by $/, and chomp $/ from the end - set $/ to the input
        local $/ = $initial;
        while ( my $line = <$lf> ) { # splits by $/
            # chomp returns the number of characters removed; only lines
            # that actually had an ending get the new one, so a final line
            # without a terminator stays unterminated.
            $line .= $le if chomp($line);
            if ( ! print {$sf} $line ) {
                $failure = "Couldn't write to $temp: $!\n";
                last;
            }
        }
    }

    # buffered write errors surface at close, so check it before renaming
    if ( ! close($sf) && ! defined $failure ) {
        $failure = "Couldn't close $temp: $!\n";
    }
    close $lf;

    if ( defined $failure ) {
        unlink $temp; # don't leave a partial copy behind
        die $failure;
    }

    # carry the original permission bits over to the replacement (best effort)
    if ( defined $mode ) {
        chmod $mode & 07777, $temp
            or warn "Couldn't copy permissions to $temp: $!\n";
    }

    # overwrite the original with the temporary file
    rename $temp, $filename
        or die "Couldn't rename $temp to $filename: $!\n";

    # tell the user we've converted the file
    print "-> ".format_ending($le)."\n";
}
# Return a printable form of a line-ending string: every LF byte (0x0a) is
# spelled "LF" and every CR byte (0x0d) is spelled "CR"; all other
# characters are passed through unchanged. The argument is not modified.
sub format_ending {
    my ($end) = @_;
    my %name_of = ( "\x0a" => 'LF', "\x0d" => 'CR' );
    $end =~ s/([\x0a\x0d])/$name_of{$1}/g;
    return $end;
}
__END__
Version history:
0.11:
Will now skip directories
0.1: initial release
#############################################################################
# lineend.pl - Adjust line endings between the various system formats
# The latest version of this can be found at:
# http://encryptio.com/code/lineend
#############################################################################
# Copyright (c) 2007-2008 Chris Kastorff
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions, and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * The name Chris Kastorff may not be used to endorse or promote
# products derived from this software without specific prior written
# permission.
###############################################################################
use warnings;
use strict;
our $VERSION = 0.11;
$| = 1;
# Usage text. $0 is interpolated, so the message names the script the way
# it was invoked.
my $help = <<USAGE;
Usage: $0 target file [ file ... ]
where "target" is the target line ending type, and is one of the following:
crlf, dos, win, windows
lf, unix
mac, cr
lfcr
This program automatically figures out the line ending of a file and
replaces the file's line endings with the type given.
USAGE

# Print the usage text to standard output and terminate with exit status 1.
# Never returns.
sub help {
    print $help;
    exit 1;
}
# A target plus at least one file name is required.
help() if @ARGV < 2;

# The first argument names the target line ending; it is matched
# case-insensitively by lower-casing it first.
my $dir = lc shift @ARGV;

# Byte sequence for every accepted spelling of the target.
my %ending_for = (
    dos     => "\x0d\x0a",
    win     => "\x0d\x0a",
    crlf    => "\x0d\x0a",
    windows => "\x0d\x0a",
    unix    => "\x0a",
    lf      => "\x0a",
    mac     => "\x0d",
    cr      => "\x0d",
    lfcr    => "\x0a\x0d",
);

my $le = $ending_for{$dir}; # target line ending

# An unrecognised target gets the usage text (help exits).
help() unless defined $le;
# Convert each file named on the command line to the target line ending $le.
#
# For every file: sample the start to detect the existing line ending, skip
# the file when it is a directory, too short, has no detectable ending, or
# already uses the target ending; otherwise write a converted copy to a
# temporary file next to it and rename that copy over the original.
# Progress is reported on standard output as "name: FOUND -> TARGET".
for my $filename ( @ARGV ) {
    print $filename.": ";

    if ( -d $filename ) {
        print "Directory, skipping.\n";
        next;
    }

    open my $lf, "<", $filename
        or die "Couldn't open: $!\n";
    binmode $lf; # ugh, windows

    # read up to 8k to look for a line ending
    my $buf = '';
    my $got = read($lf, $buf, 8192);
    # read returns undef on error; report that instead of "too short"
    die "Couldn't read: $!\n" if ! defined $got;
    if ( $got <= 4 ) {
        # 4 bytes or shorter
        print "Too short, skipping.\n";
        next;
    }

    # If the sample stops on a CR or LF, pull in one more byte so that a
    # two-byte ending straddling the sample boundary is not mistaken for a
    # one-byte ending. At end of file this reads nothing.
    if ( $buf =~ /[\x0d\x0a]\z/ ) {
        defined read($lf, $buf, 1, length $buf)
            or die "Couldn't read: $!\n";
    }

    # the first line ending we see, CRLF, LFCR, LF, or CR, will be used
    # as the line ending for all of the file.
    my ($initial) = ($buf =~ /(\x0d\x0a|\x0a\x0d|\x0a|\x0d)/);
    if ( ! defined $initial ) {
        print "Couldn't figure out the line ending, skipping.\n";
        next;
    }

    # tell the user what we found.
    print format_ending($initial)." ";

    # don't rewrite the file if the line ending is the same
    if ( $initial eq $le ) {
        print "- same as target, skipping\n";
        next;
    }

    # back to the start
    seek $lf, 0, 0
        or die "Couldn't rewind: $!\n";

    # remember the permission bits so the replacement can keep them
    my $mode = (stat $lf)[2];

    # find an unused temporary filename
    my $temp;
    do {
        $temp = "$filename.lineend.$$.".int(rand 100000);
    } while -e $temp;

    # open the temporary file for writing
    open my $sf, ">", $temp
        or die "Couldn't open $temp for writing: $!\n";
    binmode $sf;

    # Copy line by line. Any write failure is recorded (with $! captured
    # immediately) so the original is never replaced by a partial copy.
    my $failure;
    {
        # we read lines by $/, and chomp $/ from the end - set $/ to the input
        local $/ = $initial;
        while ( my $line = <$lf> ) { # splits by $/
            # chomp returns the number of characters removed; only lines
            # that actually had an ending get the new one, so a final line
            # without a terminator stays unterminated.
            $line .= $le if chomp($line);
            if ( ! print {$sf} $line ) {
                $failure = "Couldn't write to $temp: $!\n";
                last;
            }
        }
    }

    # buffered write errors surface at close, so check it before renaming
    if ( ! close($sf) && ! defined $failure ) {
        $failure = "Couldn't close $temp: $!\n";
    }
    close $lf;

    if ( defined $failure ) {
        unlink $temp; # don't leave a partial copy behind
        die $failure;
    }

    # carry the original permission bits over to the replacement (best effort)
    if ( defined $mode ) {
        chmod $mode & 07777, $temp
            or warn "Couldn't copy permissions to $temp: $!\n";
    }

    # overwrite the original with the temporary file
    rename $temp, $filename
        or die "Couldn't rename $temp to $filename: $!\n";

    # tell the user we've converted the file
    print "-> ".format_ending($le)."\n";
}
# Return a printable form of a line-ending string: every LF byte (0x0a) is
# spelled "LF" and every CR byte (0x0d) is spelled "CR"; all other
# characters are passed through unchanged. The argument is not modified.
sub format_ending {
    my ($end) = @_;
    my %name_of = ( "\x0a" => 'LF', "\x0d" => 'CR' );
    $end =~ s/([\x0a\x0d])/$name_of{$1}/g;
    return $end;
}
__END__
Version history:
0.11:
Will now skip directories
0.1: initial release