encryptio.com

This sentence is very meta.

lineend

A hardcore line ending adjustment script. If you've ever seen extraneous ^Ms at the ends of lines, or ^Ms where newlines should be, that's a line ending mismatch.

Download

#!/usr/bin/perl
#############################################################################
# lineend.pl - Adjust line endings between the various system formats
# The latest version of this can be found at:
# http://encryptio.com/code/lineend

#############################################################################
# Copyright (c) 2007-2008 Chris Kastorff
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions, and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * The name Chris Kastorff may not be used to endorse or promote
#       products derived from this software without specific prior written
#       permission.
###############################################################################

use warnings;
use strict;

# Script version; matches the newest entry in the version history after
# __END__.
our $VERSION = 0.11;

# Unbuffer STDOUT: each file's "name: " prefix is printed without a newline
# before the file is processed, so it should appear immediately.
$| = 1;

# Usage text shown whenever the command line can't be understood.
# $0 interpolates to the name this script was invoked as.
my $help = <<"EOF";
Usage:
$0 target file [ file ... ]
  where "target" is the target line ending type, and is one of the following:
    crlf, dos, win, windows
    lf, unix
    mac, cr
    lfcr

This program automatically figures out the line ending of a file and
replaces the file's line endings with the type given.

EOF

# Print the usage text to STDOUT and terminate with exit status 1.
# Takes no arguments and never returns.
sub help {
    print $help;
    exit 1;
}

# We need at least a target type and one file; otherwise show usage and exit.
help() if @ARGV < 2;

# The first argument names the target line ending (matched case-insensitively).
my $dir = lc shift @ARGV;

# Every accepted spelling, mapped to the byte sequence it stands for.
my %ending_for = (
    ( map { ( $_ => "\x0d\x0a" ) } qw( crlf dos win windows ) ),
    ( map { ( $_ => "\x0a" )     } qw( lf unix ) ),
    ( map { ( $_ => "\x0d" )     } qw( mac cr ) ),
    lfcr => "\x0a\x0d",
);

my $le = $ending_for{$dir}; # target line ending

# Unrecognised target name: show usage and exit.
help() unless defined $le;

# Convert every file named on the command line, in place, to the target line
# ending $le.
#
# For each file: sniff the first line ending in the leading 8k, and if it
# differs from the target, stream the file into a temporary sibling file with
# each ending replaced, then rename the temporary file over the original.
# Directories, files of 4 bytes or fewer, files with no detectable ending and
# files already in the target format are reported and skipped.  Failure to
# open, read, write or rename is fatal; the original file is only replaced
# once the converted copy has been written and closed successfully.
for my $filename ( @ARGV ) {
    print $filename.": ";

    if ( -d $filename ) {
        print "Directory, skipping.\n";
        next;
    }

    open my $lf, "<", $filename
        or die "Couldn't open: $!\n";
    binmode $lf; # ugh, windows

    # read up to 8k to look for a line ending
    my $buf = '';
    my $got = read($lf, $buf, 8192);
    defined $got
        or die "Couldn't read: $!\n"; # undef means a read error, not a short file
    if ( $got <= 4 ) {
        # 4 bytes or shorter
        print "Too short, skipping.\n";
        next;
    }

    # A two-byte ending (CRLF or LFCR) can straddle the end of the sample, in
    # which case we would only see its first byte and misdetect it as a lone
    # CR or LF.  If the sample ends on such a byte, pull in one more byte.
    if ( $buf =~ /[\x0d\x0a]\z/ ) {
        defined( read( $lf, $buf, 1, length $buf ) )
            or die "Couldn't read: $!\n";
    }

    # the first line ending we see, CRLF, LFCR, LF, or CR, will be used
    # as the line ending for all of the file.
    my ($initial) = ($buf =~ /(\x0d\x0a|\x0a\x0d|\x0a|\x0d)/);
    if ( ! defined $initial ) {
        print "Couldn't figure out the line ending, skipping.\n";
        next;
    }

    # tell the user what we found.
    print format_ending($initial)." ";

    # don't rewrite the file if the line ending is the same
    if ( $initial eq $le ) {
        print "- same as target, skipping\n";
        next;
    }

    seek $lf, 0, 0 # back to the start
        or die "Couldn't rewind: $!\n";

    # Remember the original's permission bits; the converted copy replaces the
    # original, so without this an executable script would lose its mode.
    my $mode = ( stat $lf )[2];

    # find an unused temporary filename
    my $temp;
    do {
        $temp = "$filename.lineend.$$.".int(rand 100000);
    } while -e $temp;

    # open the temporary file for writing
    open my $sf, ">", $temp
        or die "Couldn't open $temp for writing: $!\n";
    binmode $sf;

    # we read lines by $/, and chomp $/ from the end - set $/ to the input
    local $/ = $initial;

    my $write_error;
    while ( my $line = <$lf> ) { # splits by $/
        # chomp returns how many characters it removed: zero only for a final
        # line with no terminator, which must not gain one.
        my $terminated = chomp $line;
        unless ( print {$sf} $line, ( $terminated ? $le : '' ) ) {
            $write_error = "$!";
            last;
        }
    }

    # close before moving the files to flush the buffer; buffered write
    # errors (e.g. disk full) are only reported here.
    unless ( close $sf ) {
        $write_error = "$!" unless defined $write_error;
    }
    close $lf;

    # never replace the original with an incomplete copy
    if ( defined $write_error ) {
        unlink $temp;
        die "Couldn't write $temp: $write_error\n";
    }

    # best effort: a failure here shouldn't stop the conversion
    if ( defined $mode ) {
        chmod( $mode & 07777, $temp )
            or warn "Couldn't copy permissions to $temp: $!\n";
    }

    # overwrite the original with the temporary file
    unless ( rename $temp, $filename ) {
        my $rename_error = "$!";
        unlink $temp; # don't leave the temporary file lying around
        die "Couldn't rename $temp to $filename: $rename_error\n";
    }

    # tell the user we've converted the file
    print "-> ".format_ending($le)."\n";
}

# Render a line ending as readable text: every LF byte (0x0a) becomes the
# letters "LF" and every CR byte (0x0d) becomes "CR"; any other characters
# pass through untouched.  Takes the raw ending string and returns the
# readable copy.
sub format_ending {
    my ($raw) = @_;

    my %label_for = ( "\x0a" => 'LF', "\x0d" => 'CR' );

    # One pass is enough: neither label contains a CR or LF byte itself.
    ( my $readable = $raw ) =~ s/([\x0a\x0d])/$label_for{$1}/g;

    return $readable;
}

__END__

Version history:

0.11:
    Will now skip directories

0.1: initial release