This commit is contained in:
2024-10-14 00:08:40 +02:00
parent dbfba56f66
commit 1462d52e13
4572 changed files with 2658864 additions and 0 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,742 @@
package Text::Diff;
use 5.006;
use strict;
use warnings;
use Carp qw/ croak confess /;
use Exporter ();
use Algorithm::Diff ();
our $VERSION = '1.44';
our @ISA = qw/ Exporter /;
our @EXPORT = qw/ diff /;
## Hunks are made of ops. An op is the starting index for each
## sequence and the opcode:
use constant A => 0; # Array index before match/discard
use constant B => 1;
use constant OPCODE => 2; # "-", " ", "+"
use constant FLAG => 3; # What to display if not OPCODE "!"
my %internal_styles = (
Unified => undef,
Context => undef,
OldStyle => undef,
Table => undef, ## "internal", but in another module
);
sub diff {
my @seqs = ( shift, shift );
my $options = shift || {};
for my $i ( 0 .. 1 ) {
my $seq = $seqs[$i];
my $type = ref $seq;
while ( $type eq "CODE" ) {
$seqs[$i] = $seq = $seq->( $options );
$type = ref $seq;
}
my $AorB = !$i ? "A" : "B";
if ( $type eq "ARRAY" ) {
## This is most efficient :)
$options->{"OFFSET_$AorB"} = 0
unless defined $options->{"OFFSET_$AorB"};
}
elsif ( $type eq "SCALAR" ) {
$seqs[$i] = [split( /^/m, $$seq )];
$options->{"OFFSET_$AorB"} = 1
unless defined $options->{"OFFSET_$AorB"};
}
elsif ( ! $type ) {
$options->{"OFFSET_$AorB"} = 1
unless defined $options->{"OFFSET_$AorB"};
$options->{"FILENAME_$AorB"} = $seq
unless defined $options->{"FILENAME_$AorB"};
$options->{"MTIME_$AorB"} = (stat($seq))[9]
unless defined $options->{"MTIME_$AorB"};
local $/ = "\n";
open F, "<$seq" or croak "$!: $seq";
$seqs[$i] = [<F>];
close F;
}
elsif ( $type eq "GLOB" || UNIVERSAL::isa( $seq, "IO::Handle" ) ) {
$options->{"OFFSET_$AorB"} = 1
unless defined $options->{"OFFSET_$AorB"};
local $/ = "\n";
$seqs[$i] = [<$seq>];
}
else {
confess "Can't handle input of type ", ref;
}
}
## Config vars
my $output;
my $output_handler = $options->{OUTPUT};
my $type = ref $output_handler ;
if ( ! defined $output_handler ) {
$output = "";
$output_handler = sub { $output .= shift };
}
elsif ( $type eq "CODE" ) {
## No problems, mate.
}
elsif ( $type eq "SCALAR" ) {
my $out_ref = $output_handler;
$output_handler = sub { $$out_ref .= shift };
}
elsif ( $type eq "ARRAY" ) {
my $out_ref = $output_handler;
$output_handler = sub { push @$out_ref, shift };
}
elsif ( $type eq "GLOB" || UNIVERSAL::isa $output_handler, "IO::Handle" ) {
my $output_handle = $output_handler;
$output_handler = sub { print $output_handle shift };
}
else {
croak "Unrecognized output type: $type";
}
my $style = $options->{STYLE};
$style = "Unified" unless defined $options->{STYLE};
$style = "Text::Diff::$style" if exists $internal_styles{$style};
if ( ! $style->can( "hunk" ) ) {
eval "require $style; 1" or die $@;
}
$style = $style->new if ! ref $style && $style->can( "new" );
my $ctx_lines = $options->{CONTEXT};
$ctx_lines = 3 unless defined $ctx_lines;
$ctx_lines = 0 if $style->isa( "Text::Diff::OldStyle" );
my @keygen_args = $options->{KEYGEN_ARGS}
? @{$options->{KEYGEN_ARGS}}
: ();
## State vars
my $diffs = 0; ## Number of discards this hunk
my $ctx = 0; ## Number of " " (ctx_lines) ops pushed after last diff.
my @ops; ## ops (" ", +, -) in this hunk
my $hunks = 0; ## Number of hunks
my $emit_ops = sub {
$output_handler->( $style->file_header( @seqs, $options ) )
unless $hunks++;
$output_handler->( $style->hunk_header( @seqs, @_, $options ) );
$output_handler->( $style->hunk ( @seqs, @_, $options ) );
$output_handler->( $style->hunk_footer( @seqs, @_, $options ) );
};
## We keep 2*ctx_lines so that if a diff occurs
## at 2*ctx_lines we continue to grow the hunk instead
## of emitting diffs and context as we go. We
## need to know the total length of both of the two
## subsequences so the line count can be printed in the
## header.
my $dis_a = sub {push @ops, [@_[0,1],"-"]; ++$diffs ; $ctx = 0 };
my $dis_b = sub {push @ops, [@_[0,1],"+"]; ++$diffs ; $ctx = 0 };
Algorithm::Diff::traverse_sequences(
@seqs,
{
MATCH => sub {
push @ops, [@_[0,1]," "];
if ( $diffs && ++$ctx > $ctx_lines * 2 ) {
$emit_ops->( [ splice @ops, 0, $#ops - $ctx_lines ] );
$ctx = $diffs = 0;
}
## throw away context lines that aren't needed any more
shift @ops if ! $diffs && @ops > $ctx_lines;
},
DISCARD_A => $dis_a,
DISCARD_B => $dis_b,
},
$options->{KEYGEN}, # pass in user arguments for key gen function
@keygen_args,
);
if ( $diffs ) {
$#ops -= $ctx - $ctx_lines if $ctx > $ctx_lines;
$emit_ops->( \@ops );
}
$output_handler->( $style->file_footer( @seqs, $options ) ) if $hunks;
return defined $output ? $output : $hunks;
}
sub _header {
my ( $h ) = @_;
my ( $p1, $fn1, $t1, $p2, $fn2, $t2 ) = @{$h}{
"FILENAME_PREFIX_A",
"FILENAME_A",
"MTIME_A",
"FILENAME_PREFIX_B",
"FILENAME_B",
"MTIME_B"
};
## remember to change Text::Diff::Table if this logic is tweaked.
return "" unless defined $fn1 && defined $fn2;
return join( "",
$p1, " ", $fn1, defined $t1 ? "\t" . localtime $t1 : (), "\n",
$p2, " ", $fn2, defined $t2 ? "\t" . localtime $t2 : (), "\n",
);
}
## _range encapsulates the building of, well, ranges. Turns out there are
## a few nuances.
sub _range {
my ( $ops, $a_or_b, $format ) = @_;
my $start = $ops->[ 0]->[$a_or_b];
my $after = $ops->[-1]->[$a_or_b];
## The sequence indexes in the lines are from *before* the OPCODE is
## executed, so we bump the last index up unless the OP indicates
## it didn't change.
++$after
unless $ops->[-1]->[OPCODE] eq ( $a_or_b == A ? "+" : "-" );
## convert from 0..n index to 1..(n+1) line number. The unless modifier
## handles diffs with no context, where only one file is affected. In this
## case $start == $after indicates an empty range, and the $start must
## not be incremented.
my $empty_range = $start == $after;
++$start unless $empty_range;
return
$start == $after
? $format eq "unified" && $empty_range
? "$start,0"
: $start
: $format eq "unified"
? "$start,".($after-$start+1)
: "$start,$after";
}
sub _op_to_line {
my ( $seqs, $op, $a_or_b, $op_prefixes ) = @_;
my $opcode = $op->[OPCODE];
return () unless defined $op_prefixes->{$opcode};
my $op_sym = defined $op->[FLAG] ? $op->[FLAG] : $opcode;
$op_sym = $op_prefixes->{$op_sym};
return () unless defined $op_sym;
$a_or_b = $op->[OPCODE] ne "+" ? 0 : 1 unless defined $a_or_b;
my @line = ( $op_sym, $seqs->[$a_or_b][$op->[$a_or_b]] );
unless ( $line[1] =~ /(?:\n|\r\n)$/ ) {
$line[1] .= "\n\\ No newline at end of file\n";
}
return @line;
}
SCOPE: {
package Text::Diff::Base;
sub new {
my $proto = shift;
return bless { @_ }, ref $proto || $proto;
}
sub file_header { return "" }
sub hunk_header { return "" }
sub hunk { return "" }
sub hunk_footer { return "" }
sub file_footer { return "" }
}
@Text::Diff::Unified::ISA = qw( Text::Diff::Base );
sub Text::Diff::Unified::file_header {
shift; ## No instance data
my $options = pop ;
_header(
{ FILENAME_PREFIX_A => "---", FILENAME_PREFIX_B => "+++", %$options }
);
}
sub Text::Diff::Unified::hunk_header {
shift; ## No instance data
pop; ## Ignore options
my $ops = pop;
return join( "",
"@@ -",
_range( $ops, A, "unified" ),
" +",
_range( $ops, B, "unified" ),
" @@\n",
);
}
sub Text::Diff::Unified::hunk {
shift; ## No instance data
pop; ## Ignore options
my $ops = pop;
my $prefixes = { "+" => "+", " " => " ", "-" => "-" };
return join "", map _op_to_line( \@_, $_, undef, $prefixes ), @$ops
}
@Text::Diff::Context::ISA = qw( Text::Diff::Base );
sub Text::Diff::Context::file_header {
_header { FILENAME_PREFIX_A=>"***", FILENAME_PREFIX_B=>"---", %{$_[-1]} };
}
sub Text::Diff::Context::hunk_header {
return "***************\n";
}
sub Text::Diff::Context::hunk {
shift; ## No instance data
pop; ## Ignore options
my $ops = pop;
## Leave the sequences in @_[0,1]
my $a_range = _range( $ops, A, "" );
my $b_range = _range( $ops, B, "" );
## Sigh. Gotta make sure that differences that aren't adds/deletions
## get prefixed with "!", and that the old opcodes are removed.
my $after;
for ( my $start = 0; $start <= $#$ops ; $start = $after ) {
## Scan until next difference
$after = $start + 1;
my $opcode = $ops->[$start]->[OPCODE];
next if $opcode eq " ";
my $bang_it;
while ( $after <= $#$ops && $ops->[$after]->[OPCODE] ne " " ) {
$bang_it ||= $ops->[$after]->[OPCODE] ne $opcode;
++$after;
}
if ( $bang_it ) {
for my $i ( $start..($after-1) ) {
$ops->[$i]->[FLAG] = "!";
}
}
}
my $b_prefixes = { "+" => "+ ", " " => " ", "-" => undef, "!" => "! " };
my $a_prefixes = { "+" => undef, " " => " ", "-" => "- ", "!" => "! " };
return join( "",
"*** ", $a_range, " ****\n",
map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
"--- ", $b_range, " ----\n",
map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
);
}
@Text::Diff::OldStyle::ISA = qw( Text::Diff::Base );
sub _op {
my $ops = shift;
my $op = $ops->[0]->[OPCODE];
$op = "c" if grep $_->[OPCODE] ne $op, @$ops;
$op = "a" if $op eq "+";
$op = "d" if $op eq "-";
return $op;
}
sub Text::Diff::OldStyle::hunk_header {
shift; ## No instance data
pop; ## ignore options
my $ops = pop;
my $op = _op $ops;
return join "", _range( $ops, A, "" ), $op, _range( $ops, B, "" ), "\n";
}
sub Text::Diff::OldStyle::hunk {
shift; ## No instance data
pop; ## ignore options
my $ops = pop;
## Leave the sequences in @_[0,1]
my $a_prefixes = { "+" => undef, " " => undef, "-" => "< " };
my $b_prefixes = { "+" => "> ", " " => undef, "-" => undef };
my $op = _op $ops;
return join( "",
map( _op_to_line( \@_, $_, A, $a_prefixes ), @$ops ),
$op eq "c" ? "---\n" : (),
map( _op_to_line( \@_, $_, B, $b_prefixes ), @$ops ),
);
}
1;
__END__
=head1 NAME
Text::Diff - Perform diffs on files and record sets
=head1 SYNOPSIS
use Text::Diff;
## Mix and match filenames, strings, file handles, producer subs,
## or arrays of records; returns diff in a string.
## WARNING: can return B<large> diffs for large files.
my $diff = diff "file1.txt", "file2.txt", { STYLE => "Context" };
my $diff = diff \$string1, \$string2, \%options;
my $diff = diff \*FH1, \*FH2;
my $diff = diff \&reader1, \&reader2;
my $diff = diff \@records1, \@records2;
## May also mix input types:
my $diff = diff \@records1, "file_B.txt";
=head1 DESCRIPTION
C<diff()> provides a basic set of services akin to the GNU C<diff> utility. It
is not anywhere near as feature complete as GNU C<diff>, but it is better
integrated with Perl and available on all platforms. It is often faster than
shelling out to a system's C<diff> executable for small files, and generally
slower on larger files.
Relies on L<Algorithm::Diff> for, well, the algorithm. This may not produce
the same exact diff as a system's local C<diff> executable, but it will be a
valid diff and comprehensible by C<patch>. We haven't seen any differences
between Algorithm::Diff's logic and GNU diff's, but we have not examined them
to make sure they are indeed identical.
B<Note>: If you don't want to import the C<diff> function, do one of the
following:
use Text::Diff ();
require Text::Diff;
That's a pretty rare occurence, so C<diff()> is exported by default.
If you pass a filename, but the file can't be read,
then C<diff()> will C<croak>.
=head1 OPTIONS
diff() takes two parameters from which to draw input and a set of
options to control it's output. The options are:
=over
=item FILENAME_A, MTIME_A, FILENAME_B, MTIME_B
The name of the file and the modification time "files"
These are filled in automatically for each file when diff() is passed a
filename, unless a defined value is passed in.
If a filename is not passed in and FILENAME_A and FILENAME_B are not provided
or C<undef>, the header will not be printed.
Unused on C<OldStyle> diffs.
=item OFFSET_A, OFFSET_B
The index of the first line / element. These default to 1 for all
parameter types except ARRAY references, for which the default is 0. This
is because ARRAY references are presumed to be data structures, while the
others are line oriented text.
=item STYLE
"Unified", "Context", "OldStyle", or an object or class reference for a class
providing C<file_header()>, C<hunk_header()>, C<hunk()>, C<hunk_footer()> and
C<file_footer()> methods. The two footer() methods are provided for
overloading only; none of the formats provide them.
Defaults to "Unified" (unlike standard C<diff>, but Unified is what's most
often used in submitting patches and is the most human readable of the three.
If the package indicated by the STYLE has no hunk() method, c<diff()> will
load it automatically (lazy loading). Since all such packages should inherit
from Text::Diff::Base, this should be marvy.
Styles may be specified as class names (C<STYLE =E<gt> 'Foo'>),
in which case they will be C<new()>ed with no parameters,
or as objects (C<STYLE =E<gt> Foo-E<gt>new>).
=item CONTEXT
How many lines before and after each diff to display. Ignored on old-style
diffs. Defaults to 3.
=item OUTPUT
Examples and their equivalent subroutines:
OUTPUT => \*FOOHANDLE, # like: sub { print FOOHANDLE shift() }
OUTPUT => \$output, # like: sub { $output .= shift }
OUTPUT => \@output, # like: sub { push @output, shift }
OUTPUT => sub { $output .= shift },
If no C<OUTPUT> is supplied, returns the diffs in a string. If
C<OUTPUT> is a C<CODE> ref, it will be called once with the (optional)
file header, and once for each hunk body with the text to emit. If
C<OUTPUT> is an L<IO::Handle>, output will be emitted to that handle.
=item FILENAME_PREFIX_A, FILENAME_PREFIX_B
The string to print before the filename in the header. Unused on C<OldStyle>
diffs. Defaults are C<"---">, C<"+++"> for Unified and C<"***">, C<"+++"> for
Context.
=item KEYGEN, KEYGEN_ARGS
These are passed to L<Algorithm::Diff/traverse_sequences>.
=back
B<Note>: if neither C<FILENAME_> option is defined, the header will not be
printed. If at one is present, the other and both MTIME_ options must be
present or "Use of undefined variable" warnings will be generated (except
on C<OldStyle> diffs, which ignores these options).
=head1 Formatting Classes
These functions implement the output formats. They are grouped in to classes
so diff() can use class names to call the correct set of output routines and so
that you may inherit from them easily. There are no constructors or instance
methods for these classes, though subclasses may provide them if need be.
Each class has file_header(), hunk_header(), hunk(), and footer() methods
identical to those documented in the Text::Diff::Unified section. header() is
called before the hunk() is first called, footer() afterwards. The default
footer function is an empty method provided for overloading:
sub footer { return "End of patch\n" }
Some output formats are provided by external modules (which are loaded
automatically), such as L<Text::Diff::Table>. These are
are documented here to keep the documentation simple.
=head2 Text::Diff::Base
Returns "" for all methods (other than C<new()>).
=head2 Text::Diff::Unified
--- A Mon Nov 12 23:49:30 2001
+++ B Mon Nov 12 23:49:30 2001
@@ -2,13 +2,13 @@
2
3
4
-5d
+5a
6
7
8
9
+9a
10
11
-11d
12
13
=over
=item file_header
$s = Text::Diff::Unified->file_header( $options );
Returns a string containing a unified header. The sole parameter is the
options hash passed in to diff(), containing at least:
FILENAME_A => $fn1,
MTIME_A => $mtime1,
FILENAME_B => $fn2,
MTIME_B => $mtime2
May also contain
FILENAME_PREFIX_A => "---",
FILENAME_PREFIX_B => "+++",
to override the default prefixes (default values shown).
=item hunk_header
Text::Diff::Unified->hunk_header( \@ops, $options );
Returns a string containing the output of one hunk of unified diff.
=item Text::Diff::Unified::hunk
Text::Diff::Unified->hunk( \@seq_a, \@seq_b, \@ops, $options );
Returns a string containing the output of one hunk of unified diff.
=back
=head2 Text::Diff::Table
+--+----------------------------------+--+------------------------------+
| |../Test-Differences-0.2/MANIFEST | |../Test-Differences/MANIFEST |
| |Thu Dec 13 15:38:49 2001 | |Sat Dec 15 02:09:44 2001 |
+--+----------------------------------+--+------------------------------+
| | * 1|Changes *
| 1|Differences.pm | 2|Differences.pm |
| 2|MANIFEST | 3|MANIFEST |
| | * 4|MANIFEST.SKIP *
| 3|Makefile.PL | 5|Makefile.PL |
| | * 6|t/00escape.t *
| 4|t/00flatten.t | 7|t/00flatten.t |
| 5|t/01text_vs_data.t | 8|t/01text_vs_data.t |
| 6|t/10test.t | 9|t/10test.t |
+--+----------------------------------+--+------------------------------+
This format also goes to some pains to highlight "invisible" characters on
differing elements by selectively escaping whitespace:
+--+--------------------------+--------------------------+
| |demo_ws_A.txt |demo_ws_B.txt |
| |Fri Dec 21 08:36:32 2001 |Fri Dec 21 08:36:50 2001 |
+--+--------------------------+--------------------------+
| 1|identical |identical |
* 2| spaced in | also spaced in *
* 3|embedded space |embedded tab *
| 4|identical |identical |
* 5| spaced in |\ttabbed in *
* 6|trailing spaces\s\s\n |trailing tabs\t\t\n *
| 7|identical |identical |
* 8|lf line\n |crlf line\r\n *
* 9|embedded ws |embedded\tws *
+--+--------------------------+--------------------------+
See L<Text::Diff::Table> for more details, including how the whitespace
escaping works.
=head2 Text::Diff::Context
*** A Mon Nov 12 23:49:30 2001
--- B Mon Nov 12 23:49:30 2001
***************
*** 2,14 ****
2
3
4
! 5d
6
7
8
9
10
11
- 11d
12
13
--- 2,14 ----
2
3
4
! 5a
6
7
8
9
+ 9a
10
11
12
13
Note: hunk_header() returns only "***************\n".
=head2 Text::Diff::OldStyle
5c5
< 5d
---
> 5a
9a10
> 9a
12d12
< 11d
Note: no file_header().
=head1 LIMITATIONS
Must suck both input files entirely in to memory and store them with a normal
amount of Perlish overhead (one array location) per record. This is implied by
the implementation of Algorithm::Diff, which takes two arrays. If
Algorithm::Diff ever offers an incremental mode, this can be changed (contact
the maintainers of Algorithm::Diff and Text::Diff if you need this; it
shouldn't be too terribly hard to tie arrays in this fashion).
Does not provide most of the more refined GNU diff options: recursive directory
tree scanning, ignoring blank lines / whitespace, etc., etc. These can all be
added as time permits and need arises, many are rather easy; patches quite
welcome.
Uses closures internally, this may lead to leaks on C<perl> versions 5.6.1 and
prior if used many times over a process' life time.
=head1 SEE ALSO
L<Algorithm::Diff> - the underlying implementation of the diff algorithm
used by C<Text::Diff>.
L<YAML::Diff> - find difference between two YAML documents.
L<HTML::Differences> - find difference between two HTML documents.
This uses a more sane approach than L<HTML::Diff>.
L<XML::Diff> - find difference between two XML documents.
L<Array::Diff> - find the differences between two Perl arrays.
L<Hash::Diff> - find the differences between two Perl hashes.
L<Data::Diff> - find difference between two arbitrary data structures.
=head1 REPOSITORY
L<https://github.com/neilbowers/Text-Diff>
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
Barrie Slaymaker E<lt>barries@slaysys.comE<gt>
=head1 COPYRIGHT
Some parts copyright 2009 Adam Kennedy.
Copyright 2001 Barrie Slaymaker. All Rights Reserved.
You may use this under the terms of either the Artistic License or GNU Public
License v 2.0 or greater.
=cut
1;

View File

@@ -0,0 +1,142 @@
package Text::Diff::Config;
use 5.006;
use strict;
use warnings;
our $VERSION = '1.43';
our $Output_Unicode;
BEGIN
{
$Output_Unicode = $ENV{'DIFF_OUTPUT_UNICODE'};
}
1;
__END__
=pod
=head1 NAME
Text::Diff::Config - global configuration for Text::Diff (as a
separate module).
=head1 SYNOPSIS
use Text::Diff::Config;
$Text::Diff::Config::Output_Unicode = 1;
=head1 DESCRIPTION
This module configures Text::Diff and its related modules. Currently it contains
only one global variable $Text::Diff::Config::Output_Unicode which is a boolean
flag, that if set outputs unicode characters as themselves without escaping them
as C< \x{HHHH} > first.
It is initialized to the value of C< $ENV{DIFF_OUTPUT_UNICODE} >, but can be
set to a different value at run-time, including using local.
=head1 AUTHOR
Shlomi Fish, L<http://www.shlomifish.org/> .
=head1 LICENSE
Copyright 2010, Shlomi Fish.
This file is licensed under the MIT/X11 License:
L<http://www.opensource.org/licenses/mit-license.php>.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
=cut
package Text::Diff::Config;
use strict;
use warnings;
use vars qw($Output_Unicode);
BEGIN
{
$Output_Unicode = $ENV{'DIFF_OUTPUT_UNICODE'};
}
1;
__END__
=pod
=head1 NAME
Text::Diff::Config - global configuration for Text::Diff (as a
separate module).
=head1 SYNOPSIS
use Text::Diff::Config;
$Text::Diff::Config::Output_Unicode = 1;
=head1 DESCRIPTION
This module configures Text::Diff and its related modules. Currently it contains
only one global variable $Text::Diff::Config::Output_Unicode which is a boolean
flag, that if set outputs unicode characters as themselves without escaping them
as C< \x{HHHH} > first.
It is initialized to the value of C< $ENV{DIFF_OUTPUT_UNICODE} >, but can be
set to a different value at run-time, including using local.
=head1 AUTHOR
Shlomi Fish, L<http://www.shlomifish.org/> .
=head1 LICENSE
Copyright 2010, Shlomi Fish.
This file is licensed under the MIT/X11 License:
L<http://www.opensource.org/licenses/mit-license.php>.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
=cut

View File

@@ -0,0 +1,374 @@
package Text::Diff::FormattedHTML;
use 5.006;
use strict;
use warnings;
use File::Slurp;
use Algorithm::Diff 'traverse_balanced';
use String::Diff 'diff';
use base 'Exporter';
our @EXPORT = (qw'diff_files diff_strings diff_css');
=head1 NAME
Text::Diff::FormattedHTML - Generate a colorful HTML diff of strings/files.
=head1 VERSION
Version 0.08
=cut
our $VERSION = '0.08';
=head1 SYNOPSIS
use Text::Diff::FormattedHTML;
my $output = diff_files($file1, $file2);
# for strings
my $output = diff_strings( { vertical => 1 }, $file1, $file2);
# as you might want some CSS:
open OUT, ">diff.html";
print OUT "<style type='text/css'>\n", diff_css(), "</style>\n";
print OUT diff_files('fileA', 'fileB');
close OUT;
=head1 DESCRIPTION
Presents in a (nice?) HTML table the difference between two files or strings.
Inspired on GitHub diff view.
=head1 SUBROUTINES
=head2 diff_files
my $html = diff_files("filename1", "filename2");
C<diff_files> and C<diff_strings> support a first optional argument
(an hash reference) where options can be set.
Valid options are:
=over 4
=item C<vertical>
Can be set to a true value, for a more compact table.
=item C<limit_onesided>
Makes tables look nicer when there is a side with too many new lines.
=back
=cut
sub diff_files {
my $settings = {};
$settings = shift if ref($_[0]) eq "HASH";
my ($f1, $f2) = @_;
die "$f1 not available" unless -f $f1;
die "$f2 not available" unless -f $f2;
my @f1 = read_file $f1;
my @f2 = read_file $f2;
_internal_diff($settings, \@f1, \@f2);
}
=head2 diff_strings
my $html = diff_strings("string1", "string2");
Compare strings. First split by newline, and then treat them as file
content (see function above).
=cut
sub diff_strings {
my $settings = {};
$settings = shift if ref($_[0]) eq "HASH";
my ($s1, $s2) = @_;
my @f1 = split /\n/, $s1;
my @f2 = split /\n/, $s2;
_internal_diff($settings, \@f1, \@f2);
}
=head2 diff_css
my $css = diff_css;
Return the default css. You are invited to override it.
=cut
sub diff_css {
return <<'EOCSS';
table.diff {
border-collapse: collapse;
border-top: solid 1px #999999;
border-left: solid 1px #999999;
}
table.diff td {
padding: 2px;
padding-left: 5px;
padding-right: 5px;
border-right: solid 1px #999999;
border-bottom: solid 1px #999999;
}
table.diff td:nth-child(1),
table.diff td:nth-child(2) {
background-color: #deedff;
}
table.diff tr.change,
table.diff tr.disc_a,
table.diff tr.disc_b {
background-color: #ffffdd;
}
table.diff tr.del {
background-color: #ffeeee;
}
table.diff tr.ins {
background-color: #eeffee;
}
table.diff td:nth-child(3),
table.diff td:nth-child(4) {
font-family: monospace;
white-space: pre;
}
table.diff td ins {
padding: 2px;
color: #009900;
background-color: #ccffcc;
text-decoration: none;
font-weight: bold;
}
table.diff td del {
padding: 2px;
color: #990000;
background-color: #ffcccc;
text-decoration: none;
font-weight: bold;
}
EOCSS
}
sub _protect {
my $x = shift;
if ($x) {
$x =~ s/&/&amp;/g;
$x =~ s/</&lt;/g;
$x =~ s/>/&gt;/g;
}
return $x;
}
sub _internal_diff {
my ($settings, $sq1, $sq2) = @_;
my $get = sub {
my ($l, $r) = @_;
$l = $sq1->[$l];
$r = $sq2->[$r];
chomp($l) if $l;
chomp($r) if $r;
return ($l,$r);
};
my ($ll, $rl);
my $line = sub {
sprintf("<tr class='%s'><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n", @_);
};
if ($settings->{limit_onesided}) {
# Prevent really long lists where we just go on showing
# all of the values that one side does not have
if($settings->{vertical}){
die "Option: [vertical] is incompatible with [limit_empty]";
}
my ($am_skipping, $num_since_lc, $num_since_rc) = (0, 0, 0);
$line = sub {
my ($class, $ln, $rn, $l, $r) = @_;
my $out = '';
if(
($class ne 'disc_a') &&
($class ne 'disc_b')
){
if($am_skipping){
$out .= "($num_since_lc, $num_since_rc)</td></tr>\n";
}
($am_skipping, $num_since_lc, $num_since_rc) = (0, 0, 0);
}elsif($class ne 'disc_a'){
$num_since_lc++;
}elsif($class ne 'disc_b'){
$num_since_rc++;
}
if(
($num_since_lc > $settings->{limit_onesided}) ||
($num_since_rc > $settings->{limit_onesided})
){
if(!$am_skipping){
$out = '<tr><td colspan=4>';
$am_skipping = 1;
}
$out .= '. ';
return $out;
}
$out .= sprintf("<tr class='%s'><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n", @_);
return $out;
};
}
if ($settings->{vertical}) {
$line = sub {
my $out = "";
my ($class, $ln, $rn, $l, $r) = @_;
if ($l eq $r) {
$out .= sprintf("<tr class='%s'><td>%s</td><td>%s</td><td>%s</td></tr>\n",
$class, $ln, $rn, $l);
} else {
$class eq "disc_a" && ($class = "disc_a del");
$class eq "disc_b" && ($class = "disc_b ins");
$class eq "change" && ($class = "change del");
$l and $out .= sprintf("<tr class='%s'><td>%s</td><td></td><td>%s</td></tr>\n",
$class, $ln, $l);
$class eq "change del" && ($class = "change ins");
$r and $out .= sprintf("<tr class='%s'><td></td><td>%s</td><td>%s</td></tr>\n",
$class, $rn, $r);
}
$out
}
}
my $out = "<table class='diff'>\n";
traverse_balanced $sq1, $sq2,
{
MATCH => sub {
my ($l, $r) = $get->(@_);
++$ll; ++$rl;
$out .= $line->('match', $ll, $rl, _protect($l), _protect($r));
},
DISCARD_A => sub {
my ($l, $r) = $get->(@_);
++$ll;
$out .= $line->('disc_a', $ll, '', _protect($l), '');
},
DISCARD_B => sub {
my ($l, $r) = $get->(@_);
++$rl;
$out .= $line->('disc_b', '', $rl, '', _protect($r));
},
CHANGE => sub {
my ($l, $r) = $get->(@_);
my $diff = diff($l, $r,
remove_open => '#del#',
remove_close => '#/del#',
append_open => '#ins#',
append_close => '#/ins#',
);
++$ll; ++$rl;
$out .= $line->('change', $ll, $rl,
_retag(_protect($diff->[0])), _retag(_protect($diff->[1])));
},
};
$out .= "</table>\n";
}
sub _retag {
my $x = shift;
$x =~ s/#(.?(?:del|ins))#/<$1>/g;
return $x;
}
=head1 AUTHOR
Alberto Simoes, C<< <ambs at cpan.org> >>
=head1 BUGS
Please report any bugs or feature requests to
C<bug-text-diff-formattedhtml at rt.cpan.org>, or through the web
interface at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Text-Diff-FormattedHTML>.
I will be notified, and then you'll automatically be notified of
progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc Text::Diff::FormattedHTML
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker (report bugs here)
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=Text-Diff-FormattedHTML>
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/Text-Diff-FormattedHTML>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/Text-Diff-FormattedHTML>
=item * Search CPAN
L<http://search.cpan.org/dist/Text-Diff-FormattedHTML/>
=back
=head1 ACKNOWLEDGEMENTS
=head1 LICENSE AND COPYRIGHT
Copyright 2011 Alberto Simoes.
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.
=cut
1; # End of Text::Diff::FormattedHTML

View File

@@ -0,0 +1,222 @@
package Text::Diff::HTML;
use strict;
use vars qw(@ISA $VERSION);
use HTML::Entities;
use Text::Diff (); # Just to be safe.
$VERSION = '0.07';
@ISA = qw(Text::Diff::Unified);
sub file_header {
return '<div class="file"><span class="fileheader">'
. encode_entities(shift->SUPER::file_header(@_))
. '</span>';
}
sub hunk_header {
return '<div class="hunk"><span class="hunkheader">'
. encode_entities(shift->SUPER::hunk_header(@_))
. '</span>';
}
sub hunk_footer {
return '<span class="hunkfooter">'
. encode_entities(shift->SUPER::hunk_footer(@_))
. '</span></div>';
}
sub file_footer {
return '<span class="filefooter">'
. encode_entities(shift->SUPER::file_footer(@_))
. '</span></div>';
}
# Each of the items in $seqs is an array reference. The first one has the
# contents of the first file and the second has the contents of the second
# file, all broken into hunks. $ops is an array reference of array references,
# one corresponding to each of the hunks in the sequences.
#
# The contents of each op in $ops tell us what to do with each hunk. Each op
# can have up to four items:
#
# 0: The index of the relevant hunk in the first file sequence.
# 1: The index of the relevant hunk in the second file sequence.
# 2: The opcode for the hunk, either '+', '-', or ' '.
# 3: A flag; not sure what this is, doesn't seem to apply to unified diffs.
#
# So what we do is figure out which op we have and output the relevant span
# element if it is different from the last op. Then we select the hunk from
# second sequence (SEQ_B_IDX) if it's '+' and the first sequence (SEQ_A_IDX)
# otherwise, and then output the opcode and the hunk.
use constant OPCODE => 2; # "-", " ", "+"
use constant SEQ_A_IDX => 0;
use constant SEQ_B_IDX => 1;
my %code_map = (
'+' => [ 'ins' => 'ins' ],
'-' => [ 'del' => 'del' ],
' ' => [ 'span class="ctx"' => 'span' ]
);
sub hunk {
shift;
my $seqs = [ shift, shift ];
my $ops = shift;
return unless @$ops;
# Start the span element for the first opcode.
my $last = $ops->[0][ OPCODE ];
my $hunk = qq{<$code_map{ $last }->[0]>};
# Output each line of the hunk.
while (my $op = shift @$ops) {
my $opcode = $op->[OPCODE];
my $elem = $code_map{ $opcode } or next;
# Close the last span and start a new one for a new opcode.
if ($opcode ne $last) {
$hunk .= "</$code_map{ $last }->[1]><$elem->[0]>";
$last = $opcode;
}
# Output the appropriate line.
my $idx = $opcode ne '+' ? SEQ_A_IDX : SEQ_B_IDX;
$hunk .= encode_entities("$opcode $seqs->[$idx][$op->[$idx]]");
}
return $hunk . "</$code_map{ $last }->[1]>";
}
1;
__END__
##############################################################################
=head1 Name
Text::Diff::HTML - XHTML format for Text::Diff::Unified
=head1 Synopsis
use Text::Diff;
my $diff = diff "file1.txt", "file2.txt", { STYLE => 'Text::Diff::HTML' };
my $diff = diff \$string1, \$string2, { STYLE => 'Text::Diff::HTML' };
my $diff = diff \*FH1, \*FH2, { STYLE => 'Text::Diff::HTML' };
my $diff = diff \&reader1, \&reader2, { STYLE => 'Text::Diff::HTML' };
my $diff = diff \@records1, \@records2, { STYLE => 'Text::Diff::HTML' };
my $diff = diff \@records1, "file.txt", { STYLE => 'Text::Diff::HTML' };
=head1 Description
This class subclasses Text::Diff::Unified, a formatting class provided by the
L<Text::Diff> module, to add XHTML markup to the unified diff format. For
details on the interface of the C<diff()> function, see the L<Text::Diff>
documentation.
In the XHTML formatted by this module, the contents of the diff returned by
C<diff()> are wrapped in a C<< <div> >> element, as is each hunk of the diff.
Within each hunk, all content is properly HTML encoded using
L<HTML::Entities>, and the various sections of the diff are marked up with the
appropriate XHTML elements. The elements used are as follows:
=over
=item * C<< <div class="file"> >>
This element contains the entire contents of the diff "file" returned by
C<diff()>. All of the following elements are subsumed by this one.
=over
=item * C<< <span class="fileheader"> >>
The header section for the files being C<diff>ed, usually something like:
--- in.txt Thu Sep 1 12:51:03 2005
+++ out.txt Thu Sep 1 12:52:12 2005
This element immediately follows the opening "file" C<< <div> >> element.
=item * C<< <div class="hunk"> >>
This element contains a single diff "hunk". Each hunk may contain the
following elements:
=over
=item * C<< <span class="hunkheader"> >>
Header for a diff hunk. The hunk header is usually something like:
@@ -1,5 +1,7 @@
This element immediately follows the opening "hunk" C<< <div> >> element.
=item * C<< <span class="ctx"> >>
Context around the important part of a C<diff> hunk. These are contents that
have I<not> changed between the files being C<diff>ed.
=item * C<< <ins> >>
Inserted content, each line starting with C<+>.
=item * C<< <del> >>
Deleted content, each line starting with C<->.
=item * C<< <span class="hunkfooter"> >>
The footer section of a hunk; contains no contents.
=back
=item * C<< <span class="filefooter"> >>
The footer section of a file; contains no contents.
=back
=back
You may do whatever you like with these elements and classes; I highly
recommend that you style them using CSS. You'll find an example CSS file in
the F<eg> directory in the Text-Diff-HTML distribution. You will also likely
want to wrap the output of your diff in its own element (a C<< <div> >> will
do) styled with "white-space: pre".
=head1 See Also
=over
=item L<Text::Diff>
=item L<Algorithm::Diff>
=back
=head1 Support
This module is stored in an open L<GitHub
repository|http://github.com/theory/text-diff-html/>. Feel free to fork and
contribute!
Please file bug reports via L<GitHub
Issues|http://github.com/theory/text-diff-html/issues/> or by sending mail to
L<bug-Text-Diff-HTML@rt.cpan.org|mailto:bug-Text-Diff-HTML@rt.cpan.org>.
=head1 Author
David E. Wheeler <david@justatheory.com>
=head1 Copyright and License
Copyright (c) 2005-2011 David E. Wheeler. Some Rights Reserved.
This module is free software; you can redistribute it and/or modify it under the
same terms as Perl itself.
=cut

View File

@@ -0,0 +1,429 @@
package Text::Diff::Table;
use 5.006;
use strict;
use warnings;
use Carp;
use Text::Diff::Config;
our $VERSION = '1.43';
our @ISA = qw( Text::Diff::Base Exporter );
our @EXPORT_OK = qw( expand_tabs );
my %escapes = map {
my $c =
$_ eq '"' || $_ eq '$' ? qq{'$_'}
: $_ eq "\\" ? qq{"\\\\"}
: qq{"$_"};
( ord eval $c => $_ )
} (
map( chr, 32..126),
map( sprintf( "\\x%02x", $_ ), ( 0..31, 127..255 ) ),
# map( "\\c$_", "A".."Z"),
"\\t", "\\n", "\\r", "\\f", "\\b", "\\a", "\\e"
## NOTE: "\\\\" is not here because some things are explicitly
## escaped before escape() is called and we don't want to
## double-escape "\". Also, in most texts, leaving "\" more
## readable makes sense.
);
sub expand_tabs($) {
my $s = shift;
my $count = 0;
$s =~ s{(\t)(\t*)|([^\t]+)}{
if ( $1 ) {
my $spaces = " " x ( 8 - $count % 8 + 8 * length $2 );
$count = 0;
$spaces;
}
else {
$count += length $3;
$3;
}
}ge;
return $s;
}
sub trim_trailing_line_ends($) {
my $s = shift;
$s =~ s/[\r\n]+(?!\n)$//;
return $s;
}
sub escape($);
SCOPE: {
## use utf8 if available. don't if not.
my $escaper = <<'EOCODE';
sub escape($) {
use utf8;
join "", map {
my $c = $_;
$_ = ord;
exists $escapes{$_}
? $escapes{$_}
: $Text::Diff::Config::Output_Unicode
? $c
: sprintf( "\\x{%04x}", $_ );
} split //, shift;
}
1;
EOCODE
unless ( eval $escaper ) {
$escaper =~ s/ *use *utf8 *;\n// or die "Can't drop use utf8;";
eval $escaper or die $@;
}
}
sub new {
my $proto = shift;
return bless { @_ }, $proto
}
my $missing_elt = [ "", "" ];
sub hunk {
my $self = shift;
my @seqs = ( shift, shift );
my $ops = shift; ## Leave sequences in @_[0,1]
my $options = shift;
my ( @A, @B );
for ( @$ops ) {
my $opcode = $_->[Text::Diff::OPCODE()];
if ( $opcode eq " " ) {
push @A, $missing_elt while @A < @B;
push @B, $missing_elt while @B < @A;
}
push @A, [ $_->[0] + ( $options->{OFFSET_A} || 0), $seqs[0][$_->[0]] ]
if $opcode eq " " || $opcode eq "-";
push @B, [ $_->[1] + ( $options->{OFFSET_B} || 0), $seqs[1][$_->[1]] ]
if $opcode eq " " || $opcode eq "+";
}
push @A, $missing_elt while @A < @B;
push @B, $missing_elt while @B < @A;
my @elts;
for ( 0..$#A ) {
my ( $A, $B ) = (shift @A, shift @B );
## Do minimal cleaning on identical elts so these look "normal":
## tabs are expanded, trailing newelts removed, etc. For differing
## elts, make invisible characters visible if the invisible characters
## differ.
my $elt_type = $B == $missing_elt ? "A" :
$A == $missing_elt ? "B" :
$A->[1] eq $B->[1] ? "="
: "*";
if ( $elt_type ne "*" ) {
if ( $elt_type eq "=" || $A->[1] =~ /\S/ || $B->[1] =~ /\S/ ) {
$A->[1] = escape trim_trailing_line_ends expand_tabs $A->[1];
$B->[1] = escape trim_trailing_line_ends expand_tabs $B->[1];
}
else {
$A->[1] = escape $A->[1];
$B->[1] = escape $B->[1];
}
}
else {
## not using \z here for backcompat reasons.
$A->[1] =~ /^(\s*?)([^ \t].*?)?(\s*)(?![\n\r])$/s;
my ( $l_ws_A, $body_A, $t_ws_A ) = ( $1, $2, $3 );
$body_A = "" unless defined $body_A;
$B->[1] =~ /^(\s*?)([^ \t].*?)?(\s*)(?![\n\r])$/s;
my ( $l_ws_B, $body_B, $t_ws_B ) = ( $1, $2, $3 );
$body_B = "" unless defined $body_B;
my $added_escapes;
if ( $l_ws_A ne $l_ws_B ) {
## Make leading tabs visible. Other non-' ' chars
## will be dealt with in escape(), but this prevents
## tab expansion from hiding tabs by making them
## look like ' '.
$added_escapes = 1 if $l_ws_A =~ s/\t/\\t/g;
$added_escapes = 1 if $l_ws_B =~ s/\t/\\t/g;
}
if ( $t_ws_A ne $t_ws_B ) {
## Only trailing whitespace gets the \s treatment
## to make it obvious what's going on.
$added_escapes = 1 if $t_ws_A =~ s/ /\\s/g;
$added_escapes = 1 if $t_ws_B =~ s/ /\\s/g;
$added_escapes = 1 if $t_ws_A =~ s/\t/\\t/g;
$added_escapes = 1 if $t_ws_B =~ s/\t/\\t/g;
}
else {
$t_ws_A = $t_ws_B = "";
}
my $do_tab_escape = $added_escapes || do {
my $expanded_A = expand_tabs join( $body_A, $l_ws_A, $t_ws_A );
my $expanded_B = expand_tabs join( $body_B, $l_ws_B, $t_ws_B );
$expanded_A eq $expanded_B;
};
my $do_back_escape = $do_tab_escape || do {
my ( $unescaped_A, $escaped_A,
$unescaped_B, $escaped_B
) =
map
join( "", /(\\.)/g ),
map {
( $_, escape $_ )
}
expand_tabs join( $body_A, $l_ws_A, $t_ws_A ),
expand_tabs join( $body_B, $l_ws_B, $t_ws_B );
$unescaped_A ne $unescaped_B && $escaped_A eq $escaped_B;
};
if ( $do_back_escape ) {
$body_A =~ s/\\/\\\\/g;
$body_B =~ s/\\/\\\\/g;
}
my $line_A = join $body_A, $l_ws_A, $t_ws_A;
my $line_B = join $body_B, $l_ws_B, $t_ws_B;
unless ( $do_tab_escape ) {
$line_A = expand_tabs $line_A;
$line_B = expand_tabs $line_B;
}
$A->[1] = escape $line_A;
$B->[1] = escape $line_B;
}
push @elts, [ @$A, @$B, $elt_type ];
}
push @{$self->{ELTS}}, @elts, ["bar"];
return "";
}
sub _glean_formats {
my $self = shift;
}
sub file_footer {
my $self = shift;
my @seqs = (shift,shift);
my $options = pop;
my @heading_lines;
if ( defined $options->{FILENAME_A} || defined $options->{FILENAME_B} ) {
push @heading_lines, [
map(
{
( "", escape( defined $_ ? $_ : "<undef>" ) );
}
( @{$options}{qw( FILENAME_A FILENAME_B)} )
),
"=",
];
}
if ( defined $options->{MTIME_A} || defined $options->{MTIME_B} ) {
push @heading_lines, [
map( {
( "",
escape(
( defined $_ && length $_ )
? localtime $_
: ""
)
);
}
@{$options}{qw( MTIME_A MTIME_B )}
),
"=",
];
}
if ( defined $options->{INDEX_LABEL} ) {
push @heading_lines, [ "", "", "", "", "=" ] unless @heading_lines;
$heading_lines[-1]->[0] = $heading_lines[-1]->[2] =
$options->{INDEX_LABEL};
}
## Not ushifting on to @{$self->{ELTS}} in case it's really big. Want
## to avoid the overhead.
my $four_column_mode = 0;
for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
next if $cols->[-1] eq "bar";
if ( $cols->[0] ne $cols->[2] ) {
$four_column_mode = 1;
last;
}
}
unless ( $four_column_mode ) {
for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
next if $cols->[-1] eq "bar";
splice @$cols, 2, 1;
}
}
my @w = (0,0,0,0);
for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
next if $cols->[-1] eq "bar";
for my $i (0..($#$cols-1)) {
$w[$i] = length $cols->[$i]
if defined $cols->[$i] && length $cols->[$i] > $w[$i];
}
}
my %fmts = $four_column_mode
? (
"=" => "| %$w[0]s|%-$w[1]s | %$w[2]s|%-$w[3]s |\n",
"A" => "* %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s |\n",
"B" => "| %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s *\n",
"*" => "* %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s *\n",
)
: (
"=" => "| %$w[0]s|%-$w[1]s |%-$w[2]s |\n",
"A" => "* %$w[0]s|%-$w[1]s |%-$w[2]s |\n",
"B" => "| %$w[0]s|%-$w[1]s |%-$w[2]s *\n",
"*" => "* %$w[0]s|%-$w[1]s |%-$w[2]s *\n",
);
my @args = ('', '', '');
push(@args, '') if $four_column_mode;
$fmts{bar} = sprintf $fmts{"="}, @args;
$fmts{bar} =~ s/\S/+/g;
$fmts{bar} =~ s/ /-/g;
# Sometimes the sprintf has too many arguments,
# which results in a warning on Perl 5.021+
# I really wanted to write:
# no warnings 'redundant';
# but that causes a compilation error on older versions of perl
# where the warnings pragma doesn't know about 'redundant'
no warnings;
return join( "",
map {
sprintf( $fmts{$_->[-1]}, @$_ );
} (
["bar"],
@heading_lines,
@heading_lines ? ["bar"] : (),
@{$self->{ELTS}},
),
);
@{$self->{ELTS}} = [];
}
1;
__END__
=pod
=head1 NAME
Text::Diff::Table - Text::Diff plugin to generate "table" format output
=head1 SYNOPSIS
use Text::Diff;
diff \@a, $b, { STYLE => "Table" };
=head1 DESCRIPTION
This is a plugin output formatter for Text::Diff that generates "table" style
diffs:
+--+----------------------------------+--+------------------------------+
| |../Test-Differences-0.2/MANIFEST | |../Test-Differences/MANIFEST |
| |Thu Dec 13 15:38:49 2001 | |Sat Dec 15 02:09:44 2001 |
+--+----------------------------------+--+------------------------------+
| | * 1|Changes *
| 1|Differences.pm | 2|Differences.pm |
| 2|MANIFEST | 3|MANIFEST |
| | * 4|MANIFEST.SKIP *
| 3|Makefile.PL | 5|Makefile.PL |
| | * 6|t/00escape.t *
| 4|t/00flatten.t | 7|t/00flatten.t |
| 5|t/01text_vs_data.t | 8|t/01text_vs_data.t |
| 6|t/10test.t | 9|t/10test.t |
+--+----------------------------------+--+------------------------------+
This format also goes to some pains to highlight "invisible" characters on
differing elements by selectively escaping whitespace. Each element is split
in to three segments (leading whitespace, body, trailing whitespace). If
whitespace differs in a segement, that segment is whitespace escaped.
Here is an example of the selective whitespace.
+--+--------------------------+--------------------------+
| |demo_ws_A.txt |demo_ws_B.txt |
| |Fri Dec 21 08:36:32 2001 |Fri Dec 21 08:36:50 2001 |
+--+--------------------------+--------------------------+
| 1|identical |identical |
* 2| spaced in | also spaced in *
* 3|embedded space |embedded tab *
| 4|identical |identical |
* 5| spaced in |\ttabbed in *
* 6|trailing spaces\s\s\n |trailing tabs\t\t\n *
| 7|identical |identical |
* 8|lf line\n |crlf line\r\n *
* 9|embedded ws |embedded\tws *
+--+--------------------------+--------------------------+
Here's why the lines do or do not have whitespace escaped:
=over
=item lines 1, 4, 7 don't differ, no need.
=item lines 2, 3 differ in non-whitespace, no need.
=item lines 5, 6, 8, 9 all have subtle ws changes.
=back
Whether or not line 3 should have that tab character escaped is a judgement
call; so far I'm choosing not to.
=head1 UNICODE
To output the raw unicode chracters consult the documentation of
L<Text::Diff::Config>. You can set the C<DIFF_OUTPUT_UNICODE> environment
variable to 1 to output it from the command line. For more information,
consult this bug: L<https://rt.cpan.org/Ticket/Display.html?id=54214> .
=head1 LIMITATIONS
Table formatting requires buffering the entire diff in memory in order to
calculate column widths. This format should only be used for smaller
diffs.
Assumes tab stops every 8 characters, as $DIETY intended.
Assumes all character codes >= 127 need to be escaped as hex codes, ie that the
user's terminal is ASCII, and not even "high bit ASCII", capable. This can be
made an option when the need arises.
Assumes that control codes (character codes 0..31) that don't have slash-letter
escapes ("\n", "\r", etc) in Perl are best presented as hex escapes ("\x01")
instead of octal ("\001") or control-code ("\cA") escapes.
=head1 AUTHOR
Barrie Slaymaker E<lt>barries@slaysys.comE<gt>
=head1 LICENSE
Copyright 2001 Barrie Slaymaker, All Rights Reserved.
You may use this software under the terms of the GNU public license, any
version, or the Artistic license.
=cut

View File

@@ -0,0 +1,294 @@
package Text::vFile::asData;
use strict;
use warnings;
no warnings 'uninitialized';
use base qw( Class::Accessor::Chained::Fast );
__PACKAGE__->mk_accessors(qw( preserve_params ));
our $VERSION = '0.08';
=head1 NAME
Text::vFile::asData - parse vFile formatted files into data structures
=head1 SYNOPSIS
use Text::vFile::asData;
open my $fh, "foo.ics"
or die "couldn't open ics: $!";
my $data = Text::vFile::asData->new->parse( $fh );
=head1 DESCRIPTION
Text::vFile::asData reads vFile format files, such as vCard (RFC 2426) and
vCalendar (RFC 2445).
=cut
sub _unwrap_lines {
my $self = shift;
my @lines;
for (@_) {
my $line = $_; # $_ may be readonly
$line =~ s{[\r\n]+$}{}; # lines SHOULD end CRLF
if ($line =~ /^[ \t](.*)/) { # Continuation line (RFC Sect. 4.1)
die "Continuation line, but no preceding line" unless @lines;
$lines[-1] .= $1;
next;
}
push @lines, $line;
}
return @lines;
}
sub parse {
my $self = shift;
my $fh = shift;
return $self->parse_lines( <$fh> );
}
# like Text::ParseWords' parse_line, only C-style so the regex engine doesn't
# blow its stack, and it's also got a $limit like split
# this only took a trainride, so I'm pretty sure there are lurking
# corner cases - when I get a tuit I'll take the Text::ParseWords
# tests and run them through it
sub parse_line {
my ($delim, $keep, $text, $limit) = @_;
my ($current, @parts);
my ($quote, $escaped);
while (length $text) {
if ($text =~ s{^(\\)}{}) {
$current .= $1 if $escaped || $keep;
$escaped = !$escaped;
next;
}
if (!$quote && !$escaped && $text =~ s{^$delim}{}) {
push @parts, $current;
$current = undef;
if (defined $limit && @parts == $limit -1) {
return @parts, $text;
}
}
else {
# pull the character off to take a looksee
$text =~ s{(.)}{};
my $char = $1;
if ($char eq '"' && !$escaped) {
# either it's defined and matches, in which case we
# clear the quote variable, or it's undefined which
# makes this quote an opening quote
$quote = !$quote;
$current .= $char if $keep;
}
else {
$current .= $char;
}
}
$escaped = 0;
}
return @parts, $current;
}
sub parse_lines {
my $self = shift;
my @path;
my $current;
for ($self->_unwrap_lines( @_ )) {
# Ignore leading or trailing blank lines at the top/bottom of the
# input. Not sure about completely blank lines within the input
next if scalar @path == 0 and $_ =~ /^\s*$/;
if (/^BEGIN:(.*)/i) {
push @path, $current;
$current = { type => $1 };
push @{ $path[-1]{objects} }, $current;
next;
}
if (/^END:(.*)/i) {
die "END $1 in $current->{type}"
unless lc $current->{type} eq lc $1;
$current = pop @path;
next;
}
# we'd use Text::ParseWords here, but it likes to segfault.
my ($name, $value) = parse_line( ':', 1, $_, 2);
$value = '' unless defined $value;
my @params = parse_line( ';', 0, $name );
$name = shift @params;
$value = { value => $value };
foreach my $param (@params) {
my ($p_name, $p_value) = split /=/, $param;
push @{ $value->{params} }, { $p_name => $p_value }
if $self->preserve_params;
$value->{param}{ $p_name } = $p_value;
}
push @{ $current->{properties}{ $name } }, $value;
}
# something did a BEGIN but no END - TODO, unwind this nicely as
# it may be more than one level
die "BEGIN $current->{type} without matching END"
if @path;
return $current;
}
# this might not strictly comply
sub generate_lines {
my $self = shift;
my $this = shift;
my @lines;
# XXX all the existence checks are to prevent auto-vivification
# breaking if_diff tests - do we mind, or should the fields have been
# there anyway?
push @lines, "BEGIN:$this->{type}" if exists $this->{type};
if (exists $this->{properties}) {
while (my ($name, $v) = each %{ $this->{properties} } ) {
for my $value (@$v) {
# XXX so we're taking params in preference to param,
# let's be sure to document that when we document this
# method
my $param = join ';', '', map {
my $hash = $_;
map {
"$_" . (defined $hash->{$_} ? "=" . $hash->{$_} : "")
} keys %$hash
} @{ $value->{params} || [ $value->{param} ] };
my $line = "$name$param:$value->{value}";
# wrapping, but done ugly
my @chunks = $line =~ m/(.{1,72})/g;
push @lines, shift @chunks;
push @lines, map { " $_" } @chunks;
}
}
}
if (exists $this->{objects}) {
push @lines, $self->generate_lines( $_ ) for @{ $this->{objects} }
}
push @lines, "END:$this->{type}" if exists $this->{type};
return @lines;
}
1;
__END__
=head1 DATA STRUCTURE
A vFile contains one or more objects, delimited by BEGIN and END tags.
BEGIN:VCARD
...
END:VCARD
Objects may contain sub-objects;
BEGIN:VCALENDAR
...
BEGIN:VEVENT
...
END:VEVENT
...
ENV:VCALENDAR
Each object consists of one or more properties. Each property
consists of a name, zero or more optional parameters, and then a
value. This fragment:
DTSTART;VALUE=DATE:19970317
identifies a property with the name, C<DSTART>, the parameter
C<VALUE>, which has the value C<DATE>, and the property's value is
C<19970317>. Those of you with an XML bent might find this more
recognisable as:
<dtstart value="date">19970317</dtstart>
The return value from the C<parse()> method is a hash ref.
The top level key, C<objects>, refers to an array ref. Each entry in the
array ref is a hash ref with two or three keys.
The value of the first key, C<type>, is a string corresponding to the
type of the object. E.g., C<VCARD>, C<VEVENT>, and so on.
The value of the second key, C<properties>, is a hash ref, with property
names as keys, and an array ref of those property values. It's an array
ref, because some properties may appear within an object multiple times
with different values. For example;
BEGIN:VEVENT
ATTENDEE;CN="Nik Clayton":mailto:nik@FreeBSD.org
ATTENDEE;CN="Richard Clamp":mailto:richardc@unixbeard.net
...
END:VEVENT
Each entry in the array ref is a hash ref with one or two keys.
The first key, C<value>, corresponds to the property's value.
The second key, C<param>, contains a hash ref of the property's
parameters. Keys in this hash ref are the parameter's name, the value
is the parameter's value. (If you enable the C<preserve_params>
option there is an additional key populated, called C<params>. It is
an array ref of hash refs, each hash ref is the parameter's name and
the parameter's value - these are collected in the order they are
encountered to prevent hash collisions as seen in some vCard files)
line.)
The third key in the top level C<objects> hash ref is C<objects>. If
it exists, it indicates that sub-objects were found. The value of
this key is an array ref of sub-objects, with identical keys and
behaviour to that of the top level C<objects> key. This recursive
structure continues, nesting as deeply as there were sub-objects in
the input file.
The C<bin/v2yaml> script that comes with this distribution displays the
format of a vFile as YAML. C<t/03usage.t> has examples of picking out
the relevant information from the data structure.
=head1 AUTHORS
Richard Clamp <richardc@unixbeard.net> and Nik Clayton <nik@FreeBSD.org>
=head1 COPYRIGHT
Copyright 2004, 2010, 2013 Richard Clamp and Nik Clayton. All Rights Reserved.
This program is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.
=head1 CAVEATS
We don't do any decoding of property values, including descaping
C<\,>, we're still undecided as to whether this is a bug.
=head1 BUGS
Aside from the TODO list items, none known.
=head1 SEE ALSO
Text::vFile - parses to objects, doesn't handle nested items
RFC 2426 - vCard specification
RFC 2445 - vCalendar specification
=cut
# Emacs local variables to keep the style consistent
Local Variables:
cperl-indent-level: 4
End: