#!/usr/bin/perl use strict; use warnings; use Tk; use Tk::Scale; my $mw = Tk::MainWindow->new(-title=> "EMA Logs von Duplikaten bereinigen"); my $answer = ""; my $button1 = $mw->Button( -text => "Datei öffnen", -command => \&show_file_dialog, )->pack(-side => 'left',); my $label = $mw->Label( -text => 'Zu verarbeitende Datei auswählen.', )->pack(-side => 'left',); my $button2 = $mw->Button( -text => "Start", -command => \&convert, )->pack(-side => 'right',); $mw->MainLoop(); sub show_file_dialog { my @ext = ( ["Text Files", [qw/.csv .txt/]], ["All files", [qw/*/]], ); $answer = $mw->getOpenFile( -filetypes => \@ext, ); my $out; $out=$answer; $out =~ s/(.*)\.([a-z0-9]*)$/$1\.conv\.$2/; my $txt = "Verarbeite $answer nach $out"; $txt =~ s/\//\\/g; $label->configure(-text => "$txt"); } sub convert { my $file=$answer; return if ($file eq ""); my $out = $file; $out =~ s/(.*)\.([a-z0-9]*)$/$1\.conv\.$2/; open IN, "<$file"; my @INA=; my $count=@INA; close IN; open OUT, ">$out"; my $liner = 0; my $linew = 0; my @uniques; my $found; my $text; foreach (@INA) { chomp; $liner++; $text = sprintf "[%d/%d] %05.2f%%, %d geschrieben", $liner, $count, $liner/$count*100, $linew; $label->configure(-text => "$text"); $mw->update if ($liner%10); my ($id,$account,$direction,$doctype,$subtype,$size,$from,$to,$cc,$subject,$senttime,$receivedtime,$archivetime,$messageid) = split /;/,$_; my $unique = $senttime . $messageid; my $rein = 0; if ($messageid eq "\"\"") { $rein = 1; } else { $found = 0; foreach (@uniques) { if ($_ eq $unique) { $found = 1; } } if ($found == 1) { $rein = 0; } else { push @uniques, $unique; $rein = 1; } } if ($rein == 1) { print OUT "$id;$account;$direction;$doctype;$subtype;$size;$from;$to;$cc;$subject;$senttime;$receivedtime;$archivetime;$messageid\n"; $linew++; } my $c=@uniques; shift @uniques if ($c>100); } $text = sprintf "[%d/%d] %05.2f%%, %d geschrieben", $liner, $count, $liner/$count*100, $linew; $label->configure(-text => "$text"); $mw->update; close OUT; }