0.6.9
This commit is contained in:
378
bin/lib/File/RandomAccess.pm
Normal file
378
bin/lib/File/RandomAccess.pm
Normal file
@@ -0,0 +1,378 @@
|
||||
#------------------------------------------------------------------------------
|
||||
# File: RandomAccess.pm
|
||||
#
|
||||
# Description: Buffer to support random access reading of sequential file
|
||||
#
|
||||
# Revisions: 02/11/2004 - P. Harvey Created
|
||||
# 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
|
||||
# 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
|
||||
# 01/02/2005 - P. Harvey Added DEBUG code
|
||||
# 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
|
||||
# multi-character EOL sequences
|
||||
# 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
|
||||
# generate "substr outside string" warning
|
||||
# 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
|
||||
# 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
|
||||
# 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
|
||||
# scalar with a multi-character newline
|
||||
# 01/24/2009 - PH Protect against reading too much at once
|
||||
#
|
||||
# Notes: Calls the normal file i/o routines unless SeekTest() fails, in
|
||||
# which case the file is buffered in memory to allow random access.
|
||||
# SeekTest() is called automatically when the object is created
|
||||
# unless specified.
|
||||
#
|
||||
# May also be used for string i/o (just pass a scalar reference)
|
||||
#
|
||||
# Legal: Copyright (c) 2003-2018 Phil Harvey (phil at owl.phy.queensu.ca)
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the same terms as Perl itself.
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
package File::RandomAccess;
|
||||
|
||||
use strict;
|
||||
require 5.002;
|
||||
require Exporter;
|
||||
|
||||
use vars qw($VERSION @ISA @EXPORT_OK);
|
||||
$VERSION = '1.10';
|
||||
@ISA = qw(Exporter);
|
||||
|
||||
sub Read($$$);
|
||||
|
||||
# constants
|
||||
my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
|
||||
my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Create new RandomAccess object
|
||||
# Inputs: 0) reference to RandomAccess object or RandomAccess class name
|
||||
# 1) file reference or scalar reference
|
||||
# 2) flag set if file is already random access (disables automatic SeekTest)
|
||||
sub new($$;$)
|
||||
{
|
||||
my ($that, $filePt, $isRandom) = @_;
|
||||
my $class = ref($that) || $that;
|
||||
my $self;
|
||||
|
||||
if (ref $filePt eq 'SCALAR') {
|
||||
# string i/o
|
||||
$self = {
|
||||
BUFF_PT => $filePt,
|
||||
POS => 0,
|
||||
LEN => length($$filePt),
|
||||
TESTED => -1,
|
||||
};
|
||||
bless $self, $class;
|
||||
} else {
|
||||
# file i/o
|
||||
my $buff = '';
|
||||
$self = {
|
||||
FILE_PT => $filePt, # file pointer
|
||||
BUFF_PT => \$buff, # reference to file data
|
||||
POS => 0, # current position in file
|
||||
LEN => 0, # data length
|
||||
TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
|
||||
};
|
||||
bless $self, $class;
|
||||
$self->SeekTest() unless $isRandom;
|
||||
}
|
||||
return $self;
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Enable DEBUG code
|
||||
# Inputs: 0) reference to RandomAccess object
|
||||
sub Debug($)
|
||||
{
|
||||
my $self = shift;
|
||||
$self->{DEBUG} = { };
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Perform seek test and turn on buffering if necessary
|
||||
# Inputs: 0) reference to RandomAccess object
|
||||
# Returns: 1 if seek test passed (ie. no buffering required)
|
||||
# Notes: Must be done before any other i/o
|
||||
sub SeekTest($)
|
||||
{
|
||||
my $self = shift;
|
||||
unless ($self->{TESTED}) {
|
||||
my $fp = $self->{FILE_PT};
|
||||
if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
|
||||
$self->{TESTED} = 1; # test passed
|
||||
} else {
|
||||
$self->{TESTED} = -1; # test failed (requires buffering)
|
||||
}
|
||||
}
|
||||
return $self->{TESTED} == 1 ? 1 : 0;
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Get current position in file
|
||||
# Inputs: 0) reference to RandomAccess object
|
||||
# Returns: current position in file
|
||||
sub Tell($)
|
||||
{
|
||||
my $self = shift;
|
||||
my $rtnVal;
|
||||
if ($self->{TESTED} < 0) {
|
||||
$rtnVal = $self->{POS};
|
||||
} else {
|
||||
$rtnVal = tell($self->{FILE_PT});
|
||||
}
|
||||
return $rtnVal;
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Seek to position in file
|
||||
# Inputs: 0) reference to RandomAccess object
|
||||
# 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
|
||||
# Returns: 1 on success
|
||||
# Notes: When buffered, this doesn't quite behave like seek() since it will return
|
||||
# success even if you seek outside the limits of the file. However if you
|
||||
# do this, you will get an error on your next Read().
|
||||
sub Seek($$;$)
|
||||
{
|
||||
my ($self, $num, $whence) = @_;
|
||||
$whence = 0 unless defined $whence;
|
||||
my $rtnVal;
|
||||
if ($self->{TESTED} < 0) {
|
||||
my $newPos;
|
||||
if ($whence == 0) {
|
||||
$newPos = $num; # from start of file
|
||||
} elsif ($whence == 1) {
|
||||
$newPos = $num + $self->{POS}; # relative to current position
|
||||
} else {
|
||||
$self->Slurp(); # read whole file into buffer
|
||||
$newPos = $num + $self->{LEN}; # relative to end of file
|
||||
}
|
||||
if ($newPos >= 0) {
|
||||
$self->{POS} = $newPos;
|
||||
$rtnVal = 1;
|
||||
}
|
||||
} else {
|
||||
$rtnVal = seek($self->{FILE_PT}, $num, $whence);
|
||||
}
|
||||
return $rtnVal;
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Read from the file
|
||||
# Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
|
||||
# Returns: Number of bytes read
|
||||
sub Read($$$)
|
||||
{
|
||||
my $self = shift;
|
||||
my $len = $_[1];
|
||||
my $rtnVal;
|
||||
|
||||
# protect against reading too much at once
|
||||
# (also from dying with a "Negative length" error)
|
||||
if ($len & 0xf8000000) {
|
||||
return 0 if $len < 0;
|
||||
# read in smaller blocks because Windows attempts to pre-allocate
|
||||
# memory for the full size, which can lead to an out-of-memory error
|
||||
my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
|
||||
my $num = Read($self, $_[0], $maxLen);
|
||||
return $num if $num < $maxLen;
|
||||
for (;;) {
|
||||
$len -= $maxLen;
|
||||
last if $len <= 0;
|
||||
my $l = $len < $maxLen ? $len : $maxLen;
|
||||
my $buff;
|
||||
my $n = Read($self, $buff, $l);
|
||||
last unless $n;
|
||||
$_[0] .= $buff;
|
||||
$num += $n;
|
||||
last if $n < $l;
|
||||
}
|
||||
return $num;
|
||||
}
|
||||
# read through our buffer if necessary
|
||||
if ($self->{TESTED} < 0) {
|
||||
my $buff;
|
||||
my $newPos = $self->{POS} + $len;
|
||||
# number of bytes to read from file
|
||||
my $num = $newPos - $self->{LEN};
|
||||
if ($num > 0 and $self->{FILE_PT}) {
|
||||
# read data from file in multiples of $CHUNK_SIZE
|
||||
$num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
|
||||
$num = read($self->{FILE_PT}, $buff, $num);
|
||||
if ($num) {
|
||||
${$self->{BUFF_PT}} .= $buff;
|
||||
$self->{LEN} += $num;
|
||||
}
|
||||
}
|
||||
# number of bytes left in data buffer
|
||||
$num = $self->{LEN} - $self->{POS};
|
||||
if ($len <= $num) {
|
||||
$rtnVal = $len;
|
||||
} elsif ($num <= 0) {
|
||||
$_[0] = '';
|
||||
return 0;
|
||||
} else {
|
||||
$rtnVal = $num;
|
||||
}
|
||||
# return data from our buffer
|
||||
$_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
||||
$self->{POS} += $rtnVal;
|
||||
} else {
|
||||
# read directly from file
|
||||
$_[0] = '' unless defined $_[0];
|
||||
$rtnVal = read($self->{FILE_PT}, $_[0], $len) || 0;
|
||||
}
|
||||
if ($self->{DEBUG}) {
|
||||
my $pos = $self->Tell() - $rtnVal;
|
||||
unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
||||
$self->{DEBUG}->{$pos} = $rtnVal;
|
||||
}
|
||||
}
|
||||
return $rtnVal;
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Read a line from file (end of line is $/)
|
||||
# Inputs: 0) reference to RandomAccess object, 1) buffer
|
||||
# Returns: Number of bytes read
|
||||
sub ReadLine($$)
|
||||
{
|
||||
my $self = shift;
|
||||
my $rtnVal;
|
||||
my $fp = $self->{FILE_PT};
|
||||
|
||||
if ($self->{TESTED} < 0) {
|
||||
my ($num, $buff);
|
||||
my $pos = $self->{POS};
|
||||
if ($fp) {
|
||||
# make sure we have some data after the current position
|
||||
while ($self->{LEN} <= $pos) {
|
||||
$num = read($fp, $buff, $CHUNK_SIZE);
|
||||
return 0 unless $num;
|
||||
${$self->{BUFF_PT}} .= $buff;
|
||||
$self->{LEN} += $num;
|
||||
}
|
||||
# scan and read until we find the EOL (or hit EOF)
|
||||
for (;;) {
|
||||
$pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
||||
if ($pos >= 0) {
|
||||
$pos += length($/);
|
||||
last;
|
||||
}
|
||||
$pos = $self->{LEN}; # have scanned to end of buffer
|
||||
$num = read($fp, $buff, $CHUNK_SIZE) or last;
|
||||
${$self->{BUFF_PT}} .= $buff;
|
||||
$self->{LEN} += $num;
|
||||
}
|
||||
} else {
|
||||
# string i/o
|
||||
$pos = index(${$self->{BUFF_PT}}, $/, $pos);
|
||||
if ($pos < 0) {
|
||||
$pos = $self->{LEN};
|
||||
$self->{POS} = $pos if $self->{POS} > $pos;
|
||||
} else {
|
||||
$pos += length($/);
|
||||
}
|
||||
}
|
||||
# read the line from our buffer
|
||||
$rtnVal = $pos - $self->{POS};
|
||||
$_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
|
||||
$self->{POS} = $pos;
|
||||
} else {
|
||||
$_[0] = <$fp>;
|
||||
if (defined $_[0]) {
|
||||
$rtnVal = length($_[0]);
|
||||
} else {
|
||||
$rtnVal = 0;
|
||||
}
|
||||
}
|
||||
if ($self->{DEBUG}) {
|
||||
my $pos = $self->Tell() - $rtnVal;
|
||||
unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
|
||||
$self->{DEBUG}->{$pos} = $rtnVal;
|
||||
}
|
||||
}
|
||||
return $rtnVal;
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Read whole file into buffer (without changing read pointer)
|
||||
# Inputs: 0) reference to RandomAccess object
|
||||
sub Slurp($)
|
||||
{
|
||||
my $self = shift;
|
||||
my $fp = $self->{FILE_PT} || return;
|
||||
# read whole file into buffer (in large chunks)
|
||||
my ($buff, $num);
|
||||
while (($num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS)) != 0) {
|
||||
${$self->{BUFF_PT}} .= $buff;
|
||||
$self->{LEN} += $num;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# set binary mode
|
||||
# Inputs: 0) reference to RandomAccess object
|
||||
sub BinMode($)
|
||||
{
|
||||
my $self = shift;
|
||||
binmode($self->{FILE_PT}) if $self->{FILE_PT};
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# close the file and free the buffer
|
||||
# Inputs: 0) reference to RandomAccess object
|
||||
sub Close($)
|
||||
{
|
||||
my $self = shift;
|
||||
|
||||
if ($self->{DEBUG}) {
|
||||
local $_;
|
||||
if ($self->Seek(0,2)) {
|
||||
$self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
|
||||
my $last;
|
||||
my $tot = 0;
|
||||
my $bad = 0;
|
||||
foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
|
||||
my $pos = $_;
|
||||
my $len = $self->{DEBUG}->{$_};
|
||||
if (defined $last and $last < $pos) {
|
||||
my $bytes = $pos - $last;
|
||||
$tot += $bytes;
|
||||
$self->Seek($last);
|
||||
my $buff;
|
||||
$self->Read($buff, $bytes);
|
||||
my $warn = '';
|
||||
if ($buff =~ /[^\0]/) {
|
||||
$bad += ($pos - $last);
|
||||
$warn = ' - NON-ZERO!';
|
||||
}
|
||||
printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
|
||||
}
|
||||
my $cur = $pos + $len;
|
||||
$last = $cur unless defined $last and $last > $cur;
|
||||
}
|
||||
print "$tot bytes missed";
|
||||
$bad and print ", $bad non-zero!";
|
||||
print "\n";
|
||||
} else {
|
||||
warn "File::RandomAccess DEBUG not working (file already closed?)\n";
|
||||
}
|
||||
delete $self->{DEBUG};
|
||||
}
|
||||
# close the file
|
||||
if ($self->{FILE_PT}) {
|
||||
close($self->{FILE_PT});
|
||||
delete $self->{FILE_PT};
|
||||
}
|
||||
# reset the buffer
|
||||
my $emptyBuff = '';
|
||||
$self->{BUFF_PT} = \$emptyBuff;
|
||||
$self->{LEN} = 0;
|
||||
$self->{POS} = 0;
|
||||
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
1; # end
|
||||
231
bin/lib/File/RandomAccess.pod
Normal file
231
bin/lib/File/RandomAccess.pod
Normal file
@@ -0,0 +1,231 @@
|
||||
#------------------------------------------------------------------------------
|
||||
# File: RandomAccess.pod -- Documentation for File::RandomAccess
|
||||
#
|
||||
# Description: Buffer to support random access reading of sequential file
|
||||
#
|
||||
# Legal: Copyright (c) 2003-2018 Phil Harvey (phil at owl.phy.queensu.ca)
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the same terms as Perl itself.
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
=head1 NAME
|
||||
|
||||
File::RandomAccess - Random access reads of sequential file or scalar
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
use File::RandomAccess;
|
||||
|
||||
$raf = new File::RandomAccess(\*FILE, $disableSeekTest);
|
||||
|
||||
$raf = new File::RandomAccess(\$data);
|
||||
|
||||
$err = $raf->Seek($pos);
|
||||
$num = $raf->Read($buff, $bytes);
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
Allows random access to sequential file by buffering the file if necessary.
|
||||
Also allows access to data in memory to be accessed as if it were a file.
|
||||
|
||||
=head1 METHODS
|
||||
|
||||
=over 4
|
||||
|
||||
=item B<new>
|
||||
|
||||
Creates a new RandomAccess object given a file reference or
|
||||
reference to data in memory.
|
||||
|
||||
# Read from open file or pipe
|
||||
$raf = new File::RandomAccess(\*FILE);
|
||||
|
||||
# Read from data in memory
|
||||
$raf = new File::RandomAccess(\$data);
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object or RandomAccess class name.
|
||||
|
||||
1) File reference or scalar reference.
|
||||
|
||||
2) Flag set if file is already random access (disables automatic SeekTest).
|
||||
|
||||
=item Returns:
|
||||
|
||||
Reference to RandomAccess object.
|
||||
|
||||
=back
|
||||
|
||||
=item B<SeekTest>
|
||||
|
||||
Performs test seek() on file to determine if buffering is necessary. If
|
||||
the seek() fails, then the file is buffered to allow random access.
|
||||
B<SeekTest>() is automatically called from B<new> unless specified.
|
||||
|
||||
$result = $raf->SeekTest();
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
=item Returns:
|
||||
|
||||
1 if seek test passed (ie. no buffering required).
|
||||
|
||||
=item Notes:
|
||||
|
||||
Must be called before any other i/o.
|
||||
|
||||
=back
|
||||
|
||||
=item B<Tell>
|
||||
|
||||
Get current position in file
|
||||
|
||||
$pos = $raf->Tell();
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
=item Returns:
|
||||
|
||||
Current position in file
|
||||
|
||||
=back
|
||||
|
||||
=item B<Seek>
|
||||
|
||||
Seek to specified position in file. When buffered, this doesn't quite
|
||||
behave like seek() since it returns success even if you seek outside the
|
||||
limits of the file.
|
||||
|
||||
$success = $raf->Seek($pos, 0);
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
1) Position.
|
||||
|
||||
2) Whence (0=from start, 1=from cur pos, 2=from end).
|
||||
|
||||
=item Returns:
|
||||
|
||||
1 on success, 0 otherwise
|
||||
|
||||
=back
|
||||
|
||||
=item B<Read>
|
||||
|
||||
Read data from the file.
|
||||
|
||||
$num = $raf->Read($buff, 1024);
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
1) Buffer.
|
||||
|
||||
2) Number of bytes to read.
|
||||
|
||||
=item Returns:
|
||||
|
||||
Number of bytes actually read.
|
||||
|
||||
=back
|
||||
|
||||
=item B<ReadLine>
|
||||
|
||||
Read a line from file (end of line is $/).
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
1) Buffer.
|
||||
|
||||
=item Returns:
|
||||
|
||||
Number of bytes read.
|
||||
|
||||
=back
|
||||
|
||||
=item B<Slurp>
|
||||
|
||||
Read whole file into buffer, without changing read pointer.
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
=item Returns:
|
||||
|
||||
Nothing.
|
||||
|
||||
=back
|
||||
|
||||
=item B<BinMode>
|
||||
|
||||
Set binary mode for file.
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
=item Returns:
|
||||
|
||||
Nothing.
|
||||
|
||||
=back
|
||||
|
||||
=item B<Close>
|
||||
|
||||
Close the file and free the buffer.
|
||||
|
||||
=over 4
|
||||
|
||||
=item Inputs:
|
||||
|
||||
0) Reference to RandomAccess object.
|
||||
|
||||
=item Returns:
|
||||
|
||||
Nothing.
|
||||
|
||||
=back
|
||||
|
||||
=back
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Copyright 2003-2018 Phil Harvey (phil at owl.phy.queensu.ca)
|
||||
|
||||
This library is free software; you can redistribute it and/or modify it
|
||||
under the same terms as Perl itself.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
L<Image::ExifTool(3pm)|Image::ExifTool>
|
||||
|
||||
=cut
|
||||
|
||||
# end
|
||||
Reference in New Issue
Block a user