This has been used to produce a Texinfo file from the entire page database. See ExtractTexinfo. -- MeatBall:AlexSchroeder
#! /usr/bin/perl -w
# Script entry point: rewrite the page database files in the directory given
# as the first command-line argument.  Without an argument the directory
# "test" is used.
rewrite(@ARGV ? $ARGV[0] : "test");
# Extract the page text from every "*.db" file in $directory.
#
# For each file "NAME.db" returned by read_directory, the raw content is
# split into fields by split_page, the result is passed through split_section
# and split_text, and the value stored under the "text" key is written to
# "$directory/NAME" (the same name without the ".db" suffix).
#
# Side effects: sets the package globals $FS, $FS1, $FS2 and $FS3, which the
# split_* subs read, and prints progress messages to STDOUT.
# Dies (via the helper subs) when a directory or file cannot be accessed.
sub rewrite {
    my ($directory) = @_;

    # Base separator string; the three separators below are derived from it.
    $FS = "\xb3";
    # If you have set $NewFS to 1, then remove the # from the start of the next line.
    # $FS = "\x1e\xff\xfe\x1e";
    $FS1 = $FS . "1";
    $FS2 = $FS . "2";
    $FS3 = $FS . "3";

    foreach my $file (read_directory($directory)) {
        print "Reading $file...\n";
        my %page    = split_page(read_file("$directory/$file"));
        my %section = split_section(%page);
        my %text    = split_text(%section);

        # Strip the literal ".db" suffix on a copy, so the list element that
        # $file aliases is left untouched.
        (my $target = $file) =~ s/\.db$//;
        print "Writing $target...\n";

        # A file without a "text" entry still produces an (empty) output
        # file, but never hands an undefined value to the writer.
        my $body = $text{text};
        if (!defined $body) {
            warn "No text found in $file; writing an empty file.\n";
            $body = '';
        }
        write_file("$directory/$target", $body);
    }
    print "Done.\n";
}
# Return the names (without the directory part) of all plain files in
# $dirname whose name ends in ".db", in the order readdir delivers them.
# Dies if the directory cannot be opened.
sub read_directory {
    my ($dirname) = @_;
    opendir(my $dh, $dirname) or die "can't opendir $dirname: $!";
    # readdir returns bare names, so -f needs the directory prepended; it
    # filters out subdirectories and other non-plain entries.
    my @db_files = grep { /\.db$/ && -f "$dirname/$_" } readdir($dh);
    closedir($dh);
    return @db_files;
}
# Return the complete content of $filename as a single string.
# Dies if the file cannot be opened for reading.
sub read_file {
    my ($filename) = @_;
    local $/ = undef; # Read complete files
    # Three-argument open takes the name literally; the two-argument form
    # would strip surrounding whitespace and interpret mode characters.
    open(my $in, '<', $filename) or die "can't read $filename: $!";
    my $data = <$in>;
    close($in);
    return $data;
}
# Split raw page data on the $FS1 separator and return the pieces as a
# key/value hash (pieces alternate key, value, key, value, ...).
sub split_page {
    my $raw = shift;
    # The negative limit keeps trailing empty pieces.
    my %fields = split(/$FS1/, $raw, -1);
    return %fields;
}
# Take a page hash, split the value stored under its "text_default" key on
# the $FS2 separator, and return the pieces as a key/value hash.
sub split_section {
    my %fields = @_;
    my $packed = $fields{text_default};
    # The negative limit keeps trailing empty pieces.
    my %parts = split(/$FS2/, $packed, -1);
    return %parts;
}
# Take a section hash, split the value stored under its "data" key on the
# $FS3 separator, and return the pieces as a key/value hash.
sub split_text {
    my %parts = @_;
    my $packed = $parts{data};
    # The negative limit keeps trailing empty pieces.
    my %pieces = split(/$FS3/, $packed, -1);
    return %pieces;
}
# Write $data to $file, replacing any existing content.
# Dies if the file cannot be opened, written or closed.  Returns 1 on success.
sub write_file {
    my ($file, $data) = @_;
    # Three-argument open takes the name literally; building the mode into
    # the name string would strip surrounding whitespace from it.
    open(my $out, '>', $file) or die "can't open $file: $!";
    print {$out} $data or die "can't write $file: $!";
    # Buffered write errors only surface when the handle is closed.
    close($out) or die "can't close $file: $!";
    return 1;
}
The script did not work for me, but when I changed line 8 from
$FS = "\xb3";
to
$FS = "\x1e\xff\xfe\x1e";
it was OK. Maybe the separator has changed?
Lars