This commit is contained in:
2019-05-07 01:54:00 -07:00
parent b0861846be
commit 8f5ea6b62d

399
leaf.php
View File

@@ -2,10 +2,19 @@
<?php <?php
// Leaf - Tools for Book Scans // Leaf - Tools for Book Scans
$version = "0.7.1"; $version = "0.8.0";
$time_start = microtime(true); $time_start = microtime(true);
date_default_timezone_set("America/Los_Angeles"); date_default_timezone_set("America/Los_Angeles");
$xt = 3; // threads
// Detect cores
// Ask macOS (system_profiler) for the core count and use it as the thread
// count $xt when it is one of the values listed below; otherwise use 2.
$reasonable_cores = array(1,2,3,4,6,8,16);
// shell_exec() returns null when the pipeline prints nothing (for example when
// system_profiler is not installed), so coerce to string before trimming.
// The int cast keeps only the leading number, so $xt is always a clean
// integer even if the profiler line carries trailing text after the count.
$cores = (int) trim((string) shell_exec("system_profiler SPHardwareDataType 2> /dev/null | grep 'Total Number of Cores' | cut -d: -f2 | tr -d ' '"));
if (in_array($cores, $reasonable_cores, true)) {
    $xt = $cores;
} else {
    $xt = 2;
}
// Functions // Functions
@@ -152,22 +161,31 @@ USAGE: leaf [mode] [-options] directory
Modes: Modes:
clean remove scratch files clean remove scratch files
crop define EXIF crop values using template files crop define EXIF crop values using template files
-crops=<num> specify how many files to use -crops=<num> (6) specify how many files to use
desort remove image sequence prefix desort remove image sequence prefix
deskew detect rotation angles deskew detect rotation angles
-max=<num> angles greater than this value are ignored, default .4 -max=<num> (.4) angles greater than this value are ignored
-pad=<num> pixels to pad around crop area, default 80 -pad=<num> (80) pixels to pad around crop area
-contrast=<num> contrast boost, default 20 -contrast=<num> (20) levels attenuation
-size=<num> size in pixels of dmap, default 2200 -size=<num> (2200) dmap size in pixels
-nomap don't use dmaps -nomap don't use dmaps
divide wrapper for imagemagick Divide_Src divide wrapper for imagemagick Divide_Src
-map=<file> specify brightness file -map=<file> specify brightness file
-adjust=<params> levels adjustment (ex. \"0%,98%,.9\") -adjust=<params> levels adjustment (ex. \"0%,98%,.9\")
-q=<quality> quality out of 100 -q=<quality> (95) quality out of 100
-pages map file is for LR rotated pages -pages map file is for LR rotated pages
dupes Find duplicate images using computed PHASH on thumbnails dupes Find duplicate images using computed PHASH on thumbnails
-threshold=<num> match threshold -threshold=<num> (10) match threshold
-walk=<num> comparison scope (compare n image to n, n+1, n+2, etc) -walk=<num> (5) comparison scope (compare n image to n, n+1, n+2, etc)
fixpdf de-rotate all pages in a pdf
-file=<pdf> pdf file in target dir
generate create final jpg images for pdf creation
-pixels=<num> scale to x pixels on longest side
-inches=<num> set dpi to x inches on longest side
-q=<0-100> (90) jpeg quality
-adjust=<params> levels adjustment (ex. \"0%,98%,.9\")
-px=<num> png size multiplier
-padjust=<params> png levels adjustment (otherwise calculated from adjust)
makepdf combine images into a pdf with img2pdf makepdf combine images into a pdf with img2pdf
profile apply xmp profile to images (requires exiv2 > 0.25) profile apply xmp profile to images (requires exiv2 > 0.25)
-file=<file> xmp profile -file=<file> xmp profile
@@ -176,13 +194,13 @@ resort reorder image sequence by adding a new image
-x=<num> position of inserted file -x=<num> position of inserted file
review print a table of image dimension statistics review print a table of image dimension statistics
rotate batch transform rotate (lossy) rotate batch transform rotate (lossy)
-x=<angle> rotation angle, default=90 -x=<angle> (90) rotation angle
-q=<0-100> jpeg quality, default read from source or 95 -q=<0-100> (auto) jpeg quality, default read from source or 95
setdpi set image dpi with exiftool setdpi set image dpi with exiftool
-x=<dpi> specify dpi -x=<dpi> specify dpi
-height=<inches> calculate dpi from specified height -height=<inches> calculate dpi from specified height
sort sort files as AAABBB -> ABABAB sort sort files as AAABBB -> ABABAB
-m=<num> specify midpoint (cover image) -m=<num> (auto) specify midpoint (cover image)
strip strip exif crop values from images with exiftool strip strip exif crop values from images with exiftool
"; ";
@@ -196,20 +214,62 @@ fin();
echo Welcome("Generate final jpg images"); echo Welcome("Generate final jpg images");
///////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////
// Output directory is "<target dir>_generated", next to the target directory.
$dest = rtrim(args("dir"), '/')."_generated";

// glob() may return false on error, so test with empty() rather than count().
$existing = glob($dest."/*.*");
if (!empty($existing)) {
    msg("Files already exist in ".$dest.". Move to trash?",2);
    // Quote the path so directory names containing spaces or shell
    // metacharacters reach the trash utility intact.
    exec("/opt/local/bin/trash -a ".escapeshellarg($dest));
    // Only recreate the directory if the trash call actually removed it.
    if (!is_dir($dest)) {
        mkdir($dest);
    }
} elseif (!is_dir($dest)) {
    mkdir($dest);
}

// -pixels and -inches are required; msg() is called with level 1 when either
// is missing (presumably fatal -- msg() is defined elsewhere in the file).
if (args("pixels")) {
    $pixels = args("pixels");
} else {
    msg("No pixel value specified",1);
}

if (args("inches")) {
    $inches = args("inches");
} else {
    msg("No inches value specified",1);
}

// Optional settings with their defaults: jpeg quality 90, png multiplier 2.
$quality = args("q") ? args("q") : 90;
$multiplier = args("px") ? args("px") : 2;
$files = glob(args("dir")."*.{jpg,JPG}", GLOB_BRACE); $files = glob(args("dir")."*.{jpg,JPG}", GLOB_BRACE);
echo "Checking EXIF values: "; echo "Checking EXIF values: ";
foreach ($files as $file) { foreach ($files as $file) {
echo "."; echo ".";
$lines = shell_exec("exiftool -s -s -s -f -Label -ConvertToGrayscale ".$file); $lines = shell_exec("exiftool -s -s -s -f -Label -ConvertToGrayscale -ImageWidth -ImageHeight ".$file);
$parts = explode("\n", $lines); $parts = explode("\n", $lines);
if ($parts[0] == "bitmap") { $width = $parts[2];
$bitmap[] = $file; $height = $parts[3];
} elseif ($parts[1] == "True") { $ratio = $height/$width;
$grey[] = $file; if ($ratio > 3) {
$spine[] = array($file,$width,$height);
} else { } else {
$color[] = $file; $dims[$file] = $ratio;
}
if ($parts[0] == "bitmap") {
$bitmap[] = array($file,$width,$height);
} elseif ($parts[1] == "True") {
$grey[] = array($file,$width,$height);
} elseif ($ratio < 3) {
$color[] = array($file,$width,$height);
} }
} }
@@ -217,18 +277,151 @@ echo "\n\n";
echo "Bitmap files: "; echo "Bitmap files: ";
if (is_array($bitmap)) { echo count($bitmap); } else { echo "None"; } if (is_array($bitmap)) { echo count($bitmap); } else { echo "None"; }
echo "\n";
echo "\n\n";
echo "Greyscale files: "; echo "Greyscale files: ";
if (is_array($grey)) { echo count($grey); } else { echo "None"; } if (is_array($grey)) { echo count($grey); } else { echo "None"; }
echo "\n";
echo "\n\n";
echo "Color files: "; echo "Color files: ";
if (is_array($color)) { echo count($color); } else { echo "None"; } if (is_array($color)) { echo count($color); } else { echo "None"; }
echo "\n";
echo "Spine files: ";
if (is_array($spine)) { echo count($spine); } else { echo "None"; }
echo "\n\n";
// $spine is only assigned when a spine image was detected, so check that it
// exists before counting it.
if (!empty($spine) && count($spine) > 1) {
    msg("More than one spine file found, meltdown",1);
}

// $dims holds one height/width ratio per page file at this point. Without any
// entry there is nothing to average (and the division below would be by zero).
if (empty($dims)) {
    msg("No page images found, cannot compute average page ratio",1);
}

// Explicit separators keep number_format() from inserting a thousands comma
// into values that are reused as numbers below.
$ratioavg = number_format(array_sum($dims)/count($dims),3,'.','');
echo "Average page ratio: ".$ratioavg;

if ($ratioavg > 1) { // page is tall
    // Longest side is the height: -pixels / -inches apply to it and the
    // width is derived from the average ratio.
    $page_height = $pixels;
    $page_width = floor($page_height/$ratioavg);
    $paper_height = $inches;
    $paper_width = number_format($paper_height/$ratioavg,2,'.','');
} else {
    // Longest side is the width: derive the height instead.
    $page_width = $pixels;
    $page_height = floor($page_width*$ratioavg);
    $paper_width = $inches;
    $paper_height = number_format($paper_width*$ratioavg,2,'.','');
}

$dpi = $page_height / $paper_height;
// png output is scaled by $multiplier, so its dpi scales by the same factor.
$canonicalpngdpi = $dpi*$multiplier;
// 39.37007874016 = inches per metre: converts dots-per-inch to dots-per-metre
// for the PixelsPerUnit tags written later.
$pngdpi = $canonicalpngdpi*39.37007874016;

// From here on $dims is reused as the "WxH" geometry string for jpg output.
$dims = $page_width."x".$page_height;
$pngdims = ($page_width*$multiplier)."x".($page_height*$multiplier);

echo "\n";
echo "Page size: ".$paper_width."\" x ".$paper_height."\"";
echo "\n";
echo "Target pixel dimensions: ".$dims." (png=".$pngdims.")";
echo "\n";
echo "DPI = ".$dpi." (png=".$canonicalpngdpi.")";
// Build the jpg work list. Each entry is array(file, width, height[, kind]):
// kind 1 marks a greyscale page, kind 2 the spine, no kind a color page.
// $color, $grey and $spine are only set when matching files were found, so
// each one is checked before use.
$jpg = array();
if (!empty($color)) {
    foreach ($color as $parts) {
        $jpg[] = $parts;
    }
}
if (!empty($grey)) {
    foreach ($grey as $parts) {
        $parts[] = 1;
        $jpg[] = $parts;
    }
}
if (!empty($spine)) {
    $spine[0][] = 2;
    $jpg[] = $spine[0];
}

// $thread_a collects the convert commands, $thread_b the matching exiftool
// commands that stamp the resolution on each output file. Both start empty so
// they are valid arrays even when there is nothing to convert.
$thread_a = array();
$thread_b = array();
foreach ($jpg as $parts) {
    $file = $parts[0];
    $output = $dest."/".basename($file);
    $kind = isset($parts[3]) ? $parts[3] : 0;

    if ($kind == 1) {
        $msg = "Processing greyscale page ".$file." @ ".$dims."!";
        $cmd = "convert -resize ".$dims."\! -colorspace gray ";
    } elseif ($kind == 2) {
        // The spine is fitted inside a square of the page height, without
        // the "!" flag that forces exact dimensions on pages.
        $sdims = $page_height."x".$page_height;
        $msg = "Processing spine ".$file." @ ".$sdims;
        $cmd = "convert -resize ".$sdims." ";
    } else {
        $msg = "Processing color page ".$file." @ ".$dims."!";
        $cmd = "convert -resize ".$dims."\! ";
    }

    if (args("adjust")) {
        $msg .= " (".args("adjust").")";
        // Quoted: the levels string contains "%" and "," characters.
        $cmd .= "-level ".escapeshellarg(args("adjust"))." ";
    }

    $msg .= " [Q=".$quality."]";
    // Quote both paths so file names with spaces or shell metacharacters
    // do not split or alter the command.
    $cmd .= "-quality ".$quality." ".escapeshellarg($file)." ".escapeshellarg($output);
    $thread_a[] = array($msg, $cmd);
    $thread_b[] = array(".","exiftool -overwrite_original -Xresolution=".$dpi." -Yresolution=".$dpi." -jfif:Xresolution=".$dpi." -jfif:Yresolution=".$dpi." ".escapeshellarg($output));
}
echo "\n\n"; echo "\n\n";
// First pass: hand the queued convert commands to multiexec() together with
// the thread count $xt.
msg("Beginning multithreaded convert with {$xt} threads");
multiexec($thread_a, $xt);
print "\n";
// Second pass: the queued exiftool commands that write the resolution tags.
msg("Setting DPI: ");
multiexec($thread_b, $xt);
// Bitmap pages are written as 1-bit png files at $pngdims.
//
// Levels for the png conversion: -padjust is used verbatim when given;
// otherwise they are derived from -adjust (default "0%,100%,1") by raising the
// black point by 60, lowering the white point by 15 and reducing gamma by .7.
// The result is the same for every file, so it is computed once up front.
$adjust = args("adjust") ? args("adjust") : "0%,100%,1";
// Array union fills in any component missing from a short -adjust value.
$levels = explode(",", $adjust) + array("0%", "100%", "1");
$black = (float) rtrim($levels[0],"%");
$white = (float) rtrim($levels[1],"%");
$gamma = (float) $levels[2];
if (args("padjust")) {
    $radjust = args("padjust");
} else {
    $radjust = ($black+60)."%".",".($white-15)."%".",".($gamma-.7);
}

// $thread_c collects the convert commands, $thread_d the exiftool commands
// that stamp the png resolution. $bitmap is only set when bitmap files were
// found, so it is checked before iterating.
$thread_c = array();
$thread_d = array();
if (!empty($bitmap)) {
    foreach ($bitmap as $parts) {
        $file = $parts[0];
        // Same base name as the source, with a png extension.
        $output = $dest."/".pathinfo($file,PATHINFO_FILENAME).".png";
        $msg = "Processing bitmap page ".$file." @ ".$pngdims."!";
        $msg .= " (".$radjust.")";
        $cmd = "convert -resize ".$pngdims."\! ";
        $cmd .= "-level ".escapeshellarg($radjust)." ";
        // Quote paths so names with spaces or shell metacharacters survive.
        $cmd .= "-monochrome -colors 2 -depth 1 -negate ".escapeshellarg($file)." ".escapeshellarg($output);
        $thread_c[] = array($msg, $cmd);
        $thread_d[] = array(".","exiftool -overwrite_original -PixelsPerUnitX=".$pngdpi." -PixelsPerUnitY=".$pngdpi." ".escapeshellarg($output));
    }
}

echo "\n\n";
msg("Beginning multithreaded convert with ".$xt." threads");
multiexec($thread_c,$xt);
echo "\n";
msg("Setting DPI: ");
multiexec($thread_d,$xt);
fin(); fin();
@@ -250,6 +443,43 @@ if (!is_dir("scratch")) {
fin(); fin();
////////////////////////////////////////////////////////////////////////////////////////////////
// Note: Edges
//////////////////////
} elseif (args("app") == "edges") {
echo Welcome("Edge detection");
/////////////////////////////////////////////////////////////////////////
// Output directory is "<target dir>_edges", next to the target directory.
$dest = rtrim(args("dir"), '/')."_edges";

// glob() may return false on error, so test with empty() rather than count().
$existing = glob($dest."/*.*");
if (!empty($existing)) {
    msg("Files already exist in ".$dest.". Move to trash?",2);
    exec("/opt/local/bin/trash -a ".escapeshellarg($dest));
    // Only recreate the directory if the trash call actually removed it.
    if (!is_dir($dest)) {
        mkdir($dest);
    }
} elseif (!is_dir($dest)) {
    mkdir($dest);
}

$files = glob(args("dir")."*.*");
if (!is_array($files)) {
    $files = array();
}

// ImageMagick parameters: bounding box used when reading each image, and the
// argument strings passed to -canny and -hough-lines.
$size = "1400";
$canny = "0x1+10%+30%";
$hough = "9x9+200";

// One convert command per input file; starts empty so it is a valid array
// even when the target directory has no matching files.
$thread = array();
foreach ($files as $file) {
    $name = basename($file);
    $output = $dest."/".pathinfo($file,PATHINFO_FILENAME).".jpg";
    $msg = "Detecting edges in ".$file;
    // Every path is quoted so names with spaces or shell metacharacters do
    // not break the command; the "[WxH]" read modifier stays attached to the
    // input file name inside the same quoted argument.
    $cmd = "convert ".escapeshellarg($file."[".$size."x".$size."]")." -auto-orient \( +clone -canny ".$canny." -write ".escapeshellarg("/tmp/".$name."_canny.png")." ";
    $cmd .= "-background none -fill red -stroke red -strokewidth 1 -hough-lines ".$hough." -write ".escapeshellarg("/tmp/".$name."_lines.png")." \) ";
    $cmd .= "-composite ".escapeshellarg($output);
    $thread[] = array($msg, $cmd);
}

msg("Beginning multithreaded convert with ".$xt." threads");
multiexec($thread,$xt);
fin();
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
// Note: Rotate // Note: Rotate
////////////////////// //////////////////////
@@ -269,7 +499,9 @@ if (args("x")) {
$dest = rtrim(args("dir"), '/')."_rotated"; $dest = rtrim(args("dir"), '/')."_rotated";
if (count(glob($dest."/*.*"))) { if (count(glob($dest."/*.*"))) {
msg("Files already exist in destination ".$dest,1); msg("Files already exist in ".$dest.". Move to trash?",2);
exec("/opt/local/bin/trash -a ".$dest);
mkdir($dest);
} elseif (!is_dir($dest)) { } elseif (!is_dir($dest)) {
mkdir($dest); mkdir($dest);
} }
@@ -412,7 +644,7 @@ if (ask("Create spreads? (Y/n)") != "n") {
exec("xattr -c ".implode(" ", $sheets)); exec("xattr -c ".implode(" ", $sheets));
$term = chop(shell_exec("env | grep 'TERM_PROGRAM=' | cut -f2- -d=")); $term = chop(shell_exec("env | grep 'TERM_PROGRAM=' | cut -f2- -d="));
if ($term == "iTerm.app") { if ($term == "iTerm.app") {
echo shell_exec("imgcat ".implode(" ", $sheets)); echo shell_exec("~/.iterm2/imgcat ".implode(" ", $sheets));
} else { } else {
exec("open ".implode(" ", $sheets)." -b com.apple.Preview"); exec("open ".implode(" ", $sheets)." -b com.apple.Preview");
} }
@@ -775,13 +1007,59 @@ if (file_exists($dest)) {
if (file_exists($dest)) { msg("Freak accident",1); } if (file_exists($dest)) { msg("Freak accident",1); }
} }
echo "Creating pdf...\n\n"; echo "Creating pdf...\n\n";
exec("img2pdf --verbose --viewer-page-layout twocolumnright --output ".$dest." ".$input); exec("img2pdf --verbose --viewer-page-layout twocolumnright --creator 'Leaf ".$version."' --output ".$dest." ".$input);
echo "\n"; echo "\n";
msg("Press return to open in Acrobat",2); msg("Press return to open in Acrobat",2);
exec("open ".$dest." -b com.adobe.Acrobat.Pro"); exec("open ".$dest." -b com.adobe.Acrobat.Pro");
fin(); fin();
////////////////////////////////////////////////////////////////////////////////////////////////
// Note: Fixpdf
//////////////////////
} elseif (args("app") == "fixpdf") {
echo Welcome("Systematically de-rotate pages in pdf file with pdftk");
/////////////////////////////////////////////////////////////////////////
// The pdf named by -file is looked up inside the target directory.
$file = args("dir").args("file");
if (!file_exists($file)) {
    msg("Problem reading file ".$file,1);
}

// The pdftk dump_data report is split on "PageMediaBegin": chunk 0 is whatever
// precedes the first page record, so chunk N (N >= 1) is treated as page N.
// shell_exec() returns null when pdftk prints nothing, hence the string cast.
$info = (string) shell_exec("pdftk ".escapeshellarg($file)." dump_data");
$chunks = explode("PageMediaBegin", $info);
$items = "";
$hits = 0;
foreach ($chunks as $page => $chunk) {
    // Report every page whose recorded rotation is non-zero.
    if (preg_match("/(PageMediaRotation: )(\d+)(\n)/",$chunk,$matches) && $matches[2] > 0) {
        echo "Page ".$page." is rotated ".$matches[2]."\n";
        $hits++;
    }
    // Every real page (index 0 is skipped) goes into the cat list with the
    // "north" suffix -- NOTE(review): per pdftk's cat syntax this should set
    // an absolute rotation of 0; confirm against the installed pdftk.
    if ($page) {
        $items .= $page."north ";
    }
}

if (!$hits) {
    msg("No rotated pages found.",1);
}

// The derotated copy is written to the current working directory.
$ext = pathinfo($file, PATHINFO_EXTENSION);
$output = basename($file,".".$ext)."_derotated.pdf";
echo "\n";
echo "Writing ".$output;
echo "\n";
exec("pdftk ".escapeshellarg($file)." cat ".$items."output ".escapeshellarg($output));
echo "\n";

// Only replace the original once the new file really exists; otherwise a
// failed pdftk run would send the only copy to the trash.
if (file_exists($output)) {
    msg("Remove original file?",2);
    exec("/opt/local/bin/trash -a ".escapeshellarg($file)."; mv ".escapeshellarg($output)." ".escapeshellarg($file));
} else {
    msg("Problem writing file ".$output,1);
}
fin();
//////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////
// Note: Divide // Note: Divide
// We assume a .JPG file is an unmodified DCIM image and convert to a greyscale tif for overlay. // We assume a .JPG file is an unmodified DCIM image and convert to a greyscale tif for overlay.
@@ -817,7 +1095,9 @@ if (!args("map")) {
$dest = rtrim(args("dir"), '/')."_divided"; $dest = rtrim(args("dir"), '/')."_divided";
if (count(glob($dest."/*.*"))) { if (count(glob($dest."/*.*"))) {
msg("Files already exist in destination ".$dest,1); msg("Files already exist in ".$dest.". Move to trash?",2);
exec("/opt/local/bin/trash -a ".$dest);
mkdir($dest);
} elseif (!is_dir($dest)) { } elseif (!is_dir($dest)) {
mkdir($dest); mkdir($dest);
} }
@@ -828,38 +1108,65 @@ if (args("q")) {
$quality = 95; $quality = 95;
} }
// With -pages the brightness map is rotated into a left (90 degrees) and a
// right (270 degrees) variant under scratch/, named "<map>_left.<ext>" and
// "<map>_right.<ext>".
if ($pages) {
    $ext = pathinfo($map, PATHINFO_EXTENSION);
    // Split off only the trailing ".ext"; a plain str_replace() would also
    // rewrite the same text anywhere else in the path.
    $suffix = ($ext === "") ? "" : ".".$ext;
    $stem = ($suffix === "") ? $map : substr($map, 0, -strlen($suffix));
    $mapleft = "scratch/".$stem."_left".$suffix;
    $mapright = "scratch/".$stem."_right".$suffix;

    // Make sure the output directory exists before convert writes into it.
    $mapdir = dirname($mapleft);
    if (!is_dir($mapdir)) {
        mkdir($mapdir, 0777, true);
    }

    // Paths are quoted so names with spaces or shell metacharacters survive.
    exec("convert -rotate 90 -quality 95 ".escapeshellarg($map)." ".escapeshellarg($mapleft));
    exec("convert -rotate 270 -quality 95 ".escapeshellarg($map)." ".escapeshellarg($mapright));
}
echo "Building threads: "; echo "Building threads: ";
$thread = array(); $thread = array();
foreach ($files as $file) { foreach ($files as $file) {
$msg = "Dividing ".$file." with ".$map.", Q=".$quality; $msg = "Dividing ".$file." with ".$map.", Q=".$quality;
list ($width, $height) = getimagesize($map); $cmd = "convert ".$file." ";
list ($twidth, $theight) = getimagesize($file); $output = $dest."/".basename($file);
if ($width != $twidth | $height != $theight) {
$tmap = $map."'[".$twidth."x".$theight."!]'";
$msg .= " (resize map) ";
} else {
$tmap = $map;
}
if (args("adjust")) {
$msg .= " (".args("adjust").") ";
$cmd = "convert ".$file." ".$tmap." -compose Divide_Src -composite -level ".args("adjust")." -quality ".$quality." ".$dest."/".basename($file);
} else {
$cmd = "convert ".$file." ".$tmap." -compose Divide_Src -composite -quality ".$quality." ".$dest."/".basename($file);
}
if ($pages) { if ($pages) {
$label = trim(shell_exec("exiftool -s -s -s -Label ".$file)); $label = trim(shell_exec("exiftool -s -s -s -Label ".$file));
if ($label == "non-page") { if ($label == "non-page") {
$msg .= " <Skipping non-page>"; $msg .= " <Skipping non-page>";
$cmd = "true"; $cmd = "cp ".$file." ".$output;
echo "o"; echo "o";
} else { $thread[] = array($msg, $cmd);
echo "."; continue;
} }
if (substr(basename($file), 0, 3) % 2 == 0) {
$use = $mapright;
} else { } else {
echo "."; $use = $mapleft;
} }
$cmd = "true"; list ($width, $height) = getimagesize($use);
list ($twidth, $theight) = getimagesize($file);
if ($width != $twidth | $height != $theight) {
$tmap = $use."'[".$twidth."x".$theight."!]'";
$msg .= " (resize map) ";
} else {
$tmap = $use;
}
} else {
$tmap = $map;
}
$cmd .= $tmap." -compose Divide_Src -composite -quality ".$quality." ";
if (args("adjust")) {
$msg .= " (".args("adjust").") ";
$cmd .= "-level ".args("adjust")." ";
}
$cmd .= $output;
echo ".";
$thread[] = array($msg, $cmd); $thread[] = array($msg, $cmd);
} }