#!/usr/bin/php "; die; }
<?php
// NOTE(review): line 1 is reproduced verbatim from the damaged source (PHP's
// CLI skips a leading "#!" line). The original prologue that followed the
// shebang is missing from this copy: the opening tag was re-added here, but
// the usage check and whatever defines $version, $zpath, $bdest, $wopt_hash,
// $wopt_meta, $wopt_thumbs and the ProgressBar class still has to be restored.

// Check for bundle
if ($zpath == "/") {
    $blabel = "root";
} else {
    // Anything outside [A-Za-z0-9.] becomes "_" so the label is safe in a file name.
    $blabel = preg_replace("/[^A-Za-z0-9\.]/", "_", basename($zpath));
}

// "Paranoid" mode (timestamp verification in the per-file pass) is enabled
// whenever the scanned path is writable.
$wopt_paranoid = is_writable($zpath) ? 1 : 0;

// Bundle directory: <dest>/<first 3 chars of crc32(path)>_<label>.bundle
$bpath = chop($bdest,"/")."/".substr(crc32($zpath),0,3)."_".$blabel.".bundle";
foreach (array($bpath, $bpath."/thumbs") as $bundle_dir) {
    // Fail here with a clear message instead of later inside PDO.
    if (!is_dir($bundle_dir) && !mkdir($bundle_dir)) {
        echo "Could not create ".$bundle_dir."\n";
        die;
    }
}

$wopt_hash_limit = 1; // don't hash if exceeds in gigs, 0 for unlimited
$wopt_thumb_size = "512";

// Treat these directories as files
$wopt_bundles = array(
    "app", "bundle", "sparsebundle", "photoslibrary", "aplibrary", "apvault",
    "abbu", "calendar", "framework", "plugin", "kext", "rtfd"
);
$wopt_nodescend = array();
foreach ($wopt_bundles as $bundle) {
    $wopt_nodescend[] = "*.".$bundle;
}

// Ignore matching files and directories
$wopt_ignore = array(
    ".DS_Store",
    ".DocumentRevisions-V100",
    ".Spotlight-V100",
    ".TemporaryItems",
    ".apdisk",
    ".com.apple.timemachine.donotpresent",
    ".fseventsd",
    ".metadata-never-index",
    ".neofinder.abemeda.volinfo.xml"
);

$max_label = 50;

// Metadata tools
$bin_gfi = "/Applications/Xcode.app/Contents/Developer/usr/bin/GetFileInfo";
$bin_mediainfo = "/opt/local/bin/mediainfo";
$bin_exiftool = "/opt/local/bin/exiftool";
$bin_tq = "/opt/local/bin/ql-thumbnail-lossy";
$bin_tv = "/opt/local/bin/vipsthumbnail";
$bin_tf = "/usr/local/bin/ffmpegthumbnailer";

// Media extensions //////////////////////////////////////////

// Extensions thumbnailed with ffmpegthumbnailer / vipsthumbnail
$t_files['ffmpeg'] = array(
    "mkv", "avi", "mpeg", "mpg", "vob", "mp4", "m4v", "m2v", "m2ts", "asf",
    "wmv", "rm", "divx", "fla", "flv", "webm"
);
$t_files['vips'] = array( "jpg", "jpeg", "tif", "tiff", "gif", "psd", "png" );

// Extensions passed to mediainfo
$m_files = array(
    "mkv", "ogg", "avi", "wav", "mpeg", "mpg", "vob", "mp4", "m2v", "mp3",
    "asf", "wma", "wmv", "qt", "mov", "rm", "ifo", "ac3", "dts", "aac", "ape",
    "flac", "aiff", "m2ts"
);

// Extensions passed to exiftool
$e_files = array(
    "ai", "aiff", "ape", "asf", "avi", "bmp", "divx", "dng", "doc", "docx",
    "eps", "epub", "exe", "exif", "fla", "flac", "flv", "gif", "icc", "iso",
    "jpg", "jpeg", "m2ts", "m4a", "m4b", "m4v", "mkv", "mobi", "azw", "azw3",
    "mov", "qt", "mp3", "mp4", "mpeg", "mpg", "m2v", "nef", "numbers", "ogg",
    "pages", "pdf", "pict", "png", "ppm", "ppt", "psd", "psb", "qif", "raw",
    "rtf", "sr2", "srf", "svg", "swf", "tiff", "tif", "torrent", "vcf", "vob",
    "wav", "webm", "wma", "wmv", "xls", "xlsx", "xmp", "zip"
);

// Add an upper-case twin of every extension (foreach iterates a copy, so
// appending inside the loop is safe).
foreach ($e_files as $ext) { $e_files[] = strtoupper($ext); }
foreach ($m_files as $ext) { $m_files[] = strtoupper($ext); }
foreach ($t_files['ffmpeg'] as $ext) { $t_files['ffmpeg'][] = strtoupper($ext); }
foreach ($t_files['vips'] as $ext) { $t_files['vips'][] = strtoupper($ext); }

// Functions //////////////////////////////////////////

/*
function getParents($zpath, $pathname) {
    $path = dirname($pathname);
    $parts = explode("/",trim(substr($path,strlen(basename($zpath))),"/"));
    foreach ($parts as $index => $part) {
        $parents[] = array($part, md5($zpath."/".implode("/",array_slice($parts, 0, $index+1))));
    }
    return $parents;
}
*/

/**
 * Build a display label from the basename of a path.
 *
 * Names longer than $max are cut and end in "(...).<ext>" (or "(...)" when
 * the name has no extension); the result is right-padded with spaces to $min.
 *
 * @param string   $pathname Path or file name.
 * @param int      $max      Maximum label length before truncation.
 * @param int|null $min      Minimum width to pad to; null means no padding.
 * @return string
 */
function shortlabel($pathname, $max, $min = null) {
    $basename = basename($pathname);
    $extension = pathinfo($basename, PATHINFO_EXTENSION);
    // No dangling "." when there is no extension to show.
    $suffix = ($extension === "") ? "(...)" : "(...).".$extension;

    if (strlen($basename) > $max) {
        // Clamp at 0: a negative length would make substr() trim from the
        // end of the name instead of limiting the prefix.
        $keep = max(0, $max - strlen($suffix));
        $label = substr($basename, 0, $keep).$suffix;
    } else {
        $label = $basename;
    }

    // str_pad() leaves the label untouched when it is already $min or longer.
    return str_pad($label, (int) $min);
}

/**
 * Format a byte count with a unit suffix, e.g. human_filesize(2048) => "2.00kB".
 *
 * The unit is picked from the number of digits in $bytes (one step per three
 * digits) while the value is divided by powers of 1024.
 *
 * @param int|float|string $bytes
 * @param int              $decimals Digits after the decimal point.
 * @return string
 */
function human_filesize($bytes, $decimals = 2) {
    $size = array('B','kB','MB','GB','TB','PB','EB','ZB','YB');
    $factor = (int) floor((strlen((string) $bytes) - 1) / 3);
    // Keep the index inside the unit table (empty or oversized input).
    $factor = max(0, min($factor, count($size) - 1));
    return sprintf("%.{$decimals}f", (float) $bytes / pow(1024, $factor)) . $size[$factor];
}

/**
 * Echo $string left-aligned in a 10-character column.
 *
 * Longer strings are printed unpadded. str_pad() is used because
 * str_repeat() with a negative count throws a ValueError on PHP 8, which
 * the "@" operator cannot suppress.
 *
 * @param string $string
 * @return void
 */
function stringPrint($string) {
    echo str_pad((string) $string, 10);
}

/**
 * Collect the run options as a list of (name, value) pairs; the caller
 * serializes the result into the _walkwalk table.
 *
 * @return array
 */
function getWoptString() {
    global $wopt_bundles, $wopt_ignore, $wopt_hash, $wopt_hash_limit,
           $wopt_meta, $wopt_thumbs, $wopt_thumb_size, $wopt_paranoid;
    return array(
        array("bundles", $wopt_bundles),
        array("ignore", $wopt_ignore),
        array("hash", $wopt_hash),
        array("wopt_hash_limit", $wopt_hash_limit),
        array("metadata", $wopt_meta),
        array("thumbs", $wopt_thumbs),
        array("thumb_size", $wopt_thumb_size),
        array("wopt_paranoid", $wopt_paranoid),
    );
}

/**
 * Minimal XML property-list parser built on XMLReader.
 *
 * parseString() turns a plist document into nested PHP arrays and scalars.
 * An Exception is thrown when the document does not start with a plist
 * DOCTYPE and <plist> element, or when an unknown element type is met.
 */
class plistParser extends XMLReader {

    /**
     * Parse a plist document held in a string.
     *
     * @param string $string XML plist source.
     * @return mixed The single top-level value, or a list of top-level values.
     * @throws Exception When the input is not a valid plist.
     */
    public function parseString($string) {
        $this->XML($string);
        return $this->process();
    }

    // Validate the DOCTYPE / <plist> preamble, then parse every element found.
    private function process() {
        $this->read();
        if($this->nodeType !== XMLReader::DOC_TYPE || $this->name !== "plist") {
            throw new Exception(sprintf("Error parsing plist. nodeType: %d -- Name: %s", $this->nodeType, $this->name), 2);
        }
        if(!$this->next("plist") || $this->nodeType !== XMLReader::ELEMENT || $this->name !== "plist") {
            throw new Exception(sprintf("Error parsing plist. nodeType: %d -- Name: %s", $this->nodeType, $this->name), 3);
        }
        $plist = array();
        while($this->read()) {
            if($this->nodeType == XMLReader::ELEMENT) {
                $plist[] = $this->parse_node();
            }
        }
        if(count($plist) == 1 && $plist[0]) {
            return $plist[0];
        } else {
            return $plist;
        }
    }

    // Convert the element under the cursor into its PHP value.
    private function parse_node() {
        if($this->nodeType !== XMLReader::ELEMENT) return;
        switch($this->name) {
            case 'data':
                return base64_decode($this->getNodeText());
            case 'real':
                return floatval($this->getNodeText());
            case 'string':
                return $this->getNodeText();
            case 'integer':
                return intval($this->getNodeText());
            case 'date':
                return $this->getNodeText();
            case 'true':
                return true;
            case 'false':
                return false;
            case 'array':
                // A self-closing <array/> has no children and no end tag;
                // descending would consume the following sibling instead.
                return $this->isEmptyElement ? array() : $this->parse_array();
            case 'dict':
                return $this->isEmptyElement ? array() : $this->parse_dict();
            default:
                throw new Exception(sprintf("Not a valid plist. %s is not a valid type", $this->name), 4);
        }
    }

    // Read <key>/value pairs until the closing </dict>.
    private function parse_dict() {
        $array = array();
        $this->nextOfType(XMLReader::ELEMENT);
        do {
            if($this->nodeType !== XMLReader::ELEMENT || $this->name !== "key") {
                if(!$this->next("key")) {
                    return $array;
                }
            }
            $key = $this->getNodeText();
            $this->nextOfType(XMLReader::ELEMENT);
            $array[$key] = $this->parse_node();
            $this->nextOfType(XMLReader::ELEMENT, XMLReader::END_ELEMENT);
        } while($this->nodeType && !$this->isNodeOfTypeName(XMLReader::END_ELEMENT, "dict"));
        return $array;
    }

    // Read values until the closing </array>.
    private function parse_array() {
        $array = array();
        $this->nextOfType(XMLReader::ELEMENT);
        do {
            $array[] = $this->parse_node();
            $this->nextOfType(XMLReader::ELEMENT, XMLReader::END_ELEMENT);
        } while($this->nodeType && !$this->isNodeOfTypeName(XMLReader::END_ELEMENT, "array"));
        return $array;
    }

    // Return the text of the current element and move to its end tag.
    private function getNodeText() {
        if ($this->isEmptyElement) {
            // e.g. <string/>: there is no end tag to advance to.
            return "";
        }
        $string = $this->readString();
        $this->nextOfType(XMLReader::END_ELEMENT);
        return $string;
    }

    // Advance until the cursor sits on a node of one of the given types
    // (or the document ends).
    private function nextOfType() {
        $types = func_get_args();
        $this->read();
        while($this->nodeType && !(in_array($this->nodeType, $types))) {
            $this->read();
        }
    }

    private function isNodeOfTypeName($type, $name) {
        return $this->nodeType === $type && $this->name === $name;
    }
}

/**
 * Convert mediainfo XML (as produced with --Output=OLDXML) into an array.
 *
 * Result shape: 'version' plus 'file' => ['general' => fields] and
 * 'file' => [<track type> => [<stream id> => fields]]; each field holds the
 * list of distinct values seen for it.
 *
 * @param string $xml Raw XML text.
 * @return array Empty array when the text is not parseable XML.
 */
function parseMediaInfo ($xml) {
    $data = array();

    // Collect libxml errors quietly: callers may hand over tool output that
    // is not XML at all.
    $previous = libxml_use_internal_errors(true);
    $xml = simplexml_load_string((string) $xml);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if ($xml === false) {
        return $data;
    }

    $data['version'] = (string) $xml['version'];
    if (!isset($xml->File->track)) {
        return $data;
    }

    foreach ($xml->File->track as $track) {
        $trackType = strtolower((string) $track['type']);
        $trackId = isset($track['streamid']) ? (string) $track['streamid'] : "1";
        $trackData = array();
        foreach ($track as $rawKey => $rawVal) {
            $key = strtolower($rawKey);
            $val = (string) $rawVal;
            if ($key == 'stream_identifier') {
                continue;
            }
            if (!array_key_exists($key, $trackData)) {
                $trackData[$key] = array($val);
            } elseif (!in_array($val, $trackData[$key], true)) {
                // Strict comparison: "1" and "1.0" are different values.
                $trackData[$key][] = $val;
            }
        }
        if ($trackType == 'general') {
            $data['file']['general'] = $trackData;
        } else {
            $data['file'][$trackType][$trackId] = $trackData;
        }
    }
    return $data;
}

/**
 * Wrap a string in ANSI colour escape sequences.
 *
 * @param string      $str     Text to colour.
 * @param string      $fgcolor Foreground colour name; unknown names fall back to "white".
 * @param string|null $bgcolor Background colour name; unknown names mean no background.
 * @return string
 */
function bashcolor($str,$fgcolor="white",$bgcolor=null) {
    static $fgcolors = array(
        'black' => '0;30', 'dark gray' => '1;30',
        'blue' => '0;34', 'light blue' => '1;34',
        'green' => '0;32', 'light green' => '1;32',
        'cyan' => '0;36', 'light cyan' => '1;36',
        'red' => '0;31', 'light red' => '1;31',
        'purple' => '0;35', 'light purple' => '1;35',
        'brown' => '0;33', 'yellow' => '1;33',
        'light gray' => '0;37', 'white' => '1;37',
        'underline' => '4'
    );
    static $bgcolors = array(
        'black' => '40', 'red' => '41', 'green' => '42', 'yellow' => '43',
        'blue' => '44', 'magenta' => '45', 'cyan' => '46', 'light gray' => '47'
    );
    $out="";
    if (!isset($fgcolors[$fgcolor])) { $fgcolor='white'; }
    if ($bgcolor === null || !isset($bgcolors[$bgcolor])) { $bgcolor=null; }
    if ($fgcolor) { $out .= "\033[{$fgcolors[$fgcolor]}m"; }
    if ($bgcolor) { $out .= "\033[{$bgcolors[$bgcolor]}m"; }
    $out .= $str."\033[0m";
    return $out;
}

// Disk info //////////////////////////////////////////

$host = gethostname();
$disks = shell_exec("diskutil list 2>&1");

// Paths under /Volumes/<name>/... belong to that volume; everything else is
// looked up as the root volume.
if (substr($zpath, 0, 9) != "/Volumes/") {
    $zbase = "/";
} else {
    $zparts = explode("/", $zpath);
    $zbase = "/Volumes/".$zparts[2];
}
// Quote the path: volume names may contain spaces or shell metacharacters.
$diskutil = shell_exec("diskutil info ".escapeshellarg($zbase)." 2>&1");

$getstats = array(
    "Volume Name",
    "Protocol",
    "Volume UUID",
    "Device Location",
    "Volume Total Space",
    "Volume Available Space",
    "Level Type"
);
$stats = array();
foreach ($getstats as $stat) {
    preg_match("/(".$stat.":)(.*)(\n)/",(string) $diskutil,$matches);
    if (isset($matches[2])) {
        if (substr($stat, -5, 5) == "Space") {
            // Keep only the leading "<number> <unit>" of the space figures.
            $pieces = explode(" ", trim($matches[2]));
            $stats[$stat] = implode(" ", array_slice($pieces, 0, 2));
        } else {
            $stats[$stat] = trim($matches[2]);
        }
    }
}
$dstats = serialize($stats);

// diskutil may not report every field, so read them defensively.
$volume_name = isset($stats["Volume Name"]) ? $stats["Volume Name"] : "";
$protocol = isset($stats["Protocol"]) ? $stats["Protocol"] : "";
if ($zpath == "/") {
    $type = "Startup disk";
} elseif (strtolower($zpath) == strtolower("/Volumes/".$volume_name)) {
    if ($protocol == "Disk Image") {
        $type = "Disk image";
    } else {
        $type = "External disk";
    }
} else {
    $type = "Folder";
}

$profile = shell_exec("system_profiler SPHardwareDataType SPStorageDataType SPThunderboltDataType SPUSBDataType 2>&1");
$qlmanage = shell_exec("qlmanage -m 2>&1");
$sysvers = shell_exec("sw_vers 2>&1");

// Database //////////////////////////////////////////

// One timestamped SQLite file per run inside the bundle.
$stamp = date("Y-m-d_H-i-s", time());
$dbo = new PDO("sqlite:".$bpath."/".$stamp.".sqlite3");
$dbo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);

/*
$dbo->query("PRAGMA page_size = 4096");
$dbo->query("PRAGMA cache_size = 10000");
$dbo->query("PRAGMA locking_mode = EXCLUSIVE");
$dbo->query("PRAGMA synchronous = NORMAL");
$dbo->query("PRAGMA journal_mode = WAL");
*/

// Run-level record: options, host/disk context and traversal counters.
$dbo->exec("CREATE TABLE _walkwalk (
    version TEXT, opts TEXT, host TEXT, uid INTEGER, zpath TEXT, bpath TEXT, type TEXT,
    passed_file INTEGER, passed_dir INTEGER, passed_link INTEGER, passed_total INTEGER,
    nodescended INTEGER, ignored INTEGER, dupes INTEGER,
    stats TEXT, qlmanage TEXT, sysvers TEXT, diskutil TEXT, disks TEXT, profile TEXT,
    status TEXT
)");

// Directory tree: one row per path ID with its file ID and serialized child IDs.
$dbo->exec("CREATE TABLE family ( pid TEXT, fid TEXT, children TEXT )");

/*
Gather for each file:

A. IDs
    pid (path ID)
    fid (file ID)

B. splFileInfo methods
    Pathname Path Filename Extension Type Inode Perms Owner
    ATime CTime MTime LinkTarget RealPath

C. External methods
    stat items newest gfi_type gfi_attr gfi_created

D. Aggregates
    Size (splFileInfo, du)
    Title (exiftool, kMDItemTitle, mediainfo)
    PixelWidth (kMDItemPixelWidth, exiftool, mediainfo)
    PixelHeight (kMDItemPixelHeight, exiftool, mediainfo)
    Duration (kMDItemDurationSeconds, mediainfo, exiftool)
    DateTimeOriginal (exiftool[DateTimeOriginal],mediainfo[EncodedDate],exiftool[CreateDate,MediaCreateDate],kMDItemContentCreationDate)
    Origin (exiftool[CameraModelName,Producer,CreatorTool,WriterName,Software,Encoder],mediainfo[WritingApplication])
    GPS (exiftool[GPSPosition], kMDItemLatitude.kMDItemLongitude)
    Author (exiftool[Author,Artist,Creator,By-line])

E. Spotlight
    spotlight (whole plist)
    kMDItemDateAdded kMDItemLastUsedDate kMDItemUseCount
    kMDItemContentModificationDate kMDItemContentType kMDItemCreator
    kMDItemFSCreatorCode kMDItemKind kMDItemFSTypeCode kMDItemUserTags
    kMDItemFSInvisible kMDItemNumberOfPages kMDItemPageHeight
    kMDItemPageWidth kMDItemWhereFroms kMDItemEncodingApplications

F. Pool
    has_exif has_mediainfo has_hash thumb_filename thumb_width thumb_height

G. Exiftool
    ProfileDescription BitDepth Compression WhiteBalance Orientation LensType
    (WhiteBalance is listed here but has no column in the files table below)

H. Mediainfo
    VideoFormat AudioFormat Tracks Profile Bitrate
*/

$dbo->exec("CREATE TABLE files (
    pid TEXT, fid TEXT,
    Pathname TEXT, Path TEXT, Filename TEXT, Extension TEXT, Type TEXT,
    Inode INTEGER, Perms INTEGER, Owner TEXT,
    ATime INTEGER, CTime INTEGER, MTime INTEGER,
    LinkTarget TEXT, RealPath TEXT,
    stat TEXT, items INTEGER, newest INTEGER,
    gfi_type TEXT, gfi_attr TEXT, gfi_created TEXT,
    Size INTEGER, Title TEXT, PixelWidth INTEGER, PixelHeight INTEGER,
    Duration INTEGER, DateTimeOriginal INTEGER, Origin TEXT, GPS TEXT, Author TEXT,
    spotlight TEXT,
    kMDItemDateAdded INTEGER, kMDItemLastUsedDate INTEGER, kMDItemUseCount INTEGER,
    kMDItemContentModificationDate INTEGER, kMDItemContentType TEXT, kMDItemCreator TEXT,
    kMDItemFSCreatorCode TEXT, kMDItemKind TEXT, kMDItemFSTypeCode TEXT,
    kMDItemUserTags TEXT, kMDItemFSInvisible INTEGER, kMDItemNumberOfPages INTEGER,
    kMDItemPageHeight INTEGER, kMDItemPageWidth INTEGER, kMDItemWhereFroms TEXT,
    kMDItemEncodingApplications TEXT,
    has_exif INTEGER, has_mediainfo INTEGER, has_hash INTEGER,
    thumb_filename TEXT, thumb_width INTEGER, thumb_height INTEGER,
    ProfileDescription TEXT, BitDepth INTEGER, Compression TEXT, Orientation INTEGER,
    LensType TEXT,
    VideoFormat TEXT, AudioFormat TEXT, Tracks INTEGER, Profile TEXT, Bitrate INTEGER
)");

// Run record; the counter placeholders are left unbound here and the row is
// updated once the prescan has produced the totals.
$stmt = $dbo->prepare("INSERT INTO _walkwalk VALUES (:version, :opts, :host, :uid, :zpath, :bpath, :type, :passed_file, :passed_dir, :passed_link, :passed_total, :nodescended, :ignored, :dupes, :stats, :qlmanage, :sysvers, :diskutil, :disks, :profile, :status)");
$stmt->BindValue(":version",$version);
$stmt->BindValue(":opts",serialize(getWoptString()));
$stmt->BindValue(":host",$host);
$stmt->BindValue(":uid",posix_getuid());
$stmt->BindValue(":zpath",$zpath);
$stmt->BindValue(":bpath",$bpath);
$stmt->BindValue(":type",$type);
$stmt->BindValue(":stats",$dstats);
$stmt->BindValue(":qlmanage",$qlmanage);
$stmt->BindValue(":sysvers",$sysvers);
$stmt->BindValue(":diskutil",$diskutil);
$stmt->BindValue(":disks",$disks);
$stmt->BindValue(":profile",$profile);
// Stored as "aborted" up front; the Cleanup section overwrites it on success.
$stmt->BindValue(":status","aborted");
$stmt->execute();

// Iterator //////////////////////////////////////////

$passed_file = $passed_dir = $passed_link = $nodescended = $ignored = 0;

// Walk $zpath depth-first (directories before their contents). The filter
// callback drops ignored names and everything located directly inside a
// bundle directory, and tallies what it lets through.
// The callback runs again on EVERY traversal of $files, so the counters are
// cumulative; they are reset right before the pass whose totals are stored.
$files = new RecursiveIteratorIterator(
    new RecursiveCallbackFilterIterator(
        new RecursiveDirectoryIterator(
            $zpath,
            RecursiveDirectoryIterator::SKIP_DOTS
        ),
        function ($current, $key, $iterator) use ($wopt_ignore, $wopt_nodescend) {
            global $nodescended, $ignored, $passed_file, $passed_dir, $passed_link;
            $clean = true;

            // identify ignore files
            if (is_array($wopt_ignore)) {
                foreach ($wopt_ignore as $wildcard) {
                    if (fnmatch($wildcard, $current->getFilename())) {
                        $clean = false;
                        $ignored++;
                        break;
                    }
                }
            }

            // identify nodescend dirs
            // NOTE(review): this matches the PARENT path, so the counter is
            // incremented once per direct child of a bundle, not once per
            // bundle, although it is later reported as "bundles".
            if (is_array($wopt_nodescend)) {
                foreach ($wopt_nodescend as $wildcard) {
                    if (fnmatch($wildcard, $current->getPath())) {
                        $clean = false;
                        $nodescended++;
                        break;
                    }
                }
            }

            //tally stats
            if ($clean) {
                $entry_type = $current->getType();
                if ($entry_type == "file") {
                    $passed_file++;
                } elseif ($entry_type == "dir") {
                    $passed_dir++;
                } elseif ($entry_type == "link") {
                    $passed_link++;
                }
            }
            return $clean;
        }
    ),
    RecursiveIteratorIterator::SELF_FIRST,
    RecursiveIteratorIterator::CATCH_GET_CHILD
);

// Banner //////////////////////////////////////////

echo "Yuba ".$version."\n";
echo "-----------------------------------------------\n";
$banner = $zpath." -> ".$bpath;
echo $banner."\n";
echo str_repeat("-", strlen($banner))."\n";

// Permissions //////////////////////////////////////////

// One shared handle for both interactive prompts.
$stdin = fopen("php://stdin","r");

if (posix_getuid()) {
    echo bashcolor("You are not root. Checking file readability: ", "red");
    echo "\n";
    $oops = 0;
    foreach ($files as $splFileInfo) {
        $path = $splFileInfo->getRealPath();
        if (!is_readable($path)) {
            $oops = 1;
            echo "x";
        } else {
            echo ".";
        }
    }
    echo "\n\n";
    if ($oops) {
        echo "Some files could not be read. Continue? (Y/n)";
        // Case-insensitive: the prompt advertises "Y", so "Y" must not exit.
        $line = strtolower(trim((string) fgets($stdin)));
        $line = $line ?: "y";
        if ($line != "y" && $line != "yes") {
            echo "Exiting!\n";
            die;
        }
    }
} else {
    echo bashcolor("Running as root. Some QuickLook plugins may not be available.", "red");
    echo "\n\n";
}

$fixatimes = 0;
if ($wopt_paranoid) {
    echo bashcolor("\nFilesystem is writable. You can choose:\n(c) Preserve ctimes (default)\n(a) Preserve atimes\n", "purple");
    $line = strtolower(trim((string) fgets($stdin))) ?: "c";
    if ($line == "a") {
        $fixatimes = 1;
    }
}

// Pool DB //////////////////////////////////////////

// Shared across runs of the same bundle: derived data keyed by file ID so
// hashes, metadata and thumbnails are not regenerated.
$dbp = new PDO("sqlite:".$bpath."/pool.sqlite3");
$dbp->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$dbp->exec("CREATE TABLE IF NOT EXISTS md5 (fid TEXT, hash TEXT)");
$dbp->exec("CREATE TABLE IF NOT EXISTS exiftool (fid TEXT, tags TEXT)");
$dbp->exec("CREATE TABLE IF NOT EXISTS mediainfo (fid TEXT, info TEXT)");
$dbp->exec("CREATE TABLE IF NOT EXISTS thumbs (fid TEXT, created INTEGER, relative_path TEXT, width INTEGER, height INTEGER, tool TEXT)");

// Prescan //////////////////////////////////////////

// Start the tallies from zero: the readability check above may already have
// walked the tree once and counted every entry.
$passed_file = $passed_dir = $passed_link = $nodescended = $ignored = 0;

$i = 0;
$family = array();
$dx = array();  // fid => list of pathnames sharing that fid
$fx = array();  // list of (fid, pathname) for every regular file
foreach ($files as $splFileInfo) {
    $pathname = $splFileInfo->getPathname();
    $path = $splFileInfo->getPath();
    $key = md5($pathname);
    $pkey = md5($path);

    if (array_key_exists($key, $family)) {
        echo "Duplicate key on ".$pathname."\n";
        die;
    }
    $family[$key] = array();

    // Path-agnostic Unique File ID (to prevent redundant hashes and thumbs)
    if ($splFileInfo->getType() != "dir" && $splFileInfo->getType() != "link") {
        $fid = md5($splFileInfo->getSize().$splFileInfo->getMtime().$splFileInfo->getBasename());
        $dx[$fid][] = $pathname;
        $fx[] = array($fid, $pathname);
        $family[$key]['fid'] = $fid;
    }

    // Parents
    //$family[$key]['parents'] = getParents($zpath, $pathname);

    // Children
    $family[$pkey]['children'][] = $key;

    if ($i % 5000 == 0) {
        echo "\r\033[K\rPrescan: ".$pathname;
    }
    $i++;
}
echo "\r\033[K\rPrescan: done\n";

// Debug record of duplicate FIDs
// A fid shared by two or more paths is a duplicate (the previous "> 2" test
// silently ignored files that exist exactly twice).
$dupes = array_filter($dx, function($a) { return count($a) > 1; });
$dupecount = 0;
if (count($dupes)) {
    ob_start();
    var_dump($dupes);
    $dxo = ob_get_clean();
    file_put_contents($bpath."/".$stamp."_dupes.txt",$dxo);
    // Number of paths that belong to any duplicate group.
    $dupecount = count($dupes,COUNT_RECURSIVE) - count($dupes);
    $dupepercent = $i ? floor(($dupecount/$i)*100) : 0;
    echo "\n".bashcolor($dupepercent." percent of files look like duplicates","green")."\n\n";
}

// Write family to DB
$message = "Writing family to DB: ";
$message .= $passed_file." files, ";
$message .= $passed_dir." dirs, ";
$message .= $nodescended." bundles, ";
$message .= $passed_link." links, ";
$message .= $ignored." ignored, ";
$message .= $dupecount." dupes";
echo ProgressBar::start($i,$message);

// Prepared once and run inside one transaction; every placeholder is bound
// on every row because bindings persist between executions.
$stmt = $dbo->prepare("INSERT INTO family VALUES (:pid, :fid, :children)");
$dbo->beginTransaction();
foreach ($family as $key => $item) {
    echo ProgressBar::next();
    $has_children = !empty($item['children']) && is_array($item['children']);
    $stmt->BindValue(":pid",$key);
    $stmt->BindValue(":fid",!empty($item['fid']) ? $item['fid'] : null);
    $stmt->BindValue(":children",$has_children ? serialize($item['children']) : null);
    $stmt->execute();
}
$dbo->commit();
echo ProgressBar::finish();
unset($dx, $dxo, $dupes);

// stats
$stmt = $dbo->prepare("UPDATE _walkwalk SET passed_file=:passed_file, passed_dir=:passed_dir, passed_link=:passed_link, passed_total=:passed_total, nodescended=:nodescended, ignored=:ignored, dupes=:dupes");
$stmt->execute(array(
    ":passed_file" => (int) $passed_file,
    ":passed_dir" => (int) $passed_dir,
    ":passed_link" => (int) $passed_link,
    ":passed_total" => (int) $i,
    ":nodescended" => (int) $nodescended,
    ":ignored" => (int) $ignored,
    ":dupes" => (int) $dupecount,
));

// Thumbnails //////////////////////////////////////////

if ($wopt_thumbs) {
    $message = "Generating thumbnails...";
    echo ProgressBar::start(count($fx),$message);

    $tempdir = "/tmp/".$blabel."_".$stamp;
    if (!is_dir($tempdir)) { mkdir($tempdir); }

    // HACK for ql-thumbnail bug
    $t_skip = array("emlx");

    $thumb_known = $dbp->prepare("SELECT EXISTS(SELECT 1 FROM thumbs WHERE fid=:fid)");
    $thumb_insert = $dbp->prepare("INSERT INTO thumbs VALUES (:fid, :created, :relative_path, :width, :height, :tool)");

    foreach ($fx as $array) {
        $fid = $array[0];
        $pathname = $array[1];
        // Lower-cased so mixed-case extensions ("Jpg") match the tables too.
        $ext = strtolower(pathinfo($pathname,PATHINFO_EXTENSION));
        $tpath = $bpath."/thumbs/".substr($fid, 0, 2);
        $tfile = $tpath."/".$fid.".jpg";

        if (count($t_skip) && in_array($ext, $t_skip)) {
            echo ProgressBar::next(1, "Skipping ".shortlabel(basename($pathname),$max_label));
            continue;
        }

        // if no thumb file, then poll database
        if (file_exists($tfile)) {
            echo ProgressBar::next(1, "Thumb file found for ".shortlabel(basename($pathname),$max_label));
            continue;
        }
        $thumb_known->execute(array(":fid" => $fid));
        $has_record = $thumb_known->fetchColumn();
        $thumb_known->closeCursor();
        if ($has_record) {
            echo ProgressBar::next(1, "Thumb record found for ".shortlabel(basename($pathname),$max_label));
            continue;
        }
        echo ProgressBar::next(1, "Generating thumb for ".shortlabel(basename($pathname),$max_label));

        $shellpath = escapeshellarg($pathname);
        $tempfile = $tempdir."/".$fid.".jpg";
        $tool = null;
        $relative_path = null;
        $width = null;
        $height = null;

        // first try to make a thumb with external tools
        $cmd = null;
        if (in_array($ext, $t_files['vips'])) {
            // The "[...]" save options are quoted so the shell cannot treat
            // them as a glob pattern.
            $cmd = $bin_tv." ".$shellpath." -o ".escapeshellarg($tempfile."[Q=90,optimize_coding]")." --size=".$wopt_thumb_size;
            $tool = "vips";
        } elseif (in_array($ext, $t_files['ffmpeg'])) {
            $cmd = $bin_tf." -i ".$shellpath." -o ".escapeshellarg($tempfile)." -s ".$wopt_thumb_size." -c jpg -q 8.5";
            $tool = "ffmpeg";
        }
        if ($cmd) {
            shell_exec($cmd." 2>&1");
        }

        // if those tools failed, try quicklook
        if (!@filesize($tempfile)) {
            $cmd = $bin_tq." ".$shellpath." ".escapeshellarg($tempfile)." public.jpeg-2000 ".$wopt_thumb_size." ".$wopt_thumb_size." .8";
            shell_exec($cmd." 2>&1");
            $tool = "quicklook";
        }

        // success, move thumb into the bundle
        if (file_exists($tempfile) && @filesize($tempfile)) {
            if (!is_dir($tpath)) { mkdir($tpath); }
            rename($tempfile,$tfile);
            $relative_path = substr($tfile, strlen($bpath));
            $dimensions = @getimagesize($tfile);
            if (is_array($dimensions)) {
                $width = $dimensions[0];
                $height = $dimensions[1];
            }
        }

        // A row is written even on failure (relative_path stays NULL), so
        // the file is not retried on the next run.
        $thumb_insert->BindValue(":fid",$fid);
        $thumb_insert->BindValue(":created",time());
        $thumb_insert->BindValue(":relative_path",$relative_path);
        $thumb_insert->BindValue(":width",$width);
        $thumb_insert->BindValue(":height",$height);
        $thumb_insert->BindValue(":tool",$tool);
        $thumb_insert->execute();
    }

    // Drop leftover (empty/failed) temp files and the temp directory itself.
    $leftovers = glob($tempdir."/*");
    if (is_array($leftovers)) {
        foreach ($leftovers as $leftover) {
            if (is_file($leftover)) { @unlink($leftover); }
        }
    }
    @rmdir($tempdir);

    echo ProgressBar::finish();
}

// External metadata //////////////////////////////////////////

if ($wopt_meta) {
    $message = "Collecting external metadata...";
    echo ProgressBar::start(count($fx),$message);

    $exif_known = $dbp->prepare("SELECT EXISTS(SELECT 1 FROM exiftool WHERE fid=:fid)");
    $media_known = $dbp->prepare("SELECT EXISTS(SELECT 1 FROM mediainfo WHERE fid=:fid)");
    $exif_insert = $dbp->prepare("INSERT INTO exiftool VALUES (:fid, :tags)");
    $media_insert = $dbp->prepare("INSERT INTO mediainfo VALUES (:fid, :info)");

    foreach ($fx as $array) {
        $fid = $array[0];
        $pathname = $array[1];
        $shellpath = escapeshellarg($pathname);
        $ext = strtolower(pathinfo($pathname,PATHINFO_EXTENSION));
        $wants_exif = in_array($ext, $e_files);
        $wants_media = in_array($ext, $m_files);

        if (!$wants_exif && !$wants_media) {
            echo ProgressBar::next(1, "Not a media file: ".shortlabel($pathname,$max_label));
            continue;
        }
        echo ProgressBar::next(1, "Metadata: ".shortlabel($pathname,$max_label));

        if ($wants_exif) {
            $exif_known->execute(array(":fid" => $fid));
            $check = $exif_known->fetchColumn();
            $exif_known->closeCursor();
            if (!$check) {
                // SECURITY NOTE(review): this eval()s the "-php" output of
                // exiftool, i.e. text derived from the scanned files' tags.
                // Consider switching to "exiftool -json" + json_decode().
                $exif_php = shell_exec($bin_exiftool." -php ".$shellpath);
                $rawexif = null;
                if (is_string($exif_php) && trim($exif_php) !== "") {
                    // Empty or broken output would otherwise raise an
                    // uncaught ParseError and abort the whole run.
                    try {
                        $rawexif = eval("return ".$exif_php);
                    } catch (ParseError $e) {
                        $rawexif = null;
                    }
                }
                $tags = (is_array($rawexif) && isset($rawexif[0])) ? $rawexif[0] : null;
                $exif_insert->BindValue(":fid",$fid);
                $exif_insert->BindValue(":tags",serialize($tags));
                $exif_insert->execute();
            }
        }

        if ($wants_media) {
            $media_known->execute(array(":fid" => $fid));
            $check = $media_known->fetchColumn();
            $media_known->closeCursor();
            if (!$check) {
                $media_xml = shell_exec($bin_mediainfo." --Output=OLDXML ".$shellpath." 2>&1");
                $media_insert->BindValue(":fid",$fid);
                $media_insert->BindValue(":info",serialize(parseMediaInfo((string) $media_xml)));
                $media_insert->execute();
            }
        }
    }
    echo ProgressBar::finish();
}

// Hashes //////////////////////////////////////////

if ($wopt_hash) {
    if ($wopt_hash_limit) {
        $message = "Generating hashes for files under ".$wopt_hash_limit."GB";
    } else {
        $message = "Generating hashes for all files";
    }
    echo ProgressBar::start(count($fx),$message);

    $limit = $wopt_hash_limit*1000000000;
    $hash_known = $dbp->prepare("SELECT EXISTS(SELECT 1 FROM md5 WHERE fid=:fid)");
    $hash_insert = $dbp->prepare("INSERT INTO md5 VALUES (:fid, :hash)");

    foreach ($fx as $array) {
        $fid = $array[0];
        $pathname = $array[1];
        $size = filesize($pathname);

        $hash_known->execute(array(":fid" => $fid));
        $check = $hash_known->fetchColumn();
        $hash_known->closeCursor();

        if ($check) {
            echo ProgressBar::next(1, "Hash already exists: ".shortlabel($pathname,$max_label));
        } elseif ($wopt_hash_limit && ($size > $limit)) {
            echo ProgressBar::next(1, "Too big to hash: ".shortlabel($pathname,$max_label)." (".human_filesize($size).")");
        } else {
            echo ProgressBar::next(1, "Generating hash: ".shortlabel($pathname,$max_label));
            $hash_insert->BindValue(":fid",$fid);
            $hash_insert->BindValue(":hash",md5_file($pathname));
            $hash_insert->execute();
        }
    }
    echo ProgressBar::finish();
}

// Files //////////////////////////////////////////

// The statement is prepared afresh for every entry (see the loop) so that
// any placeholder an entry does not bind is stored as NULL.
$files_insert_sql = "INSERT INTO files VALUES (:pid, :fid, :Pathname, :Path, :Filename, :Extension, :Type, :Inode, :Perms, :Owner, :ATime, :CTime, :MTime, :LinkTarget, :RealPath, :stat, :items, :newest, :gfi_type, :gfi_attr, :gfi_created, :Size, :Title, :PixelWidth, :PixelHeight, :Duration, :DateTimeOriginal, :Origin, :GPS, :Author, :spotlight, :kMDItemDateAdded, :kMDItemLastUsedDate, :kMDItemUseCount, :kMDItemContentModificationDate, :kMDItemContentType, :kMDItemCreator, :kMDItemFSCreatorCode, :kMDItemKind, :kMDItemFSTypeCode, :kMDItemUserTags, :kMDItemFSInvisible, :kMDItemNumberOfPages, :kMDItemPageHeight, :kMDItemPageWidth, :kMDItemWhereFroms, :kMDItemEncodingApplications, :has_exif, :has_mediainfo, :has_hash, :thumb_filename, :thumb_width, :thumb_height, :ProfileDescription, :BitDepth, :Compression, :Orientation, :LensType, :VideoFormat, :AudioFormat, :Tracks, :Profile, :Bitrate)";

// Spotlight attributes copied into their own columns: (name, conversion).
// "date" => strtotime(), "array" => serialize(), 0 => stored as-is.
$spotlight_fields = array(
    array("kMDItemDateAdded", "date"),
    array("kMDItemLastUsedDate", "date"),
    array("kMDItemUseCount", 0),
    array("kMDItemContentModificationDate", "date"),
    array("kMDItemContentType", 0),
    array("kMDItemCreator", 0),
    array("kMDItemFSCreatorCode", 0),
    array("kMDItemKind", 0),
    array("kMDItemFSTypeCode", 0),
    array("kMDItemUserTags", "array"),
    array("kMDItemFSInvisible", 0),
    array("kMDItemNumberOfPages", 0),
    array("kMDItemPageHeight", 0),
    array("kMDItemPageWidth", 0),
    array("kMDItemWhereFroms", "array"),
    array("kMDItemEncodingApplications", "array"),
);

// Pool lookups, prepared once and executed per file.
$pool_exif = $dbp->prepare("SELECT tags FROM exiftool WHERE fid=:fid");
$pool_media = $dbp->prepare("SELECT info FROM mediainfo WHERE fid=:fid");
$pool_hash = $dbp->prepare("SELECT EXISTS(SELECT 1 FROM md5 WHERE fid=:fid)");
$pool_thumb = $dbp->prepare("SELECT * FROM thumbs WHERE fid=:fid");

// Run a pool lookup and return its first row (false when there is none).
$pool_row = function ($statement, $fid) {
    $statement->execute(array(":fid" => $fid));
    $row = $statement->fetch();
    $statement->closeCursor();
    return $row;
};

// Pick the Access/Modify/Change lines out of `stat -x` output; when several
// lines share a prefix the last one wins.
$stat_times = function ($stat_output) {
    $times = array("Access" => null, "Modify" => null, "Change" => null);
    foreach (explode("\n", (string) $stat_output) as $line) {
        $check = substr($line, 0, 6);
        if (array_key_exists($check, $times)) {
            $times[$check] = $line;
        }
    }
    return $times;
};

echo ProgressBar::start($i);
$j = 0;

foreach ($files as $splFileInfo) {

    // DB
    $stmt = $dbo->prepare($files_insert_sql);

    // Path basics
    $pathname = $splFileInfo->getPathname();
    $path = $splFileInfo->getPath();
    $filename = $splFileInfo->getFilename();
    $extension = $splFileInfo->getExtension();
    $shellpath = escapeshellarg($pathname);

    // Identify dir, file, link or bundle dir
    $type = $splFileInfo->getType();
    if ($type == "dir") {
        foreach ($wopt_bundles as $bundle) {
            $check = ".".$bundle;
            if (substr($filename, -(strlen($check)), strlen($check)) == $check) {
                $type = "bundle";
                break;
            }
        }
    }
    $stmt->BindValue(":Type",$type);

    // Cache atime before it gets modified
    if ($type != "link") {
        $atime = $splFileInfo->getATime();
    }

    $stmt->BindValue(":Pathname",$pathname);
    $stmt->BindValue(":Path",$path);
    $stmt->BindValue(":Filename",$filename);
    $stmt->BindValue(":Extension",$extension);

    if ($type == "link") {
        $stmt->BindValue(":LinkTarget",$splFileInfo->getLinkTarget());
        $stmt->BindValue(":RealPath",$splFileInfo->getRealPath());
    } else {
        $stmt->BindValue(":Inode",$splFileInfo->getInode());
        $stmt->BindValue(":Perms",$splFileInfo->getPerms());
        $stmt->BindValue(":Owner",$splFileInfo->getOwner().":".$splFileInfo->getGroup());
        $stmt->BindValue(":ATime",$atime);
        $stmt->BindValue(":CTime",$splFileInfo->getCTime());
        $stmt->BindValue(":MTime",$splFileInfo->getMTime());
    }

    // The label is already padded to $max_label+10 columns, so it is echoed
    // directly rather than through the 10-column stringPrint().
    echo shortlabel(basename($pathname),$max_label,$max_label+10);

    // ------------------------------------------------ //

    // Get stat
    if ($type != "link") {
        $stat = chop((string) @shell_exec("stat -x ".$shellpath." 2>&1"));
    } else {
        $stat = null;
    }
    $stmt->BindValue(":stat",$stat);

    // Cache stat
    if ($type != "link" && $wopt_paranoid) {
        $pre_times = $stat_times($stat);
    }

    // Generate PID and FID
    $pid = md5($pathname);
    $stmt->BindValue(":pid",$pid);
    if ($type == "file") {
        $fid = md5($splFileInfo->getSize().$splFileInfo->getMtime().$splFileInfo->getBasename());
        $stmt->BindValue(":fid",$fid);
    } else {
        $stmt->BindValue(":fid",null);
    }

    // Size
    if ($type == "dir" || $type == "bundle") {
        // (int) cast: empty du output must not reach the multiplication
        // (a non-numeric string operand is a TypeError on PHP 8).
        $size = (int) trim((string) shell_exec("du -ks ".$shellpath." | cut -f1")) * 1024;
    } elseif ($type == "file") {
        $size = $splFileInfo->getSize();
    } else {
        $size = null;
    }
    $stmt->BindValue(":Size",$size);
    stringPrint(floor($size/1024)."k");

    // ------------------------------------------------ //

    // Items
    if ($type == "dir" || $type == "bundle" ) {
        // Count of non-hidden entries below the directory, minus the
        // directory itself.
        $items = (int) chop((string) @shell_exec("find ".$shellpath." \( ! -regex '.*/\..*' \) | wc -l 2>&1")) - 1;
        $stmt->BindValue(":items",$items);
        stringPrint($items ? "ITEMS" : "items");
    } else {
        $items = null;
        stringPrint(" ");
    }

    // ------------------------------------------------ //

    // Newest
    if ($type == "dir") {
        // mtime of the most recently modified non-hidden file below the dir.
        $newest = @filemtime(chop((string) shell_exec("find ".$shellpath." -type f -not -path '*/\.*' -print0 | xargs -0 stat -f \"%m %N\" | sort -rn 2>&1 | head -1 | cut -f2- -d\" \"")));
        $stmt->BindValue(":newest",$newest);
        stringPrint($newest ? "NEWEST" : "newest");
    } else {
        $newest = null;
        stringPrint(" ");
    }

    // ------------------------------------------------ //

    // GetFileInfo
    // Start from an empty map for every entry so that labels missing from
    // this entry's output are not inherited from the previous one.
    $gfi = array();
    $gfi_output = chop((string) shell_exec($bin_gfi." -P ".$shellpath." 2>&1"));
    foreach (explode("\n", $gfi_output) as $line) {
        // Split on the first ": " only; skip lines that have no label.
        $parts = explode(": ", $line, 2);
        if (count($parts) < 2) {
            continue;
        }
        $gfi[$parts[0]] = trim($parts[1],"\"");
    }
    $gfi_type = isset($gfi['type']) ? $gfi['type'] : "";
    $gfi_creator = isset($gfi['creator']) ? $gfi['creator'] : "";
    $writegfitype = $gfi_type.":".$gfi_creator;
    if ($writegfitype == "\\0\\0\\0\\0:\\0\\0\\0\\0" || $writegfitype == ":") {
        $writegfitype = null;
    }
    $stmt->BindValue(":gfi_type",$writegfitype);
    $stmt->BindValue(":gfi_attr",isset($gfi['attributes']) ? $gfi['attributes'] : null);
    $stmt->BindValue(":gfi_created",isset($gfi['created']) ? strtotime($gfi['created']) : null);
    stringPrint("GFI");

    // ------------------------------------------------ //

    // Spotlight
    $mdls = shell_exec("mdls -plist - ".$shellpath." 2>&1");
    $spotlight = array();
    $spotlight_raw = null;
    if (is_string($mdls) && $mdls !== "" && $mdls != $pathname.": could not find ".$pathname.".\n") {
        // Output that is not a plist (any other mdls error) must not abort
        // the run; such entries are stored without Spotlight data.
        try {
            $parser = new plistParser();
            $parsed = $parser->parseString($mdls);
            if (is_array($parsed)) {
                $spotlight = $parsed;
            }
            $spotlight_raw = $mdls;
        } catch (Exception $e) {
            $spotlight = array();
        } catch (Error $e) {
            $spotlight = array();
        }
    }
    //$stmt->BindValue(":spotlight",serialize($spotlight));
    $stmt->BindValue(":spotlight",$spotlight_raw);
    stringPrint($mdls ? "MDLS" : "mdls");

    foreach ($spotlight_fields as $schema) {
        if (!isset($spotlight[$schema[0]])) {
            $stmt->BindValue(":".$schema[0],null);
            continue;
        }
        if ($schema[1] === "date") {
            $ready = strtotime($spotlight[$schema[0]]);
        } elseif ($schema[1] === "array") {
            $ready = serialize($spotlight[$schema[0]]);
        } else {
            $ready = $spotlight[$schema[0]];
        }
        $stmt->BindValue(":".$schema[0],$ready);
    }

    // ------------------------------------------------ //

    // Pool
    if ($type == "file") {

        $row = $pool_row($pool_exif, $fid);
        $fetch_exif = ($row && $row[0] !== null) ? @unserialize($row[0]) : null;
        $yes_exif = is_array($fetch_exif) ? 1 : 0;
        $stmt->BindValue(":has_exif",$yes_exif);
        stringPrint($yes_exif ? "EXIF" : "exif");

        $row = $pool_row($pool_media, $fid);
        $fetch_media = ($row && $row[0] !== null) ? @unserialize($row[0]) : null;
        $yes_media = is_array($fetch_media) ? 1 : 0;
        $stmt->BindValue(":has_mediainfo",$yes_media);
        stringPrint($yes_media ? "MEDIA" : "media");

        $row = $pool_row($pool_hash, $fid);
        $yes_hash = $row ? (int) $row[0] : 0;
        $stmt->BindValue(":has_hash",$yes_hash);
        stringPrint($yes_hash ? "HASH" : "hash");

        $fetch_thumb = $pool_row($pool_thumb, $fid);
        if ($fetch_thumb && !empty($fetch_thumb['relative_path'])) {
            $stmt->BindValue(":thumb_filename",$fetch_thumb['relative_path']);
            $stmt->BindValue(":thumb_width",$fetch_thumb['width']);
            $stmt->BindValue(":thumb_height",$fetch_thumb['height']);
            stringPrint("THUMB");
        } else {
            $stmt->BindValue(":thumb_filename",null);
            stringPrint("thumb");
        }

        // NOTE(review): unfinished work in progress. $breakout is filled
        // and discarded again, and the exiftool/mediainfo columns of the
        // files table are never bound.
        $breakout = array();
        $breakout[] = "ProfileDescription";
        $breakout[] = "BitDepth_BitsPerSample";
        $breakout[] = "Compression";
        $breakout[] = "Aperture,LightSource,WhiteBalance";
        $breakout[] = "Orientation";
        $breakout[] = "LensType,FocalLength";
        $breakout['profile'] = "ProfileDescription";
        $breakout['bits'] = "BitDepth_BitsPerSample";
        $breakout['compression'] = "Compression";
        $breakout[] = "Aperture,LightSource,WhiteBalance";
        $breakout[] = "Orientation";
        $breakout[] = "LensType,FocalLength";

        /*
        function parseConditionalItem($data, $item) {
            if (strpos($item, "_")) {
                $list = explode("_", $item);
                foreach ($list as $piece) {
                    // left off here
                }
            }

        function parseItem($data, $item) {
            if (strpos($item, ",")) {
                $list = explode(",", $item);
                foreach ($list as $piece) {
                    $cleared[] = parseConditionalItem($piece);
                }
            } else {
                $cleared[] = parseConditionalItem($item);
            }
            foreach ($cleared as $check) {
                if (isset($data[$check])) {
                    $ready[] = $data[$check];
                }
            }
            if (@count($ready) > 1) {
                return implode(", ",$ready);
            } elseif (@count($ready) == 1) {
                return $ready[0];
            } else {
                return null;
            }
        }

        // left off here

        foreach ($breakout as $item) {
            unset($ready);
            if (strpos($item, ",")) {
                $list = explode(",", $item) {
                foreach ($list as $multi) {
                    if (isset($fetch_exif[$multi])) {
                        $ready[] = $fetch_exif[$multi];
                    }
                }
            } elseif (strpos($item, "_")) {
                $list = explode("_", $item) {
                foreach ($list as $multi) {
                    if (isset($ready) { continue; }
                    if (isset($fetch_exif[$multi])) {
                        $ready[] = $fetch_exif[$multi];
                    }
        */

        unset($breakout);

        // NOTE(review): debug dumps left over from the unfinished section
        // above; they print the pooled metadata into the progress output.
        print_r($fetch_exif);
        print_r($fetch_media);

    } else {
        stringPrint(" ");
        stringPrint(" ");
        stringPrint(" ");
        stringPrint(" ");
    }

    // Write to DB
    $stmt->execute();
    stringPrint("DB");

    // Set fileatime back to original value
    if ($type != "link" && is_writable($pathname) && $fixatimes) {
        @exec("touch -at `date -r ".$atime." +%Y%m%d%H%M.%S` ".$shellpath." 2>&1");
        stringPrint("touch");
    }

    echo "\n";

    // Double check stat for file against pre-run value
    if ($type != "link" && $wopt_paranoid) {
        $restat = chop((string) @shell_exec("stat -x ".$shellpath." 2>&1"));
        $post_times = $stat_times($restat);
        $message = array();
        if ($pre_times["Access"] != $post_times["Access"]) { $message[] = "ATIME"; }
        if ($pre_times["Modify"] != $post_times["Modify"]) { $message[] = "MTIME"; }
        if ($pre_times["Change"] != $post_times["Change"]) { $message[] = "CTIME"; }
        if (count($message)) {
            // Always longer than one 10-character column, so echo directly.
            echo "Change: ".implode(", ", $message);
        }
    }

    // The progress bar is redrawn only on every 100th entry.
    $update = ProgressBar::next(1, substr($pathname,0,80));
    if ($j % 100 == 0) {
        echo "\n\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n";
        echo $update;
        echo "\n-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n\n";
    }
    $j++;
}
echo ProgressBar::finish();

// Cleanup //////////////////////////////////////////

echo "\n";
$seconds = floor(microtime(true)-$_SERVER["REQUEST_TIME_FLOAT"]);
$dbo->exec("UPDATE _walkwalk SET status='completed_in_".$seconds."'");
echo "Finished in ".$seconds." seconds\n\n";
unset($dbo, $dbp, $files, $family, $fx);
?>