".$bpath; echo msg($banner."\n".str_repeat("-", strlen($banner))); // Disk info ////////////////////////////////////////// echo msg("Gathering system info..."); if (substr($zpath, 0, 9) != "/Volumes/") { $zbase = "/"; } else { $zparts = explode("/", $zpath); $zbase = "/Volumes/".$zparts[2]; } $host = gethostname(); $disks = shell_exec("diskutil list -plist 2>&1"); $diskutil = shell_exec("diskutil info -plist ".$zbase." 2>&1"); $diskutil_parsed = $parser->parseString(utf8_for_xml($diskutil)); //print_r($diskutil_parsed); $vdisks = shell_exec("hdiutil info -plist 2>&1"); $vdisks_parsed = $parser->parseString(utf8_for_xml($vdisks)); //print_r($vdisks_parsed); $df = shell_exec("df 2>&1"); // old method, new method is to parse plist //$df_volume = trim(shell_exec("df ".escapeshellarg($zpath)." | tail -n 1 | rev | cut -d' ' -f1 | rev")); //$df_device = trim(shell_exec("df ".escapeshellarg($zpath)." | tail -n 1 | cut -d' ' -f1")); $df_volume = $diskutil_parsed['MountPoint']; $df_device = "/dev/".$diskutil_parsed['ParentWholeDisk']; $mdutil = shell_exec("mdutil -sv ".$df_volume); if (strpos($mdutil,"disabled")) { echo msg("Warning: spotlight indexing is disabled"); $p['spotlight'] = false; } ///////////////////////////////////////////////////////////////////////////////////////////// // rewrite below to use diskutil info -plist // // // $olddiskutil = shell_exec("diskutil info ".$zbase." 2>&1"); $getdetail = array( "Volume Name", "Protocol", "Volume UUID", "Device Location", "Volume Total Space", "Volume Available Space", "Level Type" ); foreach ($getdetail as $detail) { preg_match("/(".$detail.":)(.*)(\n)/",$olddiskutil,$matches); if (isset($matches[2])) { if (substr($detail, -5, 5) == "Space") { $pieces = explode(" ", trim($matches[2])); $summary = $pieces[0]." 
".$pieces[1]; $details[$detail] = $summary; } else { $details[$detail] = trim($matches[2]); } } } $dstring = serialize($details); if ($zpath == "/") { $type = "Startup disk"; } elseif (strtolower($zpath) == strtolower("/Volumes/".$details["Volume Name"])) { if ($details["Protocol"] == "Disk Image") { $type = "Disk image"; } else { $type = "External disk"; } } else { $type = "Folder"; } // // // ///////////////////////////////////////////////////////////////////////////////////////////// if ($type == "Disk image") { $hdiutil = shell_exec("hdiutil imageinfo -plist ".$df_device." 2>&1"); foreach ($vdisks_parsed['images'] as $id => $disk) { if ($disk['system-entities'][0]['dev-entry'] == $df_device) { $image_file = $disk['image-path']; } } } else { $hdiutil = false; $image_file = false; } if ($p['profile']) { echo msg("system_profiler"); $profile = shell_exec("system_profiler SPHardwareDataType SPStorageDataType SPThunderboltDataType SPUSBDataType 2>&1"); } else { $profile = "disabled"; } $qlmanage = shell_exec("qlmanage -m 2>&1"); $sysvers = shell_exec("sw_vers 2>&1"); // Database ////////////////////////////////////////// echo msg("Building database..."); $dbo = new PDO("sqlite:".$bpath."/".$stamp.".sqlite3"); $dbo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); $dbo->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC); $dbo->query("PRAGMA page_size = 4096"); $dbo->query("PRAGMA cache_size = 10000"); $dbo->query("PRAGMA locking_mode = EXCLUSIVE"); $dbo->query("PRAGMA synchronous = NORMAL"); $dbo->query("PRAGMA journal_mode = WAL"); $dbo->exec("CREATE TABLE _skim ( version TEXT, opts TEXT, host TEXT, uid INTEGER, zpath TEXT, bpath TEXT, type TEXT, passed_file INTEGER, passed_dir INTEGER, passed_link INTEGER, passed_total INTEGER, nodescended INTEGER, ignored INTEGER, dupes INTEGER, details TEXT, qlmanage TEXT, sysvers TEXT, disks TEXT, diskutil TEXT, vdisks TEXT, hdiutil TEXT, image_file TEXT, df TEXT, df_device TEXT, df_volume TEXT, mdutil TEXT, 
profile TEXT, status TEXT )"); $dbo->exec("CREATE TABLE family ( pid TEXT, fid TEXT, children TEXT )"); $dbo->exec("CREATE TABLE files ( pid TEXT, fid TEXT, Pathname TEXT, Path TEXT, Filename TEXT, Extension TEXT, Type TEXT, Size INTEGER, Inode INTEGER, Perms INTEGER, Owner TEXT, ATime INTEGER, MTime INTEGER, CTime INTEGER, LinkTarget TEXT, RealPath TEXT, stat TEXT, items INTEGER, newest INTEGER, fkind TEXT, gfi_type TEXT, gfi_attr TEXT, gfi_created TEXT, has_exif INTEGER, has_mediainfo INTEGER, has_hash INTEGER, thumb_filename TEXT, thumb_width INTEGER, thumb_height INTEGER, contents_filename TEXT )"); $stmt = $dbo->prepare("INSERT INTO _skim VALUES (:version, :opts, :host, :uid, :zpath, :bpath, :type, :passed_file, :passed_dir, :passed_link, :passed_total, :nodescended, :ignored, :dupes, :details, :qlmanage, :sysvers, :disks, :diskutil, :vdisks, :hdiutil, :image_file, :df, :df_device, :df_volume, :mdutil, :profile, :status)"); $stmt->BindValue(":version",$version); $stmt->BindValue(":opts",serialize($p)); $stmt->BindValue(":host",$host); $stmt->BindValue(":uid",posix_getuid()); $stmt->BindValue(":zpath",$zpath); $stmt->BindValue(":bpath",$bpath); $stmt->BindValue(":type",$type); $stmt->BindValue(":details",$dstring); $stmt->BindValue(":qlmanage",$qlmanage); $stmt->BindValue(":sysvers",$sysvers); $stmt->BindValue(":disks",$disks); $stmt->BindValue(":diskutil",$diskutil); $stmt->BindValue(":vdisks",$vdisks); $stmt->BindValue(":hdiutil",$hdiutil); $stmt->BindValue(":image_file",$image_file); $stmt->BindValue(":df",$df); $stmt->BindValue(":df_device",$df_device); $stmt->BindValue(":df_volume",$df_volume); $stmt->BindValue(":mdutil",$mdutil); $stmt->BindValue(":profile",$profile); $stmt->BindValue(":status","aborted"); $stmt->execute(); // Iterator ////////////////////////////////////////// $first_run = 1; $passed_file = $passed_dir = $passed_link = $passed_total = $nodescended = $ignored = 0; $files = new RecursiveIteratorIterator( new 
RecursiveCallbackFilterIterator( new RecursiveDirectoryIterator( $zpath, RecursiveDirectoryIterator::SKIP_DOTS ), function ($current, $key, $iterator) use ($p) { global $nodescended, $ignored, $passed_file, $passed_dir, $passed_link, $passed_total, $first_run; $clean = true; // identify ignore files if (is_array($p['ignore'])) { foreach ($p['ignore'] as $wildcard) { if (fnmatch($wildcard, $current->getFilename())) { $clean = false; if ($first_run) { $ignored++; } } } } // identify nodescend dirs if (is_array($p['nodescend'])) { foreach ($p['nodescend'] as $wildcard) { if (fnmatch($wildcard, $current->getPath())) { $clean = false; if ($first_run) { $nodescended++; } } } } //tally stats if ($clean && $first_run) { if ($current->getType() == "file") { $passed_file++; } elseif ($current->getType() == "dir") { $passed_dir++; } elseif ($current->getType() == "link") { $passed_link++; } $passed_total++; } return $clean; } ), RecursiveIteratorIterator::SELF_FIRST, RecursiveIteratorIterator::CATCH_GET_CHILD ); // Tally ////////////////////////////////////////// echo msg("Counting files..."); foreach ($files as $null) { } $first_run = 0; if (!$passed_total) { echo msg("Nothing was found, exiting"); die; } echo msg("Total files: ".$passed_total.""); // Pool DB ////////////////////////////////////////// $dbp = new PDO("sqlite:".$bpath."/pool.sqlite3"); $dbp->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); $dbp->query("PRAGMA page_size = 4096"); $dbp->query("PRAGMA cache_size = 10000"); $dbp->query("PRAGMA locking_mode = EXCLUSIVE"); $dbp->query("PRAGMA synchronous = NORMAL"); $dbp->query("PRAGMA journal_mode = WAL"); $dbp->exec("CREATE TABLE IF NOT EXISTS md5 (fid TEXT, hash TEXT)"); $dbp->exec("CREATE TABLE IF NOT EXISTS exiftool (fid TEXT, tags TEXT)"); $dbp->exec("CREATE TABLE IF NOT EXISTS mediainfo (fid TEXT, info TEXT)"); $dbp->exec("CREATE TABLE IF NOT EXISTS thumbs (fid TEXT, created INTEGER, relative_path TEXT, width INTEGER, height INTEGER, tool TEXT)"); 
$dbp->exec("CREATE TABLE IF NOT EXISTS contents (fid TEXT, created INTEGER, relative_path TEXT)");

// Prescan //////////////////////////////////////////
//
// First full pass over $files: builds the parent/children map ($family),
// the list of (fid, pathname) pairs for non-dir / non-link entries ($fx),
// the fid => pathnames map used for duplicate detection ($dx) and a stat
// snapshot per entry ($sty), indexed by visit order.

$i = 0;
$family = array();
$fids = array();
$noread = array();
// Initialised up front so a tree without regular files does not leave these
// undefined for the sections below.
$dx = array();
$fx = array();
$sty = array();
$dupecount = 0;

echo ProgressBar::start($passed_total,"Prescan (".stepString().")");

foreach ($files as $splFileInfo) {

    $path = $splFileInfo->getPath();
    $pathname = $splFileInfo->getPathname();
    $shellpath = escapeshellarg($splFileInfo->getPathname());
    $realpath = $splFileInfo->getRealPath();

    $key = md5($pathname);
    $pkey = md5($path);

    if (array_key_exists($key, $family)) {
        echo msg("Duplicate key on ".$pathname."");
        die;
    }
    $family[$key] = array();

    // Path-agnostic Unique File ID (to prevent redundant hashes and thumbs)
    if ($splFileInfo->getType() != "dir" && $splFileInfo->getType() != "link") {
        $fid = md5($splFileInfo->getSize().$splFileInfo->getMtime().$splFileInfo->getBasename());
        $dx[$fid][] = $pathname;
        $fx[] = array($fid, $pathname);
        $family[$key]['fid'] = $fid;
    }

    // capture stat before values change
    /*
    if ($splFileInfo->getType() != "link") {
        $stx[$i] = array(
            $splFileInfo->getATime(),
            $splFileInfo->getMTime(),
            $splFileInfo->getCTime()
        );
    }
    */
    //$sty[$i] = "bypass";
    $sty[$i] = statToArray(shell_exec("stat -s ".$shellpath." 2>&1"));

    // Check file can be read
    if ($realpath && !is_readable($realpath)) {
        $noread[] = $realpath;
    }

    // Children
    //$family[$pkey]['children'][] = $key;
    $family[$pkey]['children'][] = $i+1;

    echo ProgressBar::next(true);
    $i++;

}

echo ProgressBar::finish();

// Throw permissions error
if (count($noread)) {
    echo msg("Current user (".posix_getuid().") does not have read access to the following files:\n").implode("\n",$noread);
    if ($p['readability']) {
        echo msg("Exiting...");
        die;
    }
}

// Debug record of duplicate FIDs
$dupes = array_filter($dx, function($a) { return count($a) > 1; });
$dxo = var_export($dupes, true);
if (strlen($dxo)) {
    file_put_contents($bpath."/".$stamp."_dupes.txt",$dxo);
    $dupecount = count($dupes,COUNT_RECURSIVE) - count($dupes);
    echo floor(($dupecount/$passed_total)*100)." percent of files look like duplicates\n";
}

// Write family to DB
$message = "Writing family to DB: ";
$message .= $passed_file." files, ";
$message .= $passed_dir." dirs, ";
$message .= $nodescended." bundles, ";
$message .= $passed_link." links, ";
$message .= $ignored." ignored, ";
$message .= ($dupecount ? $dupecount : 0)." dupes";

echo ProgressBar::start(count($family),$message);

foreach ($family as $key => $item) {
    // A fresh statement per row: :fid and :children are bound conditionally
    // and must not carry over from the previous row.
    $stmt = $dbo->prepare("INSERT INTO family VALUES (:pid, :fid, :children)");
    $stmt->BindValue(":pid",$key);
    if (@$item['fid']) {
        $stmt->BindValue(":fid",$item['fid']);
    }
    if (@$item['children'] && is_array(@$item['children'])) {
        $stmt->BindValue(":children",serialize($item['children']));
    }
    $stmt->execute();
    echo ProgressBar::next();
}

echo ProgressBar::finish();

// create an index for family db
$dbo->exec("CREATE INDEX family_index ON family (pid)");

unset($dx, $dxo, $dupes);

// stats
$stmt = "UPDATE _skim SET ";
$stmt .= "passed_file=".$passed_file.", ";
$stmt .= "passed_dir=".$passed_dir.", ";
$stmt .= "passed_link=".$passed_link.", ";
$stmt .= "passed_total=".$passed_total.", ";
$stmt .= "nodescended=".$nodescended.", ";
$stmt .= "ignored=".$ignored.", ";
$stmt .= "dupes=".($dupecount ? $dupecount : 0);
$dbo->exec($stmt);

$wopt_currstep++;

// Thumbnails //////////////////////////////////////////

// HACK for ql-thumbnail bug
// Defined outside the thumbnail loop because the Files section below also
// reads it, including when $p['thumbs'] is off.
$t_skip = array("emlx","flac");

if ($p['thumbs']) {

    echo ProgressBar::start(count($fx),"Generating thumbnails (".stepString().")");

    foreach ($fx as $array) {

        $fid = $array[0];
        $pathname = $array[1];
        $ext = pathinfo($pathname,PATHINFO_EXTENSION);

        // thumbs are sharded by the first two characters of the fid
        $tpath = $bpath."/thumbs/".substr($fid, 0, 2);
        if (!is_dir($tpath)) {
            mkdir($tpath, 0777, true);
        }
        $tfile = $tpath."/".$fid.".jpg";
        $shell_tfile = escapeshellarg($tfile); // $bpath may contain spaces

        if (count($t_skip) && in_array($ext, $t_skip)) {
            echo ProgressBar::next("Skipping ".shortlabel($pathname));
            continue;
        }

        // if no thumb file, then poll database
        if (file_exists($tfile)) {
            echo ProgressBar::next("Thumb file found for ".shortlabel($pathname));
            continue;
        } elseif ($dbp->query("SELECT EXISTS(SELECT 1 FROM thumbs WHERE fid='".$fid."')")->fetch()[0]) {
            echo ProgressBar::next("Thumb record found for ".shortlabel($pathname));
            continue;
        } else {
            echo ProgressBar::next("Generating thumb for ".shortlabel($pathname));
        }

        $stmt = $dbp->prepare("INSERT INTO thumbs VALUES (:fid, :created, :relative_path, :width, :height, :tool)");
        $stmt->BindValue(":fid",$fid);
        $stmt->BindValue(":created",time());

        $shellpath = escapeshellarg($pathname);

        // first try to make a thumb with external tools
        $cmd = null;
        if (in_array($ext, $p['t_files']['sips'])) {
            //$cmd = $bin_tv." ".$shellpath." -o ".$tfile."[Q=90,optimize_coding] --size=".$p['thumb_size'];
            $cmd = "sips -s format jpeg -s formatOptions 80 --resampleHeightWidthMax ".$p['thumb_size']." ".$shellpath." --out ".$shell_tfile;
            $stmt->BindValue(":tool","sips");
        } elseif (in_array($ext, $p['t_files']['ffmpeg'])) {
            //$cmd = $bin_tf." -i ".$shellpath." -o ".$tfile." -s ".$p['thumb_size']." -c jpg -q 8.5";
            $cmd = $bin_ffmpeg." -ss $(( $(".$bin_mediainfo." --Inform='Video;%Duration%' ".$shellpath." | cut -d'.' -f1) / 10000 )) -i ".$shellpath." -vframes 1 -filter:v scale='400:-1' -q:v 3 ".$shell_tfile;
            $stmt->BindValue(":tool","ffmpeg");
        }
        if ($cmd) {
            shell_exec($cmd." 2>&1");
        }

        // if those tools failed, try quicklook
        if (!@filesize($tfile)) {
            //$cmd = $bin_qlthumb." ".$shellpath." ".$tfile." public.jpeg-2000 ".$p['thumb_size']." ".$p['thumb_size']." .8";
            $cmd = $bin_qlthumb." ".$shellpath." ".$shell_tfile." public.jpeg ".$p['thumb_size']." ".$p['thumb_size']." .8";
            shell_exec($cmd." 2>&1");
            $stmt->BindValue(":tool","quicklook");
        }

        // success, move thumb into the bundle
        // ignore generic music icon thumbs (7133)
        if (file_exists($tfile) && @filesize($tfile) && @filesize($tfile) != 7133) {
            $stmt->BindValue(":relative_path",substr($tfile, strlen($bpath)));
            // getimagesize() returns false for unreadable images
            $dims = getimagesize($tfile);
            if (is_array($dims)) {
                $stmt->BindValue(":width",$dims[0]);
                $stmt->BindValue(":height",$dims[1]);
            }
        }

        // A row is written even when no thumb was produced; the existence
        // check above then finds that row on a later run.
        $stmt->execute();

    }

    echo ProgressBar::finish();

}

// Contents //////////////////////////////////////////

if ($p['contents']) {

    // should be rewritten to check against filemtimes

    echo ProgressBar::start(count($fx),"Gathering contents (".stepString().")");

    foreach ($fx as $array) {

        $fid = $array[0];
        $pathname = $array[1];
        $ext = pathinfo($pathname,PATHINFO_EXTENSION);

        $cpath = $bpath."/contents/".substr($fid, 0, 2);
        $cfile = $cpath."/".$fid.".zip";

        if (in_array($ext, $p['c_files'])) {

            if (!is_dir($cpath)) {
                mkdir($cpath, 0777, true);
            }

            // only small files (under 25000 bytes) are archived
            if (!file_exists($cfile) && filesize($pathname) < 25000) {

                msg("Zipping ".$pathname);

                $zip = new ZipArchive();
                // open() returns true on success and an error code otherwise;
                // only record the archive when it was actually created.
                if ($zip->open($cfile, ZipArchive::CREATE) === true) {
                    $zip->addfile($pathname,basename($pathname));
                    $zip->close();

                    $stmt = $dbp->prepare("INSERT INTO contents VALUES (:fid, :created, :relative_path)");
                    $stmt->BindValue(":fid",$fid);
                    $stmt->BindValue(":created",time());
                    $stmt->BindValue(":relative_path",substr($cfile, strlen($bpath)));
                    $stmt->execute();
                }

            }

        }

        echo ProgressBar::next(true);

    }

    echo ProgressBar::finish();

}

// External metadata //////////////////////////////////////////

if ($p['meta']) {

    echo ProgressBar::start(count($fx),"Collecting external metadata (".stepString().")");

    foreach ($fx as $array) {

        $fid = $array[0];
        $pathname = $array[1];
        $shellpath = escapeshellarg($pathname);
        $ext = pathinfo($pathname,PATHINFO_EXTENSION);
        $found = 1; // cleared when anything new is collected for this file

        if (!in_array($ext, $p['e_files']) && !in_array($ext, $p['m_files'])) {
            echo ProgressBar::next("Not a media file: ".shortlabel($pathname));
            continue;
        }

        if (in_array($ext, $p['e_files'])) {
            $check = $dbp->query("SELECT EXISTS(SELECT 1 FROM exiftool WHERE fid='".$fid."')")->fetch()[0];
            if (!$check) {
                $arrstring = shell_exec($bin_exiftool." -php ".$shellpath);
                // $rawexif = eval("return ".`$bin_exiftool -php $shellpath`);
                // do an addtl check below to prevent "PHP Parse error: syntax error, unexpected end of file, expecting ';'"
                if (substr($arrstring,0,5) == "Array") {
                    // NOTE(review): eval() runs whatever the exiftool binary
                    // printed; the prefix check is not a security boundary.
                    // Consider a non-executable output format.
                    $rawexif = eval("return ".$arrstring);
                    $stmt = $dbp->prepare("INSERT INTO exiftool VALUES (:fid, :tags)");
                    $stmt->BindValue(":fid",$fid);
                    $stmt->BindValue(":tags",serialize($rawexif[0]));
                    $stmt->execute();
                    $found = 0;
                }
            }
        }

        if (in_array($ext, $p['m_files'])) {
            $check = $dbp->query("SELECT EXISTS(SELECT 1 FROM mediainfo WHERE fid='".$fid."')")->fetch()[0];
            if (!$check) {
                $stmt = $dbp->prepare("INSERT INTO mediainfo VALUES (:fid, :info)");
                $stmt->BindValue(":fid",$fid);
                //$stmt->BindValue(":info",serialize(parseMediaInfo(shell_exec($bin_mediainfo." --Output=OLDXML ".$shellpath." 2>&1"))));
                //$stmt->BindValue(":info",shell_exec($bin_mediainfo." --Output=OLDXML ".$shellpath." 2>&1"));
                $stmt->BindValue(":info",shell_exec($bin_mediainfo." --Output=JSON ".$shellpath." 2>&1"));
                $stmt->execute();
                $found = 0;
            }
        }

        if ($found) {
            echo ProgressBar::next("Metadata found: ".shortlabel($pathname));
        } else {
            echo ProgressBar::next("Collecting metadata: ".shortlabel($pathname));
        }

    }

    echo ProgressBar::finish();

}

// Hashes //////////////////////////////////////////

if ($p['hash']) {

    if ($p['hash_limit']) {
        $message = "Generating hashes for files under ".$p['hash_limit']."GB";
    } else {
        $message = "Generating hashes for all files";
    }

    echo ProgressBar::start(count($fx),$message." (".stepString().")");

    foreach ($fx as $array) {

        $fid = $array[0];
        $pathname = $array[1];
        $size = filesize($pathname);
        $limit = $p['hash_limit']*1000000000;

        $check = $dbp->query("SELECT EXISTS(SELECT 1 FROM md5 WHERE fid='".$fid."')")->fetch()[0];

        if ($check) {
            echo ProgressBar::next("Hash already exists: ".shortlabel($pathname));
        } elseif ($p['hash_limit'] && ($size > $limit)) {
            echo ProgressBar::next("Too big to hash: ".shortlabel($pathname)." (".human_filesize($size).")");
        } else {
            echo ProgressBar::next("Generating hash: ".shortlabel($pathname));
            $stmt = $dbp->prepare("INSERT INTO md5 VALUES (:fid, :hash)");
            $stmt->BindValue(":fid",$fid);
            $stmt->BindValue(":hash",md5_file($pathname));
            $stmt->execute();
        }

    }

    echo ProgressBar::finish();

}

// Pool Indices //////////////////////////////////////////

// We are done with the Pool DB, make sure there are indices
$dbp->exec("CREATE INDEX IF NOT EXISTS contents_index ON contents (fid)");
$dbp->exec("CREATE INDEX IF NOT EXISTS exiftool_index ON exiftool (fid)");
$dbp->exec("CREATE INDEX IF NOT EXISTS md5_index ON md5 (fid)");
$dbp->exec("CREATE INDEX IF NOT EXISTS mediainfo_index ON mediainfo (fid)");
$dbp->exec("CREATE INDEX IF NOT EXISTS thumbs_index ON thumbs (fid)");

// Spotlight //////////////////////////////////////////
//
// $mb groups the kMDItem* attributes to store by how they are written:
// 'i' and 't' as-is, 'a' serialized, 'd' converted with strtotime().

$mb['i'] = array(
    "PixelWidth",
    "PixelHeight",
    "Latitude",
    "Longitude",
    "DurationSeconds",
    "UseCount",
    "FSInvisible",
    "NumberOfPages",
    "PageHeight",
    "PageWidth",
    "TotalBitRate"
);
$mb['t'] = array(
    "Title",
    "ContentType",
    "Creator",
    "FSCreatorCode",
    "Kind",
    "FSTypeCode"
);
$mb['a'] = array(
    "UserTags",
    "WhereFroms",
    "EncodingApplications"
);
$mb['d'] = array(
    "DateAdded",
    "LastUsedDate",
    "ContentModificationDate",
    "ContentCreationDate"
);

$ibuild[] = ":pid, :spotlight";
$cbuild[] = "pid TEXT, spotlight TEXT";
foreach (array_merge($mb['i'],$mb['d']) as $item) {
    $cbuild[] = $item." INTEGER";
    $ibuild[] = ":".$item;
}
foreach (array_merge($mb['t'],$mb['a']) as $item) {
    $cbuild[] = $item." TEXT";
    $ibuild[] = ":".$item;
}

// NOTE(review): this assignment forces the Spotlight pass on regardless of
// the option's earlier value. It is left in place because the Milk section
// below queries the mdls table unconditionally; confirm the intent before
// removing it.
$p['spotlight'] = 1;

if ($p['spotlight']) {

    echo ProgressBar::start($passed_total,"Spotlight (".stepString().")");

    $dbo->exec("CREATE TABLE mdls (".implode(",",$cbuild).")");

    // One mdls row per visited entry, in visit order.
    foreach ($files as $splFileInfo) {

        $path = $splFileInfo->getPathname();
        msg($path);
        $pid = md5($path);
        $shellpath = escapeshellarg($path);

        $mdls = shell_exec("mdls -plist - ".$shellpath." 2>&1");
        if (substr_count(@$mdls,"\n") > 1) {
            $spotlight = $parser->parseString(utf8_for_xml($mdls));
        } else {
            $spotlight = array();
        }

        $stmt = $dbo->prepare("INSERT INTO mdls VALUES (".implode(",",$ibuild).")");

        foreach ($mb as $key => $list) {
            foreach ($list as $item) {
                if (@$spotlight["kMDItem".$item]) {
                    $stmt->BindValue(":pid",$pid);
                    $stmt->BindValue(":spotlight",$mdls);
                    switch($key) {
                        case "i":
                        case "t":
                            $stmt->BindValue(":".$item,$spotlight["kMDItem".$item]);
                            break;
                        case "a":
                            $stmt->BindValue(":".$item,serialize($spotlight["kMDItem".$item]));
                            break;
                        case "d":
                            $stmt->BindValue(":".$item,strtotime($spotlight["kMDItem".$item]));
                            break;
                    }
                }
            }
        }

        $stmt->execute();

        echo ProgressBar::next(true);

    }

    echo ProgressBar::finish();

}

// Files //////////////////////////////////////////
//
// Main pass: one row in `files` per visited entry. $j indexes the stat
// snapshots taken during the prescan.

$j = 0;

echo ProgressBar::start($passed_total, "Skimming");

foreach ($files as $splFileInfo) {

    // DB
    $stmt = $dbo->prepare("INSERT INTO files VALUES (:pid, :fid, :Pathname, :Path, :Filename, :Extension, :Type, :Size, :Inode, :Perms, :Owner, :ATime, :MTime, :CTime, :LinkTarget, :RealPath, :stat, :items, :newest, :fkind, :gfi_type, :gfi_attr, :gfi_created, :has_exif, :has_mediainfo, :has_hash, :thumb_filename, :thumb_width, :thumb_height, :contents_filename)");

    // Identify dir, file, link or bundle dir
    $type = $splFileInfo->getType();
    if ($type == "dir") {
        foreach ($p['bundles'] as $bundle) {
            $check = ".".$bundle;
            if (substr($splFileInfo->getFilename(), -(strlen($check)), strlen($check)) == $check) {
                $type = "bundle";
            }
        }
    }
    $stmt->BindValue(":Type",$type);

    // Path basics
    $pathname = $splFileInfo->getPathname();
    $path = $splFileInfo->getPath();
    $filename = $splFileInfo->getFilename();
    $extension = $splFileInfo->getExtension();
    $shellpath = escapeshellarg($pathname);

    $stmt->BindValue(":Pathname",$pathname);
    $stmt->BindValue(":Path",$path);
    $stmt->BindValue(":Filename",$filename);
    $stmt->BindValue(":Extension",$extension);

    //stat
    $stmt->BindValue(":stat",serialize($sty[$j]));

    if ($type == "link") {
        $stmt->BindValue(":LinkTarget",$splFileInfo->getLinkTarget());
        $stmt->BindValue(":RealPath",$splFileInfo->getRealPath());
    } else {
        $stmt->BindValue(":Inode",$splFileInfo->getInode());
        $stmt->BindValue(":Perms",$splFileInfo->getPerms());
        $stmt->BindValue(":Owner",$splFileInfo->getOwner().":".$splFileInfo->getGroup());
        $stmt->BindValue(":ATime",$splFileInfo->getATime());
        $stmt->BindValue(":MTime",$splFileInfo->getMTime());
        $stmt->BindValue(":CTime",$splFileInfo->getCTime());
    }

    // ------------------------------------------------ //

    // Generate PID and FID
    $pid = md5($pathname);
    $stmt->BindValue(":pid",$pid);
    if ($type == "file") {
        $fid = md5($splFileInfo->getSize().$splFileInfo->getMtime().$splFileInfo->getBasename());
        $stmt->BindValue(":fid",$fid);
    }

    // Size
    if ($type == "dir" || $type == "bundle") {
        $size = trim(shell_exec("du -ks ".$shellpath." | cut -f1"))*1024;
    } elseif ($type == "file") {
        $size = $splFileInfo->getSize();
    } else {
        $size = null;
    }
    $stmt->BindValue(":Size",@$size);

    // ------------------------------------------------ //

    // Items
    if ($type == "dir" || $type == "bundle" ) {
        // below commented out because it was causing -1 on dirs beginning with a dot
        //$items = chop(@shell_exec("find ".$shellpath." \( ! -regex '.*/\..*' \) | wc -l 2>&1"))-1;
        // below should be rewritten to use $wopt_ignore files
        $items = chop(@shell_exec("find ".$shellpath." \( ! -regex '.*/\.DS_Store' \) | wc -l 2>&1"))-1;
        $stmt->BindValue(":items",@$items);
    }

    // ------------------------------------------------ //

    // Newest
    if ($type == "dir") {
        $newest = @filemtime(chop(shell_exec("find ".$shellpath." -type f -not -path '*/\.*' -print0 | xargs -0 stat -f \"%m %N\" | sort -rn 2>&1 | head -1 | cut -f2- -d\" \"")));
        $stmt->BindValue(":newest",@$newest);
    }

    // ------------------------------------------------ //

    // GetFileInfo
    // Reset per entry so values parsed for a previous entry are never
    // written into this entry's row.
    $gfi = array();
    $gfiparts = explode("\n", chop(shell_exec($bin_gfi." -P ".$shellpath." 2>&1")));
    foreach ($gfiparts as $line) {
        $gfipair = explode(": ", $line, 2);
        if (count($gfipair) == 2) {
            $gfi[$gfipair[0]] = trim($gfipair[1],"\"");
        }
    }

    $writegfitype = @$gfi['type'].":".@$gfi['creator'];
    if ($writegfitype == "\\0\\0\\0\\0:\\0\\0\\0\\0" || $writegfitype == ":") {
        $writegfitype = null;
    }
    $stmt->BindValue(":gfi_type",$writegfitype);
    $stmt->BindValue(":gfi_attr",@$gfi['attributes']);
    $stmt->BindValue(":gfi_created",strtotime(@$gfi['created']));

    // ------------------------------------------------ //

    // Kind
    unset($fkind);
    if ($type == "file") {
        $fkind = trim(shell_exec("file -b -p ".$shellpath." | cut -f1 -d,"));
        $stmt->BindValue(":fkind",@$fkind);
    }

    // ------------------------------------------------ //

    // Pool
    // Link this row to what the pool DB holds for the same fid.
    if ($type == "file") {

        unset($fetch_exif, $fetch_media, $fetch_hash, $fetch_thumb, $yes_exif, $yes_media, $yes_hash);

        $yes_exif = $dbp->query("SELECT rowid FROM exiftool WHERE fid='".$fid."'")->fetch()[0];
        $stmt->BindValue(":has_exif",$yes_exif);

        $yes_media = $dbp->query("SELECT rowid FROM mediainfo WHERE fid='".$fid."'")->fetch()[0];
        $stmt->BindValue(":has_mediainfo",$yes_media);

        $yes_hash = $dbp->query("SELECT rowid FROM md5 WHERE fid='".$fid."'")->fetch()[0];
        $stmt->BindValue(":has_hash",$yes_hash);

        $yes_contents = $dbp->query("SELECT relative_path FROM contents WHERE fid='".$fid."'")->fetch()[0];
        $stmt->BindValue(":contents_filename",$yes_contents);

        if (!in_array($extension, $t_skip)) {
            $fetch_thumb = $dbp->query("SELECT * FROM thumbs WHERE fid='".$fid."'")->fetch();
        }
        if (@$fetch_thumb['relative_path']) {
            $stmt->BindValue(":thumb_filename",$fetch_thumb['relative_path']);
            $stmt->BindValue(":thumb_width",$fetch_thumb['width']);
            $stmt->BindValue(":thumb_height",$fetch_thumb['height']);
        } else {
            $stmt->BindValue(":thumb_filename",null);
        }

    }

    // ------------------------------------------------ //

    // Write to DB
    $stmt->execute();

    // Double check stat for file against pre-run value
    if ($p['verify_stat'] && $type != "link") {

        $restat = statToArray(shell_exec("stat -s ".$shellpath." 2>&1"));
        $message = array();

        if ($sty[$j]['st_atime'] != $restat['st_atime']) {
            if ($p['fixatimes'] && $type != "link" && is_writable($pathname)) {
                // put back the access time recorded during the prescan
                exec("touch -at `date -r ".$sty[$j]['st_atime']." +%Y%m%d%H%M.%S` ".$shellpath." 2>&1");
                $message[] = "atime (fix)";
            } else {
                $message[] = "atime";
            }
        }
        if ($sty[$j]['st_mtime'] != $restat['st_mtime']) {
            $message[] = "mtime";
        }
        if ($sty[$j]['st_ctime'] != $restat['st_ctime']) {
            $message[] = "ctime";
        }

        if (count($message)) {
            echo msg("FILE = ".$filename."; CHANGE = ".implode(", ", $message)."");
        }

    }

    echo ProgressBar::next($pathname);
    $j++;

}

echo ProgressBar::finish();

// Milk //////////////////////////////////////////
//
// Each $milk key is "<kind>*<column>". Each value is a priority list of
// sources written "<src>^<field>", where <src> indexes $m ('k' = mdls row,
// 'e' = exiftool tags, 'm' = mediainfo). "a.b" entries join two fields with
// $delimiter. The first source that yields a value wins.

$milk['t*DocTitle'] = ["e^Title","k^Title","m^Track_name"];
$milk['t*Format'] = ["m^Format","e^Compression","e^MIMEType"];
$milk['t*Dimensions'] = ["k^PixelWidth.k^PixelHeight","e^PixelWidth.e^PixelHeight","m^SkimDims","k^SkimPageDims"];
$milk['s*Seconds'] = ["k^DurationSeconds","e^Duration","m^Duration"];
$milk['d*DateTime'] = ["e^DateTimeOriginal","m^EncodedDate","e^CreateDate","e^MediaCreateDate","k^ContentCreationDate"];
$milk['t*Origin'] = ["e^CameraModelName","e^Producer","e^CreatorTool","e^WriterName","e^Software","e^Encoder","k^Creator"];
$milk['t*GPS'] = ["k^Latitude.k^Longitude","e^GPSPosition"];
$milk['t*Author'] = ["e^Author","e^Artist","e^Creator","e^By-line","k^Copyright"];
$milk['i*Tracks'] = ["m^SkimTrackCount","k^NumberOfPages"];
$milk['t*Writer'] = ["m^Writing_application.m^Writing_library"];
$milk['t*Bitrate'] = ["m^Overall_bit_rate","e^AvgBitrate","k^TotalBitRate"];
//$milk['i*Orientation'] = ["e^Orientation"];
//$milk['t*Profile'] = ["e^Profile"];
//$milk['i*BitDepth'] = ["e^BitDepth"];
//$milk['t*LensType'] = ["e^LensType"];
//$milk['t*FocalLength'] = ["e^FocalLength"];
//$milk['t*Aperture'] = ["e^Aperture"];
//$milk['t*LightSource'] = ["e^LightSource"];
//$milk['t*WhiteBalance'] = ["e^WhiteBalance"];

$delimiter = ",";
$display_delimiter = " x ";

// Build DB
$cbuild = $ibuild = array();
foreach (array_keys($milk) as $name) {
    list($kind,$item) = explode("*",$name);
    switch ($kind) {
        case "t":
        case "d":
        case "s":
            $cbuild[] = $item." TEXT";
            break;
        case "i":
            $cbuild[] = $item." INTEGER";
            break;
    }
    $ibuild[] = ":".$item;
}

$dbo->exec("CREATE TABLE milk (".implode(",",$cbuild).")");

$countrows = @reset($dbo->query("SELECT max(rowid) FROM files")->fetch());
echo msg("Milking ".$countrows." rows");

echo ProgressBar::start($countrows, "Milk");

$loop = $dbo->query("SELECT rowid, * FROM files");
while ($row_a = $loop->fetch()) {

    $stmt = $dbo->prepare("INSERT INTO milk VALUES (".implode(",",$ibuild).")");

    // files and mdls rows are matched by rowid
    $row_b = @$dbo->query("SELECT * FROM mdls WHERE (rowid='".$row_a['rowid']."')")->fetch();
    // fetch() returns false when there is no row; count(false) is an error
    // on current PHP, so test the type first.
    if (is_array($row_b) && count($row_b) > 1) {
        $m['k'] = $row_b;
        //custom values
        if ($m['k']['PageWidth'] && $m['k']['PageHeight']) {
            // page size is divided by 72 and labelled "in"
            $m['k']['SkimPageDims'] = round($m['k']['PageWidth']/72,2)."in".$display_delimiter.round($m['k']['PageHeight']/72,2)."in";
        }
    } else {
        $m['k'] = null;
    }

    if (isset($row_a['has_exif'])) {
        $row_c = $dbp->query("SELECT * FROM exiftool WHERE (rowid='".$row_a['has_exif']."')")->fetch();
        $m['e'] = unserialize($row_c['tags']);
    } else {
        $m['e'] = null;
    }

    if (isset($row_a['has_mediainfo'])) {
        $row_d = $dbp->query("SELECT * FROM mediainfo WHERE (rowid='".$row_a['has_mediainfo']."')")->fetch();
        // NOTE(review): text is missing from this copy of the file between
        // the opening quote on the next line and "$weighted". The lost span
        // presumably held the rest of this comparison, the assignment of
        // $m['m'], and the head of the loop over $milk. It is kept verbatim
        // rather than guessed at; restore it from version control.
        if (substr($row_d['info'],0,5) == " $weighted) {

        list($type,$name) = explode("*",$value);
        $found = 0;

        foreach ($weighted as $dindex) {

            // concatenate 2 values
            if (!$found && strpos($dindex, ".")) {

                $parts = explode(".",$dindex);
                $out = array();
                foreach ($parts as $part) {
                    list($kind,$item) = explode("^",$part);
                    if (@$m[$kind][$item]) {
                        $out[] = sanitize($m[$kind][$item],$type);
                    }
                }
                if (count($out)) {
                    $stmt->BindValue(":".$name,implode($delimiter,$out));
                    $found = 1;
                }

            } elseif (!$found) {

                // find a single value
                list($kind,$item) = explode("^",$dindex);
                if (@$m[$kind][$item]) {
                    $stmt->BindValue(":".$name,sanitize($m[$kind][$item],$type));
                    $found = 1;
                }

            }

        }

    }

    echo ProgressBar::next(true);
    $stmt->execute();

}

echo ProgressBar::finish();

// Cleanup //////////////////////////////////////////

echo msg("");

if (file_exists($error_log_file)) {
    echo file_get_contents($error_log_file);
}

$seconds = floor($time = microtime(true)-$_SERVER["REQUEST_TIME_FLOAT"]);
$dbo->exec("UPDATE _skim SET status='completed_in_".$seconds."'");

// rsync
if ($p['rsync_dest']) {

    echo msg("rsync...");
    // Both paths are escaped so spaces or shell metacharacters cannot split
    // or alter the command line.
    $command = "rsync -avv -e ssh ".escapeshellarg($bpath)." ".escapeshellarg($p['rsync_dest']);
    $count = trim(shell_exec("find ".escapeshellarg($bpath)." | wc -l"));

    echo ProgressBar::start($count,$p['rsync_dest']);

    // one progress tick per line of rsync output
    $pipe = popen($command, "r");
    if ($pipe !== false) {
        while (fgets($pipe, 2048) !== false) {
            echo ProgressBar::next(true);
        }
        pclose($pipe);
    }

    echo ProgressBar::finish();

}

$done = "Finished ".$zpath." in ".$seconds." seconds";
echo msg($done);
ncenter($done);

unset($dbo, $dbp, $files, $family, $fx);

?>