#!/usr/bin/php XML($string); return $this->process(); }

    // NOTE(review): the text above this comment is a truncated fragment (the PHP
    // open tag, the class header and the start of the method that ends there are not
    // visible in this view). It is kept exactly as found; restore it from the
    // original file before running.

    /**
     * Convert the loaded document into PHP values.
     *
     * The first node must be a DOCTYPE named "plist" and a <plist> element must
     * follow it; every element read after that is converted with parse_node().
     *
     * @return mixed The single converted value when exactly one truthy value was
     *               read, otherwise the list of all converted values.
     * @throws Exception Code 2 when the first node is not a "plist" DOCTYPE, code 3
     *                   when the <plist> element cannot be reached.
     */
    private function process()
    {
        $this->read();
        if ($this->nodeType !== XMLReader::DOC_TYPE || $this->name !== "plist") {
            throw new Exception(sprintf("Error parsing plist. nodeType: %d -- Name: %s", $this->nodeType, $this->name), 2);
        }
        if (!$this->next("plist") || $this->nodeType !== XMLReader::ELEMENT || $this->name !== "plist") {
            throw new Exception(sprintf("Error parsing plist. nodeType: %d -- Name: %s", $this->nodeType, $this->name), 3);
        }

        $plist = array();
        while ($this->read()) {
            if ($this->nodeType == XMLReader::ELEMENT) {
                $plist[] = $this->parse_node();
            }
        }

        if (count($plist) == 1 && $plist[0]) {
            return $plist[0];
        }
        return $plist;
    }

    /**
     * Convert the element the reader is positioned on into a PHP value.
     *
     * @return mixed Decoded value for the element; null when the current node is
     *               not an element.
     * @throws Exception Code 4 when the element name is not a handled plist type.
     */
    private function parse_node()
    {
        if ($this->nodeType !== XMLReader::ELEMENT) {
            return;
        }

        switch ($this->name) {
            case 'data':
                return base64_decode($this->getNodeText());
            case 'real':
                return floatval($this->getNodeText());
            case 'string':
                return $this->getNodeText();
            case 'integer':
                return intval($this->getNodeText());
            case 'date':
                // Dates are passed through as the raw text of the element.
                return $this->getNodeText();
            case 'true':
                return true;
            case 'false':
                return false;
            case 'array':
                return $this->parse_array();
            case 'dict':
                return $this->parse_dict();
            default:
                throw new Exception(sprintf("Not a valid plist. %s is not a valid type", $this->name), 4);
        }
    }

    /**
     * Read the <key>/value pairs of the <dict> element the reader is positioned on.
     *
     * @return array Map of key text to converted value.
     */
    private function parse_dict()
    {
        $array = array();

        // <dict/> has no closing tag to stop at; scanning forward would consume
        // nodes that belong to the enclosing container.
        if ($this->isEmptyElement) {
            return $array;
        }

        $this->nextOfType(XMLReader::ELEMENT, XMLReader::END_ELEMENT);
        if ($this->nodeType !== XMLReader::ELEMENT) {
            // <dict></dict>: the first thing found is the closing tag (or the end of input).
            return $array;
        }

        do {
            if ($this->nodeType !== XMLReader::ELEMENT || $this->name !== "key") {
                if (!$this->next("key")) {
                    return $array;
                }
            }
            $key = $this->getNodeText();
            $this->nextOfType(XMLReader::ELEMENT);
            $array[$key] = $this->parse_node();
            $this->nextOfType(XMLReader::ELEMENT, XMLReader::END_ELEMENT);
        } while ($this->nodeType && !$this->isNodeOfTypeName(XMLReader::END_ELEMENT, "dict"));

        return $array;
    }

    /**
     * Read the items of the <array> element the reader is positioned on.
     *
     * @return array List of converted values in document order.
     */
    private function parse_array()
    {
        $array = array();

        // Same reasoning as parse_dict(): never scan past an empty container.
        if ($this->isEmptyElement) {
            return $array;
        }

        $this->nextOfType(XMLReader::ELEMENT, XMLReader::END_ELEMENT);
        if ($this->nodeType !== XMLReader::ELEMENT) {
            return $array;
        }

        do {
            $array[] = $this->parse_node();
            $this->nextOfType(XMLReader::ELEMENT, XMLReader::END_ELEMENT);
        } while ($this->nodeType && !$this->isNodeOfTypeName(XMLReader::END_ELEMENT, "array"));

        return $array;
    }

    /**
     * Return the text of the current element and move the reader to its end tag.
     *
     * @return string Text content; an empty string for an empty element such as <string/>.
     */
    private function getNodeText()
    {
        // An empty element has no end tag of its own, so advancing to the next
        // END_ELEMENT would skip over following siblings.
        if ($this->isEmptyElement) {
            return '';
        }

        $string = $this->readString();
        $this->nextOfType(XMLReader::END_ELEMENT);
        return $string;
    }

    /**
     * Advance the reader until the current node type is one of the given types.
     *
     * Accepts any number of XMLReader node type constants as arguments. Stops early
     * when nodeType becomes falsy, which is what the loop uses as its end-of-input test.
     */
    private function nextOfType()
    {
        $types = func_get_args();
        $this->read();
        while ($this->nodeType && !in_array($this->nodeType, $types, true)) {
            $this->read();
        }
    }

    /**
     * Tell whether the current node has the given type and name.
     *
     * @param int    $type XMLReader node type constant.
     * @param string $name Node name to compare against.
     * @return bool
     */
    private function isNodeOfTypeName($type, $name)
    {
        return $this->nodeType === $type && $this->name === $name;
    }
}

/**
 * Convert MediaInfo XML into a nested array.
 *
 * Layout of the result: 'version' holds the root element's version attribute;
 * 'file' => 'general' holds the fields of the track whose type is "general";
 * 'file' => <track type> => <streamid or "1"> holds the fields of every other track.
 * Each field maps to a list of its distinct string values; keys are lower-cased and
 * the "stream_identifier" field is dropped.
 *
 * @param string $xml XML text to parse.
 * @return array Parsed data; only array('version' => '') when $xml cannot be parsed.
 */
function parseMediaInfo ($xml)
{
    $doc = simplexml_load_string($xml);
    if ($doc === false) {
        // Not XML at all (for example an error message from the shell).
        return array('version' => '');
    }

    $data = array();
    $data['version'] = (string) $doc['version'];

    foreach ($doc->File->track as $track) {
        $trackType = strtolower((string) $track['type']);
        $trackId = isset($track['streamid']) ? (string) $track['streamid'] : '1';

        $trackData = array();
        foreach ($track as $rawKey => $rawVal) {
            $key = strtolower($rawKey);
            $val = (string) $rawVal;
            if ($key == 'stream_identifier') {
                continue;
            }
            if (!array_key_exists($key, $trackData)) {
                $trackData[$key] = array($val);
            } elseif (!in_array($val, $trackData[$key], true)) {
                // Strict comparison: loose comparison treats numeric strings such
                // as "1" and "1.0" as equal and would drop one of them.
                $trackData[$key][] = $val;
            }
        }

        if ($trackType == 'general') {
            $data['file']['general'] = $trackData;
        } else {
            $data['file'][$trackType][$trackId] = $trackData;
        }
    }

    return $data;
}

/**
 * Wrap a string in ANSI colour escape sequences.
 *
 * @param string      $str     Text to colour.
 * @param string      $fgcolor Foreground colour name; unknown names fall back to "white".
 * @param string|null $bgcolor Background colour name; unknown names or null mean no background.
 * @return string $str preceded by the colour sequence(s) and followed by the reset sequence.
 */
function bashcolor($str,$fgcolor="white",$bgcolor=null)
{
    static $fgcolors = array(
        'black' => '0;30', 'dark gray' => '1;30',
        'blue' => '0;34', 'light blue' => '1;34',
        'green' => '0;32', 'light green' => '1;32',
        'cyan' => '0;36', 'light cyan' => '1;36',
        'red' => '0;31', 'light red' => '1;31',
        'purple' => '0;35', 'light purple' => '1;35',
        'brown' => '0;33', 'yellow' => '1;33',
        'light gray' => '0;37', 'white' => '1;37',
        'underline' => '4',
    );
    static $bgcolors = array(
        'black' => '40', 'red' => '41', 'green' => '42', 'yellow' => '43',
        'blue' => '44', 'magenta' => '45', 'cyan' => '46', 'light gray' => '47',
    );

    if (!isset($fgcolors[$fgcolor])) {
        $fgcolor = 'white';
    }

    $out = "\033[{$fgcolors[$fgcolor]}m";
    if ($bgcolor !== null && isset($bgcolors[$bgcolor])) {
        $out .= "\033[{$bgcolors[$bgcolor]}m";
    }

    return $out.$str."\033[0m";
}

// Path arguments
// realpath() resolves an empty or missing path to the current directory, so a
// missing argument has to be rejected before it is resolved.
$zpath = isset($argv[1]) ? realpath($argv[1]) : false;
if ($zpath === false || !is_dir($zpath)) {
    echo "Usage: walk <path> [<database directory>]\n";
    die;
}
if (isset($argv[2]) && is_dir($argv[2])) {
    $dbprefix = realpath($argv[2]);
} else {
    $dbprefix = ".";
}

// File checks
date_default_timezone_set("America/Los_Angeles");
$time_start = microtime(true);
$stamp = date("Y-m-d_H-i-s", time());
$wopt_tmpdir = "/tmp/WalkWalk_".$stamp."/";
if (!is_dir($wopt_tmpdir)) {
    mkdir($wopt_tmpdir);
}
$base = preg_replace("/[^A-Za-z0-9\.]/", "_", basename($zpath));
$dbfile = $dbprefix."/".$stamp."_".$base.".sqlite3";
if (file_exists($dbfile)) {
    echo "File \"".$dbfile."\" already exists!";
    die;
}

// Banner
echo "Yuba ".$version."\n";
echo "-----------------------------------------------\n";
$banner = $zpath." -> ".$dbfile;
echo $banner."\n";
echo str_repeat("-", strlen($banner))."\n";

// Disk info
$host = gethostname();
$disks = shell_exec("diskutil list 2>&1");
if (substr($zpath, 0, 9) != "/Volumes/") {
    $zbase = "/";
} else {
    $zparts = explode("/", $zpath);
    $zbase = "/Volumes/".$zparts[2];
}
// Volume names may contain spaces or shell metacharacters.
$diskutil = shell_exec("diskutil info ".escapeshellarg($zbase)." 2>&1");
$getstats = array(
    "Volume Name",
    "Protocol",
    "Volume UUID",
    "Device Location",
    "Volume Total Space",
    "Volume Available Space",
    "Level Type"
);
$stats = array();
foreach ($getstats as $statName) {
    if (!preg_match("/(".$statName.":)(.*)(\n)/", (string) $diskutil, $matches)) {
        continue;
    }
    if (substr($statName, -5, 5) == "Space") {
        // Keep only the first two words of the value.
        $pieces = explode(" ", trim($matches[2]));
        $stats[$statName] = implode(" ", array_slice($pieces, 0, 2));
    } else {
        $stats[$statName] = trim($matches[2]);
    }
}
$dstats = serialize($stats);

$volumeName = isset($stats["Volume Name"]) ? $stats["Volume Name"] : null;
$protocol = isset($stats["Protocol"]) ? $stats["Protocol"] : null;
if ($zpath == "/") {
    $walkType = "Startup disk";
} elseif ($volumeName !== null && strtolower($zpath) == strtolower("/Volumes/".$volumeName)) {
    $walkType = ($protocol == "Disk Image") ? "Disk image" : "External disk";
} else {
    $walkType = "Folder";
}
$profile = shell_exec("system_profiler SPHardwareDataType SPStorageDataType SPThunderboltDataType SPUSBDataType 2>&1");

// Database
$dbo = new PDO("sqlite:".$dbfile);
$dbo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$dbo->exec(
    "CREATE TABLE files ( id INTEGER PRIMARY KEY, parent INTEGER, Pathname TEXT, Path TEXT, "
    ."Filename TEXT, Extension TEXT, Type TEXT, stat TEXT, LinkTarget TEXT, RealPath TEXT, "
    ."Inode INTEGER, Size INTEGER, Perms INTEGER, Owner TEXT, ATime INTEGER, MTime INTEGER, "
    ."CTime INTEGER, gfi_type TEXT, gfi_attr TEXT, gfi_created TEXT, hash TEXT, tinfo TEXT )"
);
$dbo->exec(
    "CREATE TABLE mdls ( id INTEGER PRIMARY KEY, hasmeta INTEGER, DateAdded TEXT, ContentType TEXT, "
    ."Creator TEXT, Kind TEXT, UserTags TEXT, FSInvisible INTEGER, PixelWidth INTEGER, "
    ."PixelHeight INTEGER, spotlight TEXT )"
);
$dbo->exec("CREATE TABLE metadata ( id INTEGER PRIMARY KEY, duration TEXT, mediainfo TEXT, exiftool TEXT )");
$dbo->exec("CREATE TABLE thumbs ( id INTEGER PRIMARY KEY, thumb BLOB )");
$dbo->exec(
    "CREATE TABLE IF NOT EXISTS _walkwalk ( version TEXT, opts TEXT, host TEXT, zpath TEXT, type TEXT, "
    ."nodescended INTEGER, ignored INTEGER, stats TEXT, diskutil TEXT, disks TEXT, profile TEXT, status TEXT )"
);

// The run is recorded as "aborted" up front; the footer overwrites the status
// once the walk has finished.
$stmt = $dbo->prepare("INSERT INTO _walkwalk VALUES (:version, :opts, :host, :zpath, :type, :nodescended, :ignored, :stats, :diskutil, :disks, :profile, :status)");
$stmt->bindValue(":version", $version." (".posix_getuid().")");
$stmt->bindValue(":opts", serialize(getWoptString()));
$stmt->bindValue(":host", $host);
$stmt->bindValue(":zpath", $zpath);
$stmt->bindValue(":type", $walkType);
$stmt->bindValue(":nodescended", null);
$stmt->bindValue(":ignored", null);
$stmt->bindValue(":stats", $dstats);
$stmt->bindValue(":diskutil", $diskutil);
$stmt->bindValue(":disks", $disks);
$stmt->bindValue(":profile", $profile);
$stmt->bindValue(":status", "aborted");
$stmt->execute();

// Iterate
$nodescended = 0;
$ignored = 0;
$files = new RecursiveIteratorIterator(
    new RecursiveCallbackFilterIterator(
        new RecursiveDirectoryIterator(
            $zpath,
            RecursiveDirectoryIterator::SKIP_DOTS
        ),
        // Rejects entries whose file name matches an ignore wildcard or whose
        // containing path matches a nodescend wildcard, counting each match.
        function ($current, $key, $iterator) use ($wopt_ignore, $wopt_nodescend, &$nodescended, &$ignored) {
            $clean = true;
            if (is_array($wopt_ignore)) {
                foreach ($wopt_ignore as $wildcard) {
                    if (fnmatch($wildcard, $current->getFilename())) {
                        $clean = false;
                        $ignored++;
                        echo "\nSkipping: ".$current->getRealPath()."\n\n";
                    }
                }
            }
            if (is_array($wopt_nodescend)) {
                foreach ($wopt_nodescend as $wildcard) {
                    if (fnmatch($wildcard, $current->getPath())) {
                        $clean = false;
                        $nodescended++;
                        echo "\nNodescending: ".$current->getRealPath()."\n\n";
                    }
                }
            }
            return $clean;
        }
    ),
    RecursiveIteratorIterator::SELF_FIRST,
    RecursiveIteratorIterator::CATCH_GET_CHILD
);

// Debug input array
/*
foreach ($files as $splFileInfo) {
    echo "====================================================================================\n";
    print_r($splFileInfo);
    echo "====================================================================================\n";
    echo "getRealPath = ".$splFileInfo->getRealPath()."\n";
    echo "getPathname = ".$splFileInfo->getPathname()."\n";
    echo "isLink = ".$splFileInfo->isLink()."\n";
    if ($splFileInfo->isLink()) {
        echo "getLinkTarget = ".$splFileInfo->getLinkTarget()."\n";
    } else {
        echo "getLinkTarget = N/A\n";
    }
    echo "getType = ".$splFileInfo->getType()." (".filetype($splFileInfo->getPathname()).")\n";
    echo "isDir = ".$splFileInfo->isDir()."\n";
    echo "isFile = ".$splFileInfo->isFile()."\n";
    echo "-----------------------------------------------------------------------\n";
    echo "getPath = ".$splFileInfo->getPath()."\n";
    echo "getFilename = ".$splFileInfo->getFilename()."\n";
    echo "getBasename = ".$splFileInfo->getBasename()." (".basename($splFileInfo->getPathname()).")\n";
    echo "getExtension = ".$splFileInfo->getExtension()."\n";
    echo "-----------------------------------------------------------------------\n";
    if (!$splFileInfo->isLink()) {
        echo "getATime = ".$splFileInfo->getAtime()." (".fileatime($splFileInfo->getPathname()).")\n";
        echo "getMTime = ".$splFileInfo->getMtime()." (".filemtime($splFileInfo->getPathname()).")\n";
        echo "getCTime = ".$splFileInfo->getCtime()." (".filectime($splFileInfo->getPathname()).")\n";
        echo "getInode = ".$splFileInfo->getInode()." (".fileinode($splFileInfo->getPathname()).")\n";
        echo "getPerms = ".$splFileInfo->getPerms()." (".fileperms($splFileInfo->getPathname()).")\n";
        echo "getGroup = ".$splFileInfo->getGroup()." (".filegroup($splFileInfo->getPathname()).")\n";
        echo "getOwner = ".$splFileInfo->getOwner()." (".fileowner($splFileInfo->getPathname()).")\n";
        echo "getSize = ".$splFileInfo->getSize()." (".filesize($splFileInfo->getPathname()).")\n";
    }
}
die;
*/

// Check perms
if (posix_getuid()) {
    echo "You are not root. Checking file readability: ";
    $oops = 0;
    foreach ($files as $splFileInfo) {
        $path = $splFileInfo->getRealPath();
        if (!is_readable($path)) {
            $oops = 1;
            echo "x";
        } else {
            echo ".";
        }
    }
    echo "\n\n";

    // This pass already ran the filter callback over the whole tree; reset the
    // counters so the insert pass below does not report every match twice.
    $nodescended = 0;
    $ignored = 0;

    if ($oops) {
        echo "Some files could not be read. Continue? (Y/n)";
        // An empty answer (or end of input) means yes; the comparison is
        // case-insensitive so the advertised "Y" is accepted.
        $line = strtolower(trim((string) fgets(STDIN)));
        if ($line === "") {
            $line = "y";
        }
        if ($line != "y") {
            echo "Exiting!\n";
            die;
        }
    }
}

// Filetypes for special handling
$m_files = array(
    "mkv", "ogg", "avi", "wav", "mpeg", "mpg", "vob", "mp4", "m2v", "mp3", "asf", "wma",
    "wmv", "qt", "mov", "rm", "ifo", "ac3", "dts", "aac", "ape", "flac", "aiff", "m2ts"
);
$e_files = array(
    "ai", "aiff", "ape", "asf", "avi", "bmp", "divx", "dng", "doc", "docx", "eps", "epub",
    "exe", "exif", "fla", "flac", "flv", "gif", "icc", "iso", "jpg", "jpeg", "m2ts", "m4a",
    "m4b", "m4v", "mkv", "mobi", "azw", "azw3", "mov", "qt", "mp3", "mp4", "mpeg", "mpg",
    "m2v", "nef", "numbers", "ogg", "pages", "pdf", "pict", "png", "ppm", "ppt", "psd", "psb",
    "qif", "raw", "rtf", "sr2", "srf", "svg", "swf", "tiff", "tif", "torrent", "vcf", "vob",
    "wav", "webm", "wma", "wmv", "xls", "xlsx", "xmp", "zip"
);

// Returns $source[$key] when $source is an array holding a non-null value for
// that key, otherwise null.
$pick = function ($source, $key) {
    return (is_array($source) && isset($source[$key])) ? $source[$key] : null;
};

// The parent lookup runs once per file, so it is prepared once and fed the path
// as a bound parameter.
$parentLookup = $dbo->prepare("SELECT id FROM files WHERE (Pathname=:pathname)");

// Inserts
foreach ($files as $splFileInfo) {
    $type = $splFileInfo->getType();
    // Read the access time before anything below touches the file; it is
    // written back at the end of the iteration.
    $atime = ($type != "link") ? $splFileInfo->getATime() : null;
    $pathname = $splFileInfo->getPathname();
    $path = $splFileInfo->getPath();
    $filename = $splFileInfo->getFilename();
    $extension = $splFileInfo->getExtension();
    $shellpath = escapeshellarg($pathname);

    if (!$type) {
        echo "\nBREAK: can't determine type of ".$pathname;
        die;
    }
    if ($type != "link") {
        $stat = rtrim((string) @shell_exec("stat -x ".$shellpath." 2>&1"));
        if ($wopt_paranoid && !$atime) {
            echo "\nBREAK: Cannot determine atime of ".$pathname;
            die;
        }
    } else {
        $stat = null;
    }

    echo str_replace($zpath."/","",$pathname).": ";
    $pad = 140;
    if (strlen($pathname) < $pad) {
        echo str_repeat(" ",($pad-strlen($pathname)));
    }

    // Determine ID of parent dir by querying database
    $parentLookup->execute(array(":pathname" => $path));
    $parentRow = $parentLookup->fetch(PDO::FETCH_ASSOC);
    $parentLookup->closeCursor();
    $parent = $parentRow ? $parentRow['id'] : 0;
    stringPrint("parent");

    // Gather file attributes
    // The statement is prepared afresh for every file on purpose: several
    // parameters are bound only conditionally (and :id never), and a reused
    // statement would carry the previous file's values over.
    $stmt = $dbo->prepare("INSERT INTO files VALUES (:id, :parent, :Pathname, :Path, :Filename, :Extension, :Type, :stat, :LinkTarget, :RealPath, :Inode, :Size, :Perms, :Owner, :ATime, :MTime, :CTime, :gfi_type, :gfi_attr, :gfi_created, :hash, :tinfo)");
    if ($type == "dir") {
        // du prints "<kilobytes>\t<path>"; the cast keeps the leading number only.
        $size = (int) shell_exec("du -ks ".$shellpath) * 1024;
    } elseif ($type == "file") {
        $size = $splFileInfo->getSize();
    } else {
        $size = null;
    }
    $stmt->bindValue(":Size", $size);
    stringPrint(floor($size/1024)."k");
    $stmt->bindValue(":parent", $parent ? $parent : 0);
    $stmt->bindValue(":stat", $stat);
    $stmt->bindValue(":Pathname", $pathname);
    $stmt->bindValue(":Path", $path);
    $stmt->bindValue(":Filename", $filename);
    $stmt->bindValue(":Extension", $extension);
    $stmt->bindValue(":Type", $type);
    if ($type == "link") {
        $stmt->bindValue(":LinkTarget", $splFileInfo->getLinkTarget());
        $stmt->bindValue(":RealPath", $splFileInfo->getRealPath());
    } else {
        $stmt->bindValue(":Inode", $splFileInfo->getInode());
        $stmt->bindValue(":Perms", $splFileInfo->getPerms());
        $stmt->bindValue(":Owner", $splFileInfo->getOwner().":".$splFileInfo->getGroup());
        $stmt->bindValue(":ATime", $atime);
        $stmt->bindValue(":CTime", $splFileInfo->getCTime());
        $stmt->bindValue(":MTime", $splFileInfo->getMTime());
    }
    stringPrint("attr");

    // Start from an empty map for every file so that fields the tool did not
    // report are not inherited from the previous file.
    $gfi = array();
    $gfiOutput = rtrim((string) shell_exec($bin_gfi." -P ".$shellpath." 2>&1"));
    foreach (explode("\n", $gfiOutput) as $line) {
        // Split on the first ": " only, so values containing ": " stay whole.
        $pair = explode(": ", $line, 2);
        $gfi[$pair[0]] = isset($pair[1]) ? trim($pair[1], "\"") : null;
    }
    $writegfitype = $pick($gfi, 'type').":".$pick($gfi, 'creator');
    if ($writegfitype == "\\0\\0\\0\\0:\\0\\0\\0\\0" || $writegfitype == ":") {
        $writegfitype = null;
    }
    $gfiCreated = isset($gfi['created']) ? strtotime($gfi['created']) : false;
    $stmt->bindValue(":gfi_type", $writegfitype);
    $stmt->bindValue(":gfi_attr", $pick($gfi, 'attributes'));
    $stmt->bindValue(":gfi_created", $gfiCreated === false ? null : $gfiCreated);
    stringPrint("gfi");

    // Only regular files are hashed: a directory has no content to hash, and
    // reading from a pipe or device can block.
    if ($wopt_hash && $type == "file") {
        $stmt->bindValue(":hash", md5_file($pathname));
        stringPrint("hash");
    } else {
        $stmt->bindValue(":hash", null);
        stringPrint("(x)hash");
    }

    $thumb = $wopt_tmpdir.$filename.".png";
    // Thumbnails are named after the file name alone, so one left behind by an
    // earlier file with the same name must not be mistaken for this file's.
    if (file_exists($thumb)) {
        unlink($thumb);
    }
    @exec("qlmanage -t -f ".$wopt_thumb_factor." -o ".escapeshellarg($wopt_tmpdir)." ".$shellpath." 2>&1");
    stringPrint("thumb");
    if ($size && !file_exists($thumb) && (in_array($extension, $m_files, true) || in_array($extension, $e_files, true))) {
        // The output path contains the file name, so it is escaped like the input path.
        @exec("ffmpegthumbnailer -i ".$shellpath." -o ".escapeshellarg($thumb)." -s ".$wopt_thumb_size." -c png 2>&1");
        stringPrint("fthumb");
    } else {
        stringPrint("(x)fthumb");
    }
    $hasThumb = file_exists($thumb) && filesize($thumb);
    if ($hasThumb) {
        $stmt->bindValue(":tinfo", serialize(getimagesize($thumb)));
    }
    $stmt->execute();
    stringPrint("->files");

    // Gather spotlight metadata
    $stmt = $dbo->prepare("INSERT INTO mdls VALUES (:id, :hasmeta, :DateAdded, :ContentType, :Creator, :Kind, :UserTags, :FSInvisible, :PixelWidth, :PixelHeight, :spotlight)");
    $mdls = shell_exec("mdls -plist - ".$shellpath." 2>&1");
    stringPrint("mdls");
    if ($mdls != $pathname.": could not find ".$pathname.".\n") {
        $stmt->bindValue(":hasmeta", 1);
        $parser = new plistParser();
        $spotlight = $parser->parseString($mdls);
        $stmt->bindValue(":DateAdded", $pick($spotlight, 'kMDItemDateAdded'));
        $stmt->bindValue(":ContentType", $pick($spotlight, 'kMDItemContentType'));
        $stmt->bindValue(":Creator", $pick($spotlight, 'kMDItemCreator'));
        $stmt->bindValue(":Kind", $pick($spotlight, 'kMDItemKind'));
        if (isset($spotlight['kMDItemUserTags'])) {
            $stmt->bindValue(":UserTags", serialize($spotlight['kMDItemUserTags']));
        }
        $stmt->bindValue(":FSInvisible", $pick($spotlight, 'kMDItemFSInvisible'));
        $stmt->bindValue(":PixelWidth", $pick($spotlight, 'kMDItemPixelWidth'));
        $stmt->bindValue(":PixelHeight", $pick($spotlight, 'kMDItemPixelHeight'));
        $stmt->bindValue(":spotlight", serialize($spotlight));
    } else {
        $stmt->bindValue(":hasmeta", 0);
    }
    $stmt->execute();
    stringPrint("->mdls");

    // Gather external metadata
    $stmt = $dbo->prepare("INSERT INTO metadata VALUES (:id, :duration, :mediainfo, :exiftool)");
    if ($type != "dir" && in_array($extension, $m_files, true)) {
        $minfo = parseMediaInfo(shell_exec($bin_mediainfo." --Output=XML ".$shellpath." 2>&1"));
        if (!empty($minfo['file']['general']['duration'][0])) {
            $stmt->bindValue(":duration", $minfo['file']['general']['duration'][0]);
        } else {
            $stmt->bindValue(":duration", null);
        }
        $stmt->bindValue(":mediainfo", serialize($minfo));
        stringPrint("minfo");
    } else {
        $stmt->bindValue(":duration", null);
        $stmt->bindValue(":mediainfo", null);
        stringPrint("(x)info");
    }
    if ($type != "dir" && $type != "link" && in_array($extension, $e_files, true)) {
        // SECURITY NOTE(review): the tool's output is executed as PHP code. That
        // is only as trustworthy as the binary behind $bin_exiftool and the
        // metadata of the scanned file; consider a data format that can be
        // decoded without eval.
        $exifPhp = shell_exec($bin_exiftool." -php ".$shellpath);
        if (is_string($exifPhp) && trim($exifPhp) !== "") {
            $exif = eval("return ".$exifPhp);
        } else {
            // Nothing to evaluate; evaluating "return " alone is a parse error.
            $exif = false;
        }
        $stmt->bindValue(":exiftool", serialize($exif));
        stringPrint("etool");
    } else {
        $stmt->bindValue(":exiftool", null);
        stringPrint("(x)etool");
    }
    $stmt->execute();
    stringPrint("->meta");

    // Gather thumbnail
    $stmt = $dbo->prepare("INSERT INTO thumbs VALUES (:id, :thumb)");
    if ($hasThumb) {
        $stmt->bindValue(":thumb", file_get_contents($thumb));
    } else {
        $stmt->bindValue(":thumb", null);
    }
    $stmt->execute();
    stringPrint("->thumb");

    // Set fileatime back to original value
    if ($type != "link" && is_writable($pathname)) {
        @exec("touch -at `date -r ".$atime." +%Y%m%d%H%M.%S` ".$shellpath." 2>&1");
        stringPrint("touch");
    }
    echo "\n";

    // Double check stat for file against pre-run value
    echo bashcolor(str_replace(array("\n"," "),array(" ",""),$stat)."\n","blue");
    if ($wopt_paranoid && $type != "link") {
        $restat = rtrim((string) @shell_exec("stat -x ".$shellpath." 2>&1"));
        echo bashcolor(str_replace(array("\n"," "),array(" ",""),$restat)."\n","green");
        if ($stat != $restat) {
            echo "\nBREAK: stat changed on ".$pathname;
            die;
        }
    }
}

// Footer
$seconds = floor(microtime(true) - $time_start);
$fbanner = "Finished in ".$seconds." seconds";
echo str_repeat("-", strlen($fbanner))."\n".$fbanner."\n";
echo "Files ignored: ".$ignored." / Files nodescended: ".$nodescended."\n";

// Write app summary values
$dbo->exec("UPDATE _walkwalk SET nodescended=".$nodescended.", ignored=".$ignored.", status='completed_in_".$seconds."'");
?>