0.7.12.6pre

Better dupes handling
This commit is contained in:
2019-06-11 00:07:31 -07:00
parent 052e089f1d
commit 0f49dfc62b
4 changed files with 65 additions and 23 deletions

View File

@@ -247,6 +247,11 @@ $dbo->exec("CREATE TABLE family (
children TEXT children TEXT
)"); )");
// Duplicate map: one row per fid; the dupes column holds a serialized PHP array for that fid
$dbo->exec("CREATE TABLE dupes (
fid TEXT,
dupes TEXT
)");
$dbo->exec("CREATE TABLE files ( $dbo->exec("CREATE TABLE files (
pid TEXT, pid TEXT,
fid TEXT, fid TEXT,
@@ -472,17 +477,6 @@ if (count($noread)) {
} }
} }
// Debug record of duplicate FIDs
$dupes = array_filter($dx, function($a) { return count($a) > 1; });
$dxo = var_export($dupes, true);
if (strlen($dxo)) {
file_put_contents($bpath."/".$stamp."_dupes.txt",$dxo);
$dupecount = count($dupes,COUNT_RECURSIVE) - count($dupes);
echo floor(($dupecount/$passed_total)*100)." percent of files look like duplicates\n";
}
// Write family to DB // Write family to DB
$message = "Writing family to DB: "; $message = "Writing family to DB: ";
@@ -491,7 +485,7 @@ $message .= $passed_dir." dirs, ";
$message .= $nodescended." bundles, "; $message .= $nodescended." bundles, ";
$message .= $passed_link." links, "; $message .= $passed_link." links, ";
$message .= $ignored." ignored, "; $message .= $ignored." ignored, ";
$message .= ($dupecount ? $dupecount : 0)." dupes"; //$message .= ($dupecount ? $dupecount : 0)." dupes";
echo ProgressBar::start(count($family),$message); echo ProgressBar::start(count($family),$message);
@@ -511,10 +505,27 @@ foreach ($family as $key => $item) {
} }
// Dupes: store every fid that collected more than one entry in $dx into the
// dupes table, one row per fid with the entries serialized.
// NOTE(review): $dx presumably maps fid => list of pathnames; confirm where it is built.
$dupes = array_filter($dx, function($a) { return count($a) > 1; });
if (count($dupes)) {
    // COUNT_RECURSIVE counts the outer keys plus every nested entry, so
    // subtracting the outer count leaves the number of nested entries.
    $dupecount = count($dupes, COUNT_RECURSIVE) - count($dupes);
    // Guard the percentage so an empty total can never divide by zero.
    $dupetotal = $passed_total ? floor(($dupecount / $passed_total) * 100) : 0;
    echo ProgressBar::next("Recording dupes (".$dupetotal."%)");
    // Prepare once and re-execute per row; the statement text never changes.
    $stmt = $dbo->prepare("INSERT INTO dupes VALUES (:fid, :array)");
    foreach ($dupes as $fid => $array) {
        $stmt->bindValue(":fid", $fid);
        $stmt->bindValue(":array", serialize($array));
        $stmt->execute();
    }
}
echo ProgressBar::finish(); echo ProgressBar::finish();
// create an index for family db // create an index for family db
$dbo->exec("CREATE INDEX family_index ON family (pid)"); $dbo->exec("CREATE INDEX family_index ON family (pid)");
$dbo->exec("CREATE INDEX dupes_index ON dupes (fid)");
unset($dx, $dxo, $dupes); unset($dx, $dxo, $dupes);

View File

@@ -60,6 +60,6 @@ switch ($mode) {
} }
echo "\nHelper: ".basename($pathname)."\n"; echo "\nHelper (".$mode."): ".basename($pathname)."\n";
?> ?>

View File

@@ -1 +1 @@
0.7.12.5 0.7.12.6

View File

@@ -370,7 +370,9 @@ if ($db_file) {
$dbo = new PDO("sqlite:".$db_file); $dbo = new PDO("sqlite:".$db_file);
$dbp = new PDO("sqlite:".dirname($db_file)."/pool.sqlite3"); $dbp = new PDO("sqlite:".dirname($db_file)."/pool.sqlite3");
$dbo->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC); $dbo->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC);
$dbo->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
$dbp->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC); $dbp->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC);
$dbp->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
// Get zpath // Get zpath
@@ -599,6 +601,14 @@ if ($db_file) {
echo "</div>"; echo "</div>";
// Offer a "show N dupes" button when the current row's fid has an entry in
// the dupes table. The table is probed via sqlite_master first so databases
// without it are skipped instead of failing.
if ($dbo->query("SELECT name FROM sqlite_master WHERE name='dupes'")->fetch()) {
    // Bind the fid instead of splicing it into the SQL text.
    $stmt = $dbo->prepare("SELECT dupes FROM dupes WHERE (fid = :fid)");
    $stmt->execute(array(":fid" => $row_a['fid']));
    $dupes = $stmt->fetch();
    if ($dupes) {
        // NOTE(review): unserialize() trusts the database contents; only safe
        // while the database file comes from a trusted source.
        $paths = unserialize($dupes['dupes']);
        // A corrupt payload unserializes to false; skip the button rather than count() it.
        if (is_array($paths)) {
            $count = count($paths);
            // The value sits inside a single-quoted attribute, so quotes must be escaped too.
            $fid_attr = htmlspecialchars($row_a['fid'], ENT_QUOTES | ENT_SUBSTITUTE);
            echo "<form action='' method='post'><input type='hidden' name='query' size='50' value='".$fid_attr."'><input type='submit' value='show ".$count." dupes'></form>";
        }
    }
}
echo "</td>"; echo "</td>";
echo "<td valign='top'>"; echo "<td valign='top'>";
@@ -767,16 +777,37 @@ if ($db_file) {
// Search // Search
$result = $dbo->query("SELECT * FROM files WHERE (Filename LIKE '%".$search."%')")->fetchAll(); if (@$dbo->query("SELECT name FROM sqlite_master WHERE name='dupes'")->fetch() && strlen($search) == 32) {
// this is a hash search with dupes table
if (count($result)) { $dupesearch = @$dbo->query("SELECT dupes FROM dupes WHERE (fid = '".$search."')")->fetch()['dupes'];
echo count($result)." results<br>"; if ($dupesearch) {
foreach ($result as $row) { foreach (unserialize($dupesearch) as $pathname) {
$pathbold = str_ireplace($search,"<b>".$search."</b>",$row['Pathname']); echo "\n<a href='?db=".$db_file."&pid=".md5($pathname)."'>".$pathname."</a><br>";
echo "\n<a href='?db=".$db_file."&pid=".$row['pid']."'>".$pathbold."</a><br>"; }
} }
} else { } else {
echo "No results for ".$search; // this is a text search
// A 32-character term is treated as a hash and matched exactly against fid;
// any other term is a substring match on Filename. The term is bound as a
// parameter so it can never alter the SQL statement itself.
// NOTE(review): if $search is SQL-escaped before it reaches this point, remove
// that escaping; with binding it would be matched literally.
if (strlen($search) == 32) {
    $stmt = $dbo->prepare("SELECT * FROM files WHERE (fid = :term)");
    $stmt->execute(array(":term" => $search));
    $label = "hash";
} else {
    $stmt = $dbo->prepare("SELECT * FROM files WHERE (Filename LIKE :term)");
    $stmt->execute(array(":term" => "%".$search."%"));
    $label = "text";
}
$result = $stmt->fetchAll();
// Escaped copy of the term for every place it is echoed into the page.
$search_html = htmlspecialchars($search, ENT_QUOTES | ENT_SUBSTITUTE);
if (count($result)) {
    echo count($result)." ".$label." results<br>";
    foreach ($result as $row) {
        // Split the path around case-insensitive matches of the term and escape
        // each piece on its own, so the <b> tags are the only markup emitted.
        // With PREG_SPLIT_DELIM_CAPTURE the odd-indexed pieces are the matches.
        if ($search === "") {
            $pieces = array($row['Pathname']);
        } else {
            $pieces = preg_split("/(".preg_quote($search, "/").")/i", $row['Pathname'], -1, PREG_SPLIT_DELIM_CAPTURE);
        }
        $pathbold = "";
        foreach ($pieces as $i => $piece) {
            if ($i % 2) {
                // As before, a match is shown using the search term's own casing.
                $pathbold .= "<b>".$search_html."</b>";
            } else {
                $pathbold .= htmlspecialchars($piece, ENT_QUOTES | ENT_SUBSTITUTE);
            }
        }
        echo "\n<a href='?db=".$db_file."&pid=".$row['pid']."'>".$pathbold."</a><br>";
    }
} else {
    echo "No results for ".$label." ".$search_html;
}
} }
} }