0.7.12.6pre

Better dupes handling
This commit is contained in:
2019-06-11 00:07:31 -07:00
parent 052e089f1d
commit 0f49dfc62b
4 changed files with 65 additions and 23 deletions

View File

@@ -247,6 +247,11 @@ $dbo->exec("CREATE TABLE family (
children TEXT
)");
// Table of duplicate FIDs: one row per FID. The `dupes` column is filled with a
// PHP-serialized array by the "Recording dupes" insert later in this script.
$dbo->exec("CREATE TABLE dupes (
fid TEXT,
dupes TEXT
)");
$dbo->exec("CREATE TABLE files (
pid TEXT,
fid TEXT,
@@ -472,17 +477,6 @@ if (count($noread)) {
}
}
// Debug record of duplicate FIDs
// Keep only the $dx entries that hold more than one element.
// NOTE(review): $dx is built outside this view; presumably it maps FID => list of entries sharing it.
$dupes = array_filter($dx, function($a) { return count($a) > 1; });
// Initialised up front so the summary message below can read it even when nothing is duplicated.
$dupecount = 0;
// Test the array itself: var_export() of an empty array still returns a
// non-empty string ("array (\n)"), so a strlen() check would always pass.
if (count($dupes)) {
    $dxo = var_export($dupes, true);
    file_put_contents($bpath."/".$stamp."_dupes.txt",$dxo);
    // Recursive count minus the top-level keys leaves the number of nested elements
    // (assumes each entry is a flat array).
    $dupecount = count($dupes,COUNT_RECURSIVE) - count($dupes);
    // Avoid dividing by zero (a warning on PHP 7, DivisionByZeroError on PHP 8).
    if ($passed_total > 0) {
        echo floor(($dupecount/$passed_total)*100)." percent of files look like duplicates\n";
    }
}
// Write family to DB
$message = "Writing family to DB: ";
@@ -491,7 +485,7 @@ $message .= $passed_dir." dirs, ";
// Append the remaining counters to the progress message.
$message .= $nodescended." bundles, ";
$message .= $passed_link." links, ";
$message .= $ignored." ignored, ";
// Falls back to 0 when $dupecount is falsy.
$message .= ($dupecount ? $dupecount : 0)." dupes";
//$message .= ($dupecount ? $dupecount : 0)." dupes";
// Start the progress bar with the number of $family entries and the assembled message.
echo ProgressBar::start(count($family),$message);
@@ -511,10 +505,27 @@ foreach ($family as $key => $item) {
}
// Dupes
// Keep only the $dx entries that hold more than one element and store each one
// as a row in the dupes table (fid, serialized array).
// NOTE(review): $dx is built outside this view; presumably it maps FID => list of entries sharing it.
$dupes = array_filter($dx, function($a) { return count($a) > 1; });
if (count($dupes)) {
    // Recursive count minus the top-level keys leaves the number of nested elements
    // (assumes each entry is a flat array).
    $dupecount = count($dupes,COUNT_RECURSIVE) - count($dupes);
    // Guard the percentage: dividing by zero is a warning on PHP 7 and a
    // DivisionByZeroError on PHP 8.
    $dupetotal = $passed_total ? floor(($dupecount/$passed_total)*100) : 0;
    echo ProgressBar::next("Recording dupes (".$dupetotal."%)");
    // Prepare once; only the bound values change from row to row.
    $stmt = $dbo->prepare("INSERT INTO dupes VALUES (:fid, :array)");
    foreach ($dupes as $fid => $array) {
        $stmt->bindValue(":fid",$fid);
        $stmt->bindValue(":array",serialize($array));
        $stmt->execute();
    }
}
// Echo whatever ProgressBar::finish() returns (presumably the bar's closing output).
echo ProgressBar::finish();
// create an index for family db
$dbo->exec("CREATE INDEX family_index ON family (pid)");
// Index the dupes table on its fid column as well.
$dbo->exec("CREATE INDEX dupes_index ON dupes (fid)");
// Drop the duplicate-tracking variables now that the dupes rows have been inserted.
unset($dx, $dxo, $dupes);