0.7.12.6pre
Better dupes handling
This commit is contained in:
35
Yuba.php
35
Yuba.php
@@ -247,6 +247,11 @@ $dbo->exec("CREATE TABLE family (
|
||||
children TEXT
|
||||
)");
|
||||
|
||||
// Dupes table: one row per duplicated FID. The "dupes" column is filled by
// the INSERT INTO dupes statement, which stores a serialize()d PHP array.
$dbo->exec("CREATE TABLE dupes (
|
||||
fid TEXT,
|
||||
dupes TEXT
|
||||
)");
|
||||
|
||||
$dbo->exec("CREATE TABLE files (
|
||||
pid TEXT,
|
||||
fid TEXT,
|
||||
@@ -472,17 +477,6 @@ if (count($noread)) {
|
||||
}
|
||||
}
|
||||
|
||||
// Debug record of duplicate FIDs
|
||||
|
||||
$dupes = array_filter($dx, function($a) { return count($a) > 1; });
|
||||
$dxo = var_export($dupes, true);
|
||||
|
||||
if (strlen($dxo)) {
|
||||
file_put_contents($bpath."/".$stamp."_dupes.txt",$dxo);
|
||||
$dupecount = count($dupes,COUNT_RECURSIVE) - count($dupes);
|
||||
echo floor(($dupecount/$passed_total)*100)." percent of files look like duplicates\n";
|
||||
}
|
||||
|
||||
// Write family to DB
|
||||
|
||||
$message = "Writing family to DB: ";
|
||||
@@ -491,7 +485,7 @@ $message .= $passed_dir." dirs, ";
|
||||
$message .= $nodescended." bundles, ";
|
||||
$message .= $passed_link." links, ";
|
||||
$message .= $ignored." ignored, ";
|
||||
$message .= ($dupecount ? $dupecount : 0)." dupes";
|
||||
//$message .= ($dupecount ? $dupecount : 0)." dupes";
|
||||
|
||||
echo ProgressBar::start(count($family),$message);
|
||||
|
||||
@@ -511,10 +505,27 @@ foreach ($family as $key => $item) {
|
||||
|
||||
}
|
||||
|
||||
// Dupes
//
// Record every FID that collected more than one entry in $dx. Each surviving
// key is written to the dupes table as "fid", with its array of entries stored
// in serialize()d form so it can be restored as a PHP array later.

$dupes = array_filter($dx, function($a) { return count($a) > 1; });

if (count($dupes)) {
    // Recursive count minus the number of groups leaves the number of group
    // members (assumes each group is a flat array -- confirm where $dx is built).
    $dupecount = count($dupes, COUNT_RECURSIVE) - count($dupes);

    // Guard the percentage: dividing by a zero $passed_total throws
    // DivisionByZeroError on PHP 8 and would abort the run before the DB write.
    $dupetotal = $passed_total > 0 ? floor(($dupecount / $passed_total) * 100) : 0;

    echo ProgressBar::next("Recording dupes (".$dupetotal."%)");

    // The SQL text never changes, so prepare it once and only re-bind per row
    // instead of re-compiling the statement on every iteration.
    $stmt = $dbo->prepare("INSERT INTO dupes VALUES (:fid, :array)");

    foreach ($dupes as $fid => $array) {
        $stmt->bindValue(":fid", $fid);
        $stmt->bindValue(":array", serialize($array));
        $stmt->execute();
    }
}
|
||||
|
||||
echo ProgressBar::finish();
|
||||
|
||||
// Index the family table on pid and the dupes table on fid
|
||||
$dbo->exec("CREATE INDEX family_index ON family (pid)");
|
||||
$dbo->exec("CREATE INDEX dupes_index ON dupes (fid)");
|
||||
|
||||
// Destroy the dupe-tracking variables; unset() on a name that was never
// assigned is a harmless no-op.
unset($dx, $dxo, $dupes);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user