' 38835EBD648651E1129A49C2C2B98F38870814378AE7C23BE3D8DF55C3D3072C138F33DBFB10A04735E7E7CB9D0523C75956E57F66895AA3E8BEC766E70042F3 ' sRipeTech ' 38AEFB807D 2024-10-08 17:22:50 ' A script to get information about a folder tree and its contents ' ---------------------------------------------------------------------- ' To get the best performance when looking for duplicate file, ' turn off any real-time vuris scanning (temporarily) ' ---------------------------------------------------------------------- ' code common to most scripts dbclose () ' close any open database newscript ' remove any previous user defined things report () ' empty the report panel report ' show the report panel ' ---------------------------------------------------------------------- ' THIS IS NOT STRICTLY NEEDED ' When working with tables containing path/file information I sometimes want to get a ' File Explorer view of the path, or send the file to its default application. ' So I try to arrange that the first two columns in a file orientated table consist of ' the path name and the file name. ' Then I use the following function so that when I double right click on a displayed ROW ' of the table, the function gets the content of the first two columns of that row, ' forms a fully qualified path/file name and sends it to the operating system to do default handling. ' on@drb is an application reserved name and ONLY works for the lower listview ' on the database panel! function on@drb ( sobid , row, col ) Dim fullName = sob(sobid,"GET","cell", row,1) & sob(sobid,"GET","cell", row,2) if ( isFile ( FullName ) ) then ' is a file or a folder ShellExecute ( fullName ) ELSE ' not a file or folder. Could add an error report here end if inherit on@drb ( sobid , row, col ) ' call the previous on@drb end function ' ---------------------------------------------------------------------- ProgressClear( ) ProgressText("Find Folders and Files", 0 ) dbopen ("memory") ' create an empty, memory only database ' ---------------------------------------------------------------------- ' SET the hash to be calculated dim HashAlgo ="sha1" ' Murmur , MD2 , MD4 . MD5 , SHA1 , SHA256 , SHA512 ' ---------------------------------------------------------------------- ' SET the file system info for some target folder tree ' for now I have hardcoded the target folder to: ' getFolderPath(0) & "\\" which is your desktop! ProgressText("Scan file system", 2 ) HiResTime ' sets an internal value marking the current time dbfilelist ( "main", "ffTbl" , getFolderPath(0) & "\\" , -1 ) dim fsoScanTime = cint( HiResDelta /HiResTick) ' -------------------- ' mark those FILES that have the same size as another file alter table ffTbl add column sSize default 0 update ffTbl set sSize = 1 where size in (select size from ffTbl where _ type like 'file' group by size having count (*) > 1 ) and type like 'file' ' -------------------- ' code used by an internal progress monitor/reporter select count(*) from ffTbl where sSize = 1 dim cnt = qrSingleValue * -1 ProgressTick( cnt ) ' set a value for use by the Progress monitor ProgressText("sha1", 1 ) alter table ffTbl add column hash default '' HiResTime ' sets an internal value marking the current time sql= replace ( "update ffTbl set hash=hashFile (path || name,'ALGO' ) where sSize=1", "ALGO" , HashAlgo) dim hashTime = cint( HiResDelta /HiResTick) ' -------------------- ' this simplifies later code. ' mark each file that has the same hash as another file! ProgressText("Create result table", 2 ) ProgressText(" ", 3 ) alter table ffTbl add column isDup default 0 update ffTbl set isDup = 1 where type like 'file' and hash in _ (select hash from ffTbl where not hash = '' group by hash having count (*) > 1 ) ' ---------------------------------------------------------------------- ' create a list of the folders i.e. type like 'folder' create table emptyFolders as select ( path || name || '\' ) as path , '' as name _ from ffTbl where type like 'folder' ' -------------------- ' create a summary of each unique path in the ffTbl: column=path create table contentFolders as select path, '' as name , 0 as nbrFolders, _ 0 as NbrFiles , sum(size) as totalSize , sum(isDup) as Dups from ffTbl group by path ' -------------------- ' this gives us a list of empty folders delete from emptyFolders where path in ( select path from contentFolders ) ' -------------------- ' the next 2 commands provide an overview of the content for non-empty folders update contentFolders _ SET nbrFolders = frmTbl.nbrFolders _ FROM ( select path, count(*) as nbrFolders from ffTbl where _ type like 'folder' group by path ) as frmTbl _ WHERE contentFolders.path = frmTbl.path update contentFolders _ SET nbrFiles = frmTbl.nbrFiles _ FROM ( select path, count(*) as nbrFiles from ffTbl where _ type like 'file' group by path ) as frmTbl _ WHERE contentFolders.path = frmTbl.path ' ---------------------------------------------------------------------- ' the following are queries to create the RESULTS table ' ---------------------------------------------------------------------- drop table if exists results create table results ( title text, value text ) insert into results values ( 'Time', date('now') || ' ' || time('now') ) insert into results values ( "Root" , _ ( select path from (select path , min(length(path)) from fftbl where type like 'folder' ) ) ) ; sql =replace ( "insert into results values ( 'Folder/file Scan time ' , printf('%,d', $val) || ' secs' )" , "$val" , cstr(fsoScanTime) ) sql( replace ( "insert into results values ( 'Hash algorithm' , '$val' )" , "$val" , HashAlgo ) ) sql( replace ( "insert into results values ( 'Hash time' , printf('%,d', $val ) || ' secs' )" , "$val" , cstr(HashTime) ) ) insert into results values ( 'Number of hidden files' , _ ( select printf("%,d", count(*)) from ffTbl where type like 'file' and (attribute & 0x02 ) > 0 ) ) insert into results values ( 'Number of files created this year' , _ ( select printf("%,d", count(*)) from ffTbl where type like 'file' and date(creation) > date( 'now' , 'start of year' , '-1 day' ) ) ) insert into results values ( 'Number of folders' , _ (select printf("%,d", count(*)) from fftbl where type like 'folder' ) ) insert into results values ( 'Number of empty folders' , _ (select printf("%,d", count(*)) from emptyFolders ) ) insert into results values ( 'Deepest nest relative to Root' , _ (select max(level) from fftbl ) ) insert into results values ( 'Number of files' , _ (select printf("%,d", count(*)) from fftbl where type like 'file' ) ) insert into results values ( 'Total size of all files' , _ (select tgmk(sum(size)) from fftbl where type like 'file' ) ) insert into results values ( 'Number of empty files' , _ (select printf("%,d", count(*)) from fftbl where type like 'file' and size = 0 ) ) insert into results values ( 'Number of files with a shared size' , _ (select printf("%,d", count (*) ) from fftbl where type like 'file' and sSize = 1 ) ) insert into results values ( 'Number of duplicate files (where size>0)' , _ (select printf("%,d", sum(isDup)) from fftbl where type like 'file' ) ) insert into results values ( 'Number of unique duplicate files' , _ ( select printf("%,d", count(*)) from (select distinct hash , size from fftbl where type ='file' and ( isdup=1 or size=0) ) ) ) insert into results values ( 'Total size files with a shared size' , _ (select tgmk(sum(size)) from fftbl where type like 'file' and sSize = 1 ) ) insert into results values ( 'Total size of all duplicate files' , _ (select tgmk(sum(size)) from fftbl where type like 'file' and isDup = 1 ) ) insert into results values ( 'Size of unique duplicate files' , _ ( select tgmk( sum(size)) from (select distinct hash , size from fftbl where type ='file' and ( isdup=1 or size=0) ) ) ) sql( replace ( "insert into results values ( 'Duplicate Hash time' , printf('%,d', $val || ' secs' ) )" , "$val" , cstr(HashTime) ) ) sql (replace ( "insert into results values ( 'Hashed size per second' , tgmk( ( (select sum(size) from fftbl where type like 'file' and isDup = 1 ) / $val ) ))" , "$val", cstr(HashTime) ) ) sql (replace ( "insert into results values ( 'Hashed files per second' , ( ( (select count(isDup) from fftbl where type like 'file' and isDup = 1 ) / $val ) ))" , "$val", cstr(HashTime) ) ) insert into results values ( 'Name of largest file' , _ ( select fName from (select path|| name as fName , max(size) from fftbl where type like 'file' ) ) ) insert into results values ( 'Size of largest file' , _ ( select tgmk(sum(size)) from (select path , max(size) as size from fftbl where type like 'file' ) ) ) insert into results values ( 'Folder with most files' , _ ( select path from (select path , max(nbrFiles) from contentFolders ) ) ) insert into results values ( 'Number files in folder with most files' , _ ( select printf("%,d", cnt) from (select path , max(nbrFiles) as cnt from contentFolders ) ) ) insert into results values ( 'Size of folder with most files' , _ ( select tgmk(totalSize) from (select path , max(nbrFiles) , totalSize from contentFolders ) ) ) insert into results values ( 'First hashed file' , _ ( select path ||name from ffTbl where not hash = '' limit 1 ) ) insert into results values ( 'First hash ' , _ ( select hash from ffTbl where not hash = '' limit 1 ) ) dbInfo select * from results