Reputation: 28602
There is a SQL Server undocumented extended stored procedure called xp_dirtree
, which can return all files and folders name (include subfolders) in a table format. To practice my understanding of recursive CTE, I decide to use it to get the full path of all files in a specified folder(include subfolders). However, after an hour of head scratch I still can't figure out the correct way to do it. The following code is what I have currently. Can this purpose be implemented with recursive CTE?
DECLARE @dir NVARCHAR(260) ;
SELECT @dir = N'c:\temp' ;
IF RIGHT(@dir, 1) <> '\'
SELECT @dir = @dir + '\' ;
IF OBJECT_ID('tempdb..#dirtree', 'U') IS NOT NULL
DROP TABLE #dirtree ;
CREATE TABLE #dirtree
(
id INT PRIMARY KEY
IDENTITY,
subdirectory NVARCHAR(260),
depth INT,
is_file BIT
) ;
INSERT INTO #dirtree
EXEC xp_dirtree
@dir,
0,
1 ;
SELECT *
FROM #dirtree ;
WITH files
AS (
SELECT id,
subdirectory,
depth,
is_file, subdirectory AS path
FROM #dirtree
WHERE is_file = 1
AND depth <> 1
UNION ALL
-- ...
)
SELECT *
FROM files ;
Suppose the xp_dirtree output is:
/*
id subdirectory depth is_file
--- -------------- ------- -------
1 abc.mdf 1 1
2 a 1 0
3 a.txt 2 1
4 b.txt 2 1
5 a.rb 1 1
6 aaa.flv 1 1
*/
What I want is:
/*
path
------------------
c:\temp\abc.mdf
c:\temp\a\a.txt
c:\temp\a\b.txt
c:\temp\a.rb
c:\temp\aaa.flv
*/
Upvotes: 4
Views: 8360
Reputation: 5763
Almost nine years later, and unfortunately there's no out-of-the-box solution that I know of. So I'm still looking into xp_dirtree
and need a solution to this.
I gave Arion's answer a try and found that it was producing results. However, with a large file system of more than 11K objects, it was running very slowly. I saw that it was very slow even from the get go with:
UPDATE #dirtree
SET ParentId = (SELECT MAX(Id) FROM #dirtree
WHERE Depth = T1.Depth - 1 AND Id < T1.Id)
FROM #dirtree T1
Although this isn't an islands-and-gaps problem, it has some similarities and the kind of thinking with those problems has helped me here. The code at the end is my stored procedure. The sections have some comments as to what the code is doing.
You would use it like this:
exec directory
@root = 'c:\somepath',
@depth = 3,
@outputTable = '##results';
select * from ##results;
Which results in output like:
+---------------------------------+------------+------------+-----------+--------+-----------+----------+
| path | name | nameNoExt | extension | isFile | runtimeId | parentId |
+---------------------------------+------------+------------+-----------+--------+-----------+----------+
| c:\somePath\DataMovers | DataMovers | DataMovers | NULL | 0 | 4854 | NULL |
| c:\somePath\DataMovers\main.ps1 | main.ps1 | main | ps1 | 1 | 4859 | 4854 |
+---------------------------------+------------+------------+-----------+--------+-----------+----------+
I had to build it this way because internally it takes xp_dirtree output and loads it into a temp table. This prevents the ability to take the results of the proc and load them into a table outside of the proc because of the ban on nested insert-exec statements. Don't expose @outputTable to untrusted users because it is susceptible to sql-injection. Of course re-work the proc to avoid this however it meets your needs.
/*
Summary: Lists file directory contents.
Remarks: - stackoverflow.com/q/10298910
- This assumes that the tree is put in order where
subfolders are listed right under their parent
folders. If this changes in the future, a
different logic will need to be implemented.
Example: exec directory 'c:\somepath', 3, '##results';
select * from ##results;
*/
create procedure directory
@root nvarchar(255),
@depth int,
@outputTable sysname
as
-- initializations
if @outputTable is null or not (left(@outputTable,2) = '##') or charindex(' ', @outputTable) > 0
throw 50000, '@outputTable must be a global temp table with no spaces in the name.', 1;
if exists (select 0 from tempdb.information_schema.tables where table_name = @outputTable)
begin
declare @msg nvarchar(255) = '''tempdb.dbo.' + @outputTable + ''' already exists.';
throw 50000, @msg, 1;
end
-- fetch the tree (it doesn't have full path names)
drop table if exists #dir;
create table #dir (
id int identity(1,1),
parentId int null,
path nvarchar(4000),
depth int,
isFile bit,
isLeader int default(0),
groupId int
)
insert #dir (path, depth, isFile)
exec xp_dirtree @root, @depth, 1;
-- identify the group leaders (based on a change in depth)
update d
set isLeader = _isLeader
from (
select id,
isLeader,
_isLeader = iif(depth - lag(depth) over(order by id) = 0, 0, 1)
from #dir
) d;
-- find the parents for each leader (subsetting just for leaders improves efficiency)
update #dir
set parentId = (
select max(sub.id)
from #dir sub
where sub.depth = d.depth - 1
and sub.id < d.id
and d.isLeader = 1
)
from #dir d
where d.isLeader = 1;
-- assign an identifier to each group (groups being objects that are 'siblings' of the leader)
update d
set groupId = _groupId
from (
select *, _groupId = sum(isLeader) over(order by id)
from #dir
) d;
-- set the parent id for each item based on the leader's parent id
update d
set d.parentId = leads.parentId
from #dir d
join #dir leads
on d.groupId = leads.groupId
and leads.parentId is not null;
-- convert the path names to full path names and calculate path parts
drop table if exists #pathBuilderResults;
with pathBuilder as (
select id, parentId, origId = id, path, pseudoDepth = depth
from #dir
union all
select par.id,
par.parentId,
pb.origId,
path = par.path + '\' + pb.path,
pseudoDepth = pb.pseudoDepth - 1
from pathBuilder pb
join #dir par on pb.parentId = par.id
where pb.pseudoDepth >= 2
)
select path = @root + '\' + pb.path,
name = d.path,
nameNoExt = iif(ext.value is null, d.path, left(d.path, len(d.path) - len(ext.value) - 1)),
extension = ext.value,
d.isFile,
runtimeId = pb.origId,
parentId = d.parentId
into #pathBuilderResults
from pathBuilder pb
join #dir d on pb.origId = d.id
cross apply (select value = charindex('.', d.path)) dotPos
cross apply (select value = right(d.path, len(d.path) - dotPos.value)) pseudoExt
cross apply (select value = iif(d.isFile = 1 and dotPos.value > 0, pseudoExt.value, null)) ext
where pb.pseudoDepth = 1
order by pb.origId;
-- terminate
declare @sql nvarchar(max) = 'select * into ' + @outputTable + ' from #pathBuilderResults';
exec (@sql);
Upvotes: 1
Reputation: 1
Create and use sp_dirtree @Path = 'c:\', @FileOnly = 1
create or alter proc sp_dirtree
@Path nvarchar(4000)
, @Depth int = 0
, @FileOnly bit = 0
as -- Dir tree with fullpath. sergkog 2018-11-14
set nocount on
declare @Sep nchar(1) = iif(patindex('%/%',@Path) > 0,'/','\') -- windows or posix
set @Path += iif(right(@Path,1) <> @Sep, @Sep,'')
declare @dirtree table(
Id int identity(1,1)
, subdirectory nvarchar(4000) not null
, depth int not null
, is_file bit not null
, parentId int null
)
insert @dirtree(subdirectory, depth, is_file)
exec xp_dirtree @Path, @Depth, 1
update @dirtree
set ParentId = (select max(id) from @dirtree where Depth = t1.Depth - 1 and Id < t1.Id)
from @dirtree t1
;with cte as(
select t.*
from @dirtree t
where is_file=0
union all
select t.id
, convert(nvarchar(4000), cte.subdirectory+ @Sep + t.subdirectory)
, t.depth
, t.is_file
, t.parentId
from
@dirtree t join cte on cte.id = t.parentId
)
select @Path + cte.subdirectory as FullPath
, cte.is_file as IsFile
from cte
where cte.is_file = iif(@FileOnly = 1, 1,cte.is_file)
union all
select @Path + t.subdirectory
, t.is_file
from @dirtree t
where
t.is_file = iif(@FileOnly = 1, 1,t.is_file)
and not exists(select null from cte
where cte.id=t.id
)
order by FullPath, IsFile
go
Upvotes: -2
Reputation: 1
/*
traverse directory tree and get back complete list of filenames w/ their paths
*/
declare
@dirRoot varchar(255)='\\yourdir'
declare
@sqlCmd varchar(255),
@idx int,
@dirSearch varchar(255)
declare @directories table(directoryName varchar(255), depth int, isfile int, rootName varchar(255),rowid int identity(1,1))
insert into @directories(directoryName, depth,isFile)
exec master.sys.xp_dirtree @dirRoot,1,1
if not exists(select * from @directories)
return
update @directories
set rootName = @dirRoot + '\' + directoryName
-- traverse from root directory
select @idx=min(rowId) from @directories
-- forever always ends too soon
while 1=1
begin
select @dirSearch = rootName
from @directories
where rowid=@idx
insert into @directories(directoryName, depth,isfile)
exec master.sys.xp_dirtree @dirSearch,1,1
update @directories
set rootName = @dirSearch + '\' + directoryName
where rootName is null
set @idx = @idx + 1
-- you see what i mean don't you?
if @idx > (select max(rowid) from @directories) or @idx is null
break
end
select
case isFile when 0 then 'Directory' else 'File' end [attribute],
rootName [filePath]
from @directories
order by filePath
Upvotes: 0
Reputation: 31249
If I understand you correct you want something like this:
Test data:
CREATE TABLE #dirtree
(
id INT,
subdirectory NVARCHAR(260),
depth INT ,
is_file BIT,
parentId INT
)
INSERT INTO #dirtree(id,subdirectory,depth,is_file)
VALUES
(1,'abc.mdf',1,1),(2,'a',1,0),(3,'a.txt',2,1),
(4,'b.txt',2,1),(5,'a.rb',1,1),(6,'aaa.flv',1,1)
Updated the parent id
UPDATE #dirtree
SET ParentId = (SELECT MAX(Id) FROM #dirtree
WHERE Depth = T1.Depth - 1 AND Id < T1.Id)
FROM #dirtree T1
Query
;WITH CTE
AS
(
SELECT
t.id,
t.subdirectory,
t.depth,
t.is_file
FROM
#dirtree AS t
WHERE
is_file=0
UNION ALL
SELECT
t.id,
CAST(CTE.subdirectory+'\'+t.subdirectory AS NVARCHAR(260)),
t.depth,
t.is_file
FROM
#dirtree AS t
JOIN CTE
ON CTE.id=t.parentId
)
SELECT
'c:\temp\'+CTE.subdirectory AS [path]
FROM
CTE
WHERE
CTE.is_file=1
UNION ALL
SELECT
'c:\temp\'+t.subdirectory
FROM
#dirtree AS t
WHERE
is_file=1
AND NOT EXISTS
(
SELECT
NULL
FROM
CTE
WHERE
CTE.id=t.id
)
Result
path
---------------
c:\temp\a\a.txt
c:\temp\a\b.txt
c:\temp\abc.mdf
c:\temp\a.rb
c:\temp\aaa.flv
EDIT
Changed the tables used in the example to more look like the ones in your question
Upvotes: 4