Reputation: 495
We have an svn repository with lots of directories and files and our build system needs to be able to find all of the svn:externals properties, recursively for a branch in the repository, before checking it out. Currently we use:
svn propget svn:externals -R http://url.of.repo/Branch
This has proved extremely time consuming and is a real bandwidth hog. It appears that the client is receiving all of the props for everything in the repo and doing the filtering locally (though I haven't confirmed this with wireshark). Is there a faster way to do this? Preferably some way of getting the server to only return the desired data.
Upvotes: 27
Views: 25165
Reputation: 1619
As you mentioned, it does consume network bandwidth. However, if you have access to the server where the repositories are hosted, you can run the command via the file:// protocol instead. It has proven to be faster and uses no network bandwidth.
svn propget svn:externals -R file:///path/to/repo/Branch
Also, if you already have the entire working copy (WC) checked out, you can run the command against the WC instead.
svn propget svn:externals -R /path/to/WC
Hope it helps you to achieve the results faster!
Upvotes: 30
Reputation: 484
Not sure where I found this gem, but it is quite useful for seeing the externals, including externals that have their own externals:
Windows:
svn status . | findstr /R "^X"
Linux/Unix:
svn status . | grep -E "^X"
Upvotes: 4
Reputation: 339
If you don't mind using Python and the pysvn library, here is a complete command line program I'm using for SVN externals:
"""
@file
@brief SVN externals utilities.
@author Lukasz Matecki
"""
import sys
import os
import pysvn
import argparse
class External(object):
    """
    @brief Descriptor of a single svn:externals entry.

    The constructor stores its four arguments unchanged as attributes:
    parent, remote_loc, local_loc and revision. __str__ reads
    revision.kind and, for numbered revisions, revision.number, so the
    revision is expected to be a pysvn.Revision-like object.
    """

    def __init__(self, parent, remote_loc, local_loc, revision):
        self.parent = parent
        self.remote_loc = remote_loc
        self.local_loc = local_loc
        self.revision = revision

    def __repr__(self):
        # Debug representation that never touches pysvn, so it is safe to
        # use in logs even when the revision object is missing or unusual.
        return "{0}(parent={1!r}, remote_loc={2!r}, local_loc={3!r}, revision={4!r})".format(
            type(self).__name__,
            self.parent,
            self.remote_loc,
            self.local_loc,
            self.revision,
        )

    def __str__(self):
        # A pinned (numbered) revision is rendered as "Source: URL@REV";
        # any other revision kind is rendered without the "@REV" suffix.
        if self.revision.kind == pysvn.opt_revision_kind.number:
            return (
                "Parent: {0}\n"
                "Source: {1}@{2}\n"
                "Local name: {3}"
            ).format(self.parent, self.remote_loc, self.revision.number, self.local_loc)
        else:
            return (
                "Parent: {0}\n"
                "Source: {1}\n"
                "Local name: {2}"
            ).format(self.parent, self.remote_loc, self.local_loc)
def find_externals(client, repo_path, external_path=None):
    """
    @brief Find SVN externals.
    @param client (pysvn.Client) The client to use.
    @param repo_path (str) The repository path to analyze.
    @param external_path (str) The URL of the external to find; if omitted, all externals will be searched.
    @returns A generator yielding External descriptors; it yields nothing if none are found.
    @note Only definitions of the form "URL[@PEG] LOCAL_NAME" are understood. Blank lines and
          '#' comment lines are skipped; any other line that does not consist of exactly two
          whitespace-separated fields raises ValueError.
    """
    repo_root = client.root_url_from_path(repo_path)

    def split_peg(target):
        # Split a trailing "@PEG" off the URL. rpartition is used so that an
        # '@' earlier in the URL (e.g. "https://user@host/...") is left alone.
        url, sep, peg = target.rpartition("@")
        if sep and peg.isdigit():
            return url, pysvn.Revision(pysvn.opt_revision_kind.number, int(peg))
        if sep and peg.upper() == "HEAD":
            return url, pysvn.Revision(pysvn.opt_revision_kind.head)
        # No recognizable peg revision: the whole token is the URL.
        return target, pysvn.Revision(pysvn.opt_revision_kind.head)

    def parse(ext_prop):
        # ext_prop maps the location the property was read from to the raw
        # multi-line svn:externals value.
        for parent in ext_prop:
            for line in ext_prop[parent].splitlines():
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                path, name = line.split()
                # Only a leading "^/" means "relative to the repository
                # root"; carets elsewhere in the URL must not be rewritten.
                if path.startswith("^/"):
                    path = repo_root + path[1:]
                url, rev = split_peg(path)
                if external_path and external_path != url:
                    continue
                yield External(parent, url, name, rev)

    for entry in client.ls(repo_path, recurse=True):
        # Only directories that carry properties can hold svn:externals, so
        # the per-entry propget round trip is skipped for everything else.
        if entry["kind"] == pysvn.node_kind.dir and entry["has_props"]:
            externals = client.propget("svn:externals", entry["name"])
            if externals:
                for e in parse(externals):
                    yield e
def check_externals(client, externals_list):
    """
    @brief Report, for every external, whether its source can be looked up.
    @param client (pysvn.Client) The client used for the info2() lookup.
    @param externals_list An iterable of External descriptors (remote_loc and revision are read).
    @returns None; one "[N] Existing:" or "[N] Not found:" report per external is printed.
    """
    for i, e in enumerate(externals_list):
        description = "\n".join([" {0}".format(line) for line in str(e).splitlines()])
        try:
            info = client.info2(e.remote_loc, revision=e.revision, recurse=False)
            # Reading the URL of the first result makes an empty or malformed
            # answer fail here, so it is reported as "Not found" as well.
            _ = info[0][1].URL
        except Exception:
            # Any lookup failure means "not found". Exception (rather than a
            # bare except) lets KeyboardInterrupt and SystemExit propagate.
            print("[{0}] Not found:\n{1}".format(i + 1, description))
        else:
            print("[{0}] Existing:\n{1}".format(i + 1, description))
def main(cmdargs):
    """
    @brief Command line entry point.
    @param cmdargs The full argument vector including the program name at index 0
                   (i.e. sys.argv); everything after the first element is parsed.
    @returns None. argparse raises SystemExit itself on --help or on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="SVN externals processing.",
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     prefix_chars='-+')
    SUPPORTED_COMMANDS = ("check", "references")
    # "action" is a required positional, so no default is given: argparse
    # would never use one without nargs="?".
    parser.add_argument(
        "action",
        type=str,
        choices=SUPPORTED_COMMANDS,
        help=("the operation to execute:\n"
              "'check' to validate all externals in a given location;\n"
              "'references' to print all references to a given location"))
    parser.add_argument(
        "url",
        type=str,
        help="the URL to operate on")
    parser.add_argument(
        "--repo", "-r",
        dest="repo",
        type=str,
        default=None,
        help="the repository (or path within) to perform the operation on, if omitted is inferred from url parameter")
    # Parse the arguments that were actually passed in instead of silently
    # falling back to sys.argv; element 0 is the program name.
    args = parser.parse_args(cmdargs[1:])
    client = pysvn.Client()
    if args.action == "check":
        externals = find_externals(client, args.url)
        check_externals(client, externals)
    elif args.action == "references":
        if args.repo:
            repo_root = args.repo
        else:
            repo_root = client.root_url_from_path(args.url)
        for i, e in enumerate(find_externals(client, repo_root, args.url)):
            print("[{0}] Reference:\n{1}".format(i + 1, "\n".join([" {0}".format(line) for line in str(e).splitlines()])))
# Script entry point: hand the full argument vector to main() and use its
# return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv)
    sys.exit(exit_status)
This should work in both Python 2 and Python 3. You can use it like this (actual addresses removed):
python svn_externals.py references https://~~~~~~~~~~~~~~/cmd_utils.py
[1] Reference:
Parent: https://~~~~~~~~~~~~~~/BEFORE_MK2/scripts/utils
Source: https://~~~~~~~~~~~~~~/tools/python/cmd_utils.py
Local name: cmd_utils.py
[2] Reference:
Parent: https://~~~~~~~~~~~~~~/VTB-1425_PCU/scripts/utils
Source: https://~~~~~~~~~~~~~~/tools/python/cmd_utils.py
Local name: cmd_utils.py
[3] Reference:
Parent: https://~~~~~~~~~~~~~~/scripts/utils
Source: https://~~~~~~~~~~~~~~/tools/python/cmd_utils.py
Local name: cmd_utils.py
As for the performance, this works quite fast (although my repository is quite small). You have to check it for yourself.
Upvotes: 0
Reputation: 97280
This is not an ideal solution (it may have side effects) and it does not directly answer your question, but:
You can rewrite all of the externals definitions and put the rewritten definitions in one common, known place. That way, after the change, you no longer need a recursive propget (pg).
Upvotes: 0
Reputation: 495
I finally came up with a solution. I decided to break up the request into multiple small svn requests and then make each of those a task to be run by a thread pool. This kind of slams the svn server, but in our case the svn server is on the LAN and this query is only made during full builds so it doesn't seem to be an issue.
import os
import sys
import threading
import ThreadPool
# Shared pool on which every svn query in this script is queued as a task.
# NOTE(review): 8 is presumably the number of worker threads -- confirm
# against the ThreadPool module, which is not shown here.
thread_pool = ThreadPool.ThreadPool(8)
# Results: maps an external's path to a (url, revision) tuple; filled in by
# getExternRev.
externs_dict = {}
# Guards writes to externs_dict, which happen from pool tasks.
externs_lock = threading.Lock()
def getExternRev( path, url ):
    """Record the last-changed revision of ``url`` under ``path`` in externs_dict.

    Runs ``svn info`` on ``url`` and stores ``(url, revision)`` in the shared
    externs_dict, where ``revision`` is the text after "Last Changed Rev:".
    If the output contains no such line (for example because the command
    failed), ``revision`` is stored as None instead of raising.
    """
    # NOTE(review): url is interpolated into a shell command line; a URL
    # containing a double quote would break the quoting.
    cmd = 'svn info "%s"' % url
    pipe = os.popen(cmd, 'r')
    try:
        data = pipe.read().splitlines()
    finally:
        # Always release the pipe, even if reading fails.
        pipe.close()

    #Harvest last changed rev
    revision = None
    for line in data:
        if "Last Changed Rev" in line:
            revision = line.split(":")[1].strip()

    # The with-statement releases the lock even if the assignment raises.
    with externs_lock:
        externs_dict[path] = (url, revision)
def getExterns(url, base_dir):
    """Queue a revision lookup for every external defined directly on ``url``.

    Runs ``svn propget svn:externals`` on ``url``. Each definition line is
    read as "LOCAL_PATH URL ..." (first field local path, second field
    URL); the local path is prefixed with ``base_dir`` and a getExternRev
    task is added to the shared thread pool. Blank lines, '#' comment lines
    and lines with fewer than two fields are skipped.
    """
    cmd = 'svn propget svn:externals "%s"' % url
    pipe = os.popen(cmd, 'r')
    try:
        data = pipe.read().splitlines()
    finally:
        pipe.close()

    for line in data:
        fields = line.split()
        # Splitting before testing also rejects whitespace-only lines, which
        # are truthy as strings but have no fields to index.
        if len(fields) < 2 or fields[0].startswith('#'):
            continue
        path = base_dir + fields[0]
        extern_url = fields[1]
        thread_pool.add_task( getExternRev, path, extern_url )
def processDir(url, base_dir):
    """Scan one directory URL: queue its externals lookup and recurse into subdirectories.

    A getExterns task is queued for ``url`` itself; then ``svn list`` is run
    and, for every entry ending in '/' (a directory), a new processDir task
    is queued with the entry name appended to both the URL and ``base_dir``.
    """
    thread_pool.add_task( getExterns, url, base_dir )
    cmd = 'svn list "%s"' % url
    pipe = os.popen(cmd, 'r')
    try:
        listing = pipe.read().splitlines()
    finally:
        pipe.close()

    # Entry names are appended directly to the URL, so the parent must end
    # with a slash; the initial URL handed in by a caller may lack one.
    parent_url = url if url.endswith('/') else url + '/'
    for node in listing:
        if node.endswith('/'):
            thread_pool.add_task( processDir, parent_url + node, base_dir + node )
def analyzePath(url, base_dir = ''):
    """Start scanning at ``url`` and wait on the shared thread pool.

    Queues a processDir task for ``url``/``base_dir`` and then calls
    wait_completion() on the pool (presumably blocking until every queued
    task, including those queued by other tasks, has finished -- confirm
    against the ThreadPool module).
    """
    thread_pool.add_task(
        processDir,
        url,
        base_dir,
    )
    thread_pool.wait_completion()
# Scan the repository, then dump the collected {path: (url, revision)} map.
analyzePath( "http://url/to/repository" )
# print(...) with a single argument behaves the same on Python 2 and 3,
# unlike the Python 2-only print statement.
print(externs_dict)
Upvotes: 3
Reputation: 1353
It is slow because of the -R switch; all directories within your repository path are searched for the property recursively, which is a lot of work.
Upvotes: 0