Reputation: 11
We have a self-hosted Git LFS server that uses an AWS S3 bucket for storage, currently holding around 700 GB of data. I also have 20 client servers (read-only) that use a script to fetch data from this Git server.
π§** Current Architecture:** Dev Team (commit/push) β Self-hosted Git LFS server (AWS) β Data stored on S3 β 20 EC2 client instances running script.sh to fetch data
ποΈ** Previous SVN Architecture:** Dev Team (commit/push) β Self-hosted SVN server (AWS) β 20 EC2 client instances running script.sh using rsync to sync data We moved away from SVN because rsync took too long to synchronize the 700 GB of dataβit checked every file before updating modified ones, leading to significant delays. By switching to Git LFS, the goal was to reduce sync time since Git should only fetch differences between versions.
Issues We're Facing: Git LFS synchronization takes over 4 hours, which is unacceptable, especially since rsync previously handled the task in 5 to 10 minutes. The current script is supposed to download only modified files, but it doesn't seem to be working as expected.
Looking for Suggestions: Why is Git taking so long to sync, even though it should only download differences? How can I ensure that only modified LFS files are being downloaded efficiently with their data not only metadata? Is there a better strategy to handle large datasets (~700 GB) with multiple read-only clients? Any help or insights would be greatly appreciated! π
Scripts Used: rsynchMotionRessources.sh (Takes more than 4 hours to run)
# Main script setup
MOTION_DIR=/Users/ec2-user/Motion_Design
useGit() {
echo "Using Git"
init() {
git config --global pull.rebase true
git config --global credential.helper store
}
init_submodules() {
echo "Started pulling modules at $(date +'%H:%M:%S')"
git submodule update --init --recursive || { echo "ERROR: Git submodule update failed"; exit 1; }
}
clone() {
echo "Started cloning at $(date +'%H:%M:%S')"
rm -rf "$MOTION_DIR"
git clone --depth=1 "https://oauth2:$MOTION_TOKEN@$GITLAB_ENDPOINT/Motion/$REPO.git" "$MOTION_DIR" --progress || { echo "ERROR: Cloning failed"; exit 1; }
cd "$MOTION_DIR" || exit 1
init_submodules
}
pull() {
rm -rf "$MOTION_DIR/.git/modules/Ressources/index.lock"
rm -rf "$MOTION_DIR/.git/index.lock"
cd "$MOTION_DIR" || exit
git reset --hard
git checkout main
git pull -X theirs origin main
}
if [ ! -d "$MOTION_DIR/.git" ]; then
clone
else
cd "$MOTION_DIR" || exit 1
pull
fi
}
useGit
revised_rsynchMotionRessources.sh (Only fetches LFS metadata, not the actual files)
MOTION_DIR=/Users/ec2-user/Motion_Design
useGit() {
echo "Using Git"
init() {
git config --global pull.rebase true
git config --global credential.helper store
}
init_submodules() {
echo "Started pulling modules at $(date +'%H:%M:%S')"
git submodule update --init --recursive || { echo "Error: Git submodule update failed"; exit 1; }
}
clone() {
echo "Started cloning at $(date +'%H:%M:%S')"
rm -rf "$MOTION_DIR"
git clone --depth=1 "https://oauth2:$MOTION_TOKEN@$GITLAB_ENDPOINT/Motion/$REPO.git" "$MOTION_DIR" --progress || { echo "Error: Cloning failed"; exit 1; }
cd "$MOTION_DIR" || exit 1
init_submodules
}
pull() {
echo "Started fetching at $(date +'%H:%M:%S')"
export GIT_LFS_SKIP_SMUDGE=1
git fetch --depth=1 origin main --progress || { echo "Error: Git fetch failed"; exit 1; }
git reset --hard origin/main || { echo "Error: Git reset failed"; exit 1; }
export CHANGED_FILES=$(git diff --name-status --diff-filter=AMRDCT origin/main HEAD)
echo "Changed files: $CHANGED_FILES"
IFS=$'\n' read -rd '' -a FILE_ARRAY <<< "$CHANGED_FILES"
for file in "${FILE_ARRAY[@]}"; do
file_status=$(echo "$file" | awk '{print $1}')
old_file=$(echo "$file" | awk '{print $2}')
new_file=$(echo "$file" | awk '{print $3}')
case $file_status in
R) git checkout origin/main -- "$new_file" || { echo "Error checking out renamed file $new_file"; exit 1; } ;;
D) [ -f "$old_file" ] && git rm "$old_file" ;;
A|M|C|T) git checkout origin/main -- "$old_file" ;;
esac
done
}
if [ ! -d "$MOTION_DIR/.git" ]; then
clone
else
cd "$MOTION_DIR" || exit 1
pull
fi
}
useGit
Upvotes: 1
Views: 28