diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 4017ed82ca4341..c19530b086311a 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,8 +1,10 @@ - - [ ] I was not able to find an [open](https://github.com/git-for-windows/git/issues?q=is%3Aopen) or [closed](https://github.com/git-for-windows/git/issues?q=is%3Aclosed) issue matching what I'm seeing + - [ ] I was not able to find an [open](https://github.com/microsoft/git/issues?q=is%3Aopen) + or [closed](https://github.com/microsoft/git/issues?q=is%3Aclosed) issue matching + what I'm seeing, including in [the `git-for-windows/git` tracker](https://github.com/git-for-windows/git/issues). ### Setup - - Which version of Git for Windows are you using? Is it 32-bit or 64-bit? + - Which version of `microsoft/git` are you using? Is it 32-bit or 64-bit? ``` $ git --version --build-options @@ -10,24 +12,22 @@ $ git --version --build-options ** insert your machine's response here ** ``` - - Which version of Windows are you running? Vista, 7, 8, 10? Is it 32-bit or 64-bit? +Are you using Scalar or VFS for Git? + +** insert your answer here ** + +If VFS for Git, then what version? ``` -$ cmd.exe /c ver +$ gvfs version ** insert your machine's response here ** ``` - - What options did you set as part of the installation? Or did you choose the - defaults? + - Which version of Windows are you running? Vista, 7, 8, 10? Is it 32-bit or 64-bit? ``` -# One of the following: -> type "C:\Program Files\Git\etc\install-options.txt" -> type "C:\Program Files (x86)\Git\etc\install-options.txt" -> type "%USERPROFILE%\AppData\Local\Programs\Git\etc\install-options.txt" -> type "$env:USERPROFILE\AppData\Local\Programs\Git\etc\install-options.txt" -$ cat /etc/install-options.txt +$ cmd.exe /c ver ** insert your machine's response here ** ``` @@ -58,7 +58,11 @@ $ cat /etc/install-options.txt ** insert here ** - - If the problem was occurring with a specific repository, can you provide the - URL to that repository to help us with testing? + - If the problem was occurring with a specific repository, can you specify + the repository? -** insert URL here ** + * [ ] Public repo: **insert URL here** + * [ ] Windows monorepo + * [ ] Office monorepo + * [ ] Other Microsoft-internal repo: **insert name here** + * [ ] Other internal repo. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 831ef6f19f1d11..3cb48d8582f31c 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,19 +1,10 @@ Thanks for taking the time to contribute to Git! -Those seeking to contribute to the Git for Windows fork should see -http://gitforwindows.org/#contribute on how to contribute Windows specific -enhancements. - -If your contribution is for the core Git functions and documentation -please be aware that the Git community does not use the github.com issues -or pull request mechanism for their contributions. - -Instead, we use the Git mailing list (git@vger.kernel.org) for code and -documentation submissions, code reviews, and bug reports. The -mailing list is plain text only (anything with HTML is sent directly -to the spam folder). - -Nevertheless, you can use GitGitGadget (https://gitgitgadget.github.io/) -to conveniently send your Pull Requests commits to our mailing list. - -Please read the "guidelines for contributing" linked above! +This fork contains changes specific to monorepo scenarios. 
If you are an +external contributor, then please detail your reason for submitting to +this fork: + +* [ ] This is an early version of work already under review upstream. +* [ ] This change only applies to interactions with Azure DevOps and the + GVFS Protocol. +* [ ] This change only applies to the virtualization hook and VFS for Git. diff --git a/.github/macos-installer/Makefile b/.github/macos-installer/Makefile new file mode 100644 index 00000000000000..df339bd921df23 --- /dev/null +++ b/.github/macos-installer/Makefile @@ -0,0 +1,116 @@ +SHELL := /bin/bash +SUDO := sudo +C_INCLUDE_PATH := /usr/include +CPLUS_INCLUDE_PATH := /usr/include +LD_LIBRARY_PATH := /usr/lib + +OSX_VERSION := $(shell sw_vers -productVersion) +TARGET_FLAGS := -mmacosx-version-min=$(OSX_VERSION) -DMACOSX_DEPLOYMENT_TARGET=$(OSX_VERSION) + +ARCH := x86_64 +ARCH_CODE := x86_64 +ARCH_FLAGS_x86_64 := -arch x86_64 + +CFLAGS := $(TARGET_FLAGS) $(ARCH_FLAGS_${ARCH_CODE}) +LDFLAGS := $(TARGET_FLAGS) $(ARCH_FLAGS_${ARCH_CODE}) + +PREFIX := /usr/local +GIT_PREFIX := $(PREFIX)/git + +BUILD_CODE := intel-$(ARCH_CODE) +BUILD_DIR := $(GITHUB_WORKSPACE)/payload +DESTDIR := $(PWD)/stage/git-$(BUILD_CODE)-$(VERSION) +ARTIFACTDIR := build_artifacts +SUBMAKE := $(MAKE) C_INCLUDE_PATH="$(C_INCLUDE_PATH)" CPLUS_INCLUDE_PATH="$(CPLUS_INCLUDE_PATH)" LD_LIBRARY_PATH="$(LD_LIBRARY_PATH)" TARGET_FLAGS="$(TARGET_FLAGS)" CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" NO_GETTEXT=1 NO_DARWIN_PORTS=1 prefix=$(GIT_PREFIX) GIT_BUILT_FROM_COMMIT="$(GIT_BUILT_FROM_COMMIT)" DESTDIR=$(DESTDIR) +CORES := $(shell bash -c "sysctl hw.ncpu | awk '{print \$$2}'") + +.PHONY: image pkg payload + +.SECONDARY: + +$(DESTDIR)$(GIT_PREFIX)/VERSION-$(VERSION)-$(BUILD_CODE): + rm -f $(BUILD_DIR)/git-$(VERSION)/osx-installed* + mkdir -p $(DESTDIR)$(GIT_PREFIX) + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-built-keychain: + cd $(BUILD_DIR)/git-$(VERSION)/contrib/credential/osxkeychain; $(SUBMAKE) CFLAGS="$(CFLAGS) -g -O2 -Wall" + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-built: + [ -d $(DESTDIR)$(GIT_PREFIX) ] && $(SUDO) rm -rf $(DESTDIR) || echo ok + cd $(BUILD_DIR)/git-$(VERSION); $(SUBMAKE) -j $(CORES) all strip + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-installed-bin: $(BUILD_DIR)/git-$(VERSION)/osx-built $(BUILD_DIR)/git-$(VERSION)/osx-built-keychain + cd $(BUILD_DIR)/git-$(VERSION); $(SUBMAKE) install + cp $(BUILD_DIR)/git-$(VERSION)/contrib/credential/osxkeychain/git-credential-osxkeychain $(DESTDIR)$(GIT_PREFIX)/bin/git-credential-osxkeychain + mkdir -p $(DESTDIR)$(GIT_PREFIX)/contrib/completion + cp $(BUILD_DIR)/git-$(VERSION)/contrib/completion/git-completion.bash $(DESTDIR)$(GIT_PREFIX)/contrib/completion/ + cp $(BUILD_DIR)/git-$(VERSION)/contrib/completion/git-completion.zsh $(DESTDIR)$(GIT_PREFIX)/contrib/completion/ + cp $(BUILD_DIR)/git-$(VERSION)/contrib/completion/git-prompt.sh $(DESTDIR)$(GIT_PREFIX)/contrib/completion/ + # This is needed for Git-Gui, GitK + mkdir -p $(DESTDIR)$(GIT_PREFIX)/lib/perl5/site_perl + [ ! 
-f $(DESTDIR)$(GIT_PREFIX)/lib/perl5/site_perl/Error.pm ] && cp $(BUILD_DIR)/git-$(VERSION)/perl/private-Error.pm $(DESTDIR)$(GIT_PREFIX)/lib/perl5/site_perl/Error.pm || echo done + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-installed-man: $(BUILD_DIR)/git-$(VERSION)/osx-installed-bin + mkdir -p $(DESTDIR)$(GIT_PREFIX)/share/man + cp -R $(GITHUB_WORKSPACE)/manpages/ $(DESTDIR)$(GIT_PREFIX)/share/man + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-built-subtree: + cd $(BUILD_DIR)/git-$(VERSION)/contrib/subtree; $(SUBMAKE) XML_CATALOG_FILES="$(XML_CATALOG_FILES)" all git-subtree.1 + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-installed-subtree: $(BUILD_DIR)/git-$(VERSION)/osx-built-subtree + mkdir -p $(DESTDIR) + cd $(BUILD_DIR)/git-$(VERSION)/contrib/subtree; $(SUBMAKE) XML_CATALOG_FILES="$(XML_CATALOG_FILES)" install install-man + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-installed-assets: $(BUILD_DIR)/git-$(VERSION)/osx-installed-bin + mkdir -p $(DESTDIR)$(GIT_PREFIX)/etc + cat assets/etc/gitconfig.osxkeychain >> $(DESTDIR)$(GIT_PREFIX)/etc/gitconfig + cp assets/uninstall.sh $(DESTDIR)$(GIT_PREFIX)/uninstall.sh + sh -c "echo .DS_Store >> $(DESTDIR)$(GIT_PREFIX)/share/git-core/templates/info/exclude" + +symlinks: + mkdir -p $(ARTIFACTDIR)$(PREFIX)/bin + cd $(ARTIFACTDIR)$(PREFIX)/bin; find ../git/bin -type f -exec ln -sf {} \; + for man in man1 man3 man5 man7; do mkdir -p $(ARTIFACTDIR)$(PREFIX)/share/man/$$man; (cd $(ARTIFACTDIR)$(PREFIX)/share/man/$$man; ln -sf ../../../git/share/man/$$man/* ./); done + ruby ../scripts/symlink-git-hardlinks.rb $(ARTIFACTDIR) + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-installed: $(DESTDIR)$(GIT_PREFIX)/VERSION-$(VERSION)-$(BUILD_CODE) $(BUILD_DIR)/git-$(VERSION)/osx-installed-man $(BUILD_DIR)/git-$(VERSION)/osx-installed-assets $(BUILD_DIR)/git-$(VERSION)/osx-installed-subtree + find $(DESTDIR)$(GIT_PREFIX) -type d -exec chmod ugo+rx {} \; + find $(DESTDIR)$(GIT_PREFIX) -type f -exec chmod ugo+r {} \; + touch $@ + +$(BUILD_DIR)/git-$(VERSION)/osx-built-assert-$(ARCH_CODE): $(BUILD_DIR)/git-$(VERSION)/osx-built +ifeq ("$(ARCH_CODE)", "universal") + File $(BUILD_DIR)/git-$(VERSION)/git + File $(BUILD_DIR)/git-$(VERSION)/contrib/credential/osxkeychain/git-credential-osxkeychain +else + [ "$$(File $(BUILD_DIR)/git-$(VERSION)/git | cut -f 5 -d' ')" == "$(ARCH_CODE)" ] + [ "$$(File $(BUILD_DIR)/git-$(VERSION)/contrib/credential/osxkeychain/git-credential-osxkeychain | cut -f 5 -d' ')" == "$(ARCH_CODE)" ] +endif + touch $@ + +disk-image/VERSION-$(VERSION)-$(ARCH_CODE): + rm -f disk-image/*.pkg disk-image/VERSION-* disk-image/.DS_Store + mkdir disk-image + touch "$@" + +disk-image/git-$(VERSION)-$(BUILD_CODE).pkg: disk-image/VERSION-$(VERSION)-$(ARCH_CODE) symlinks + pkgbuild --identifier com.git.pkg --version $(VERSION) --root $(ARTIFACTDIR)$(PREFIX) --scripts assets/scripts --install-location $(PREFIX) --component-plist ./assets/git-components.plist disk-image/git-$(VERSION)-$(BUILD_CODE).pkg + +git-%-$(BUILD_CODE).dmg: + hdiutil create git-$(VERSION)-$(BUILD_CODE).uncompressed.dmg -fs HFS+ -srcfolder disk-image -volname "Git $(VERSION) Intel $(ARCH)" -ov + hdiutil convert -format UDZO -o $@ git-$(VERSION)-$(BUILD_CODE).uncompressed.dmg + rm -f git-$(VERSION)-$(BUILD_CODE).uncompressed.dmg + +payload: $(BUILD_DIR)/git-$(VERSION)/osx-installed $(BUILD_DIR)/git-$(VERSION)/osx-built-assert-$(ARCH_CODE) + +pkg: disk-image/git-$(VERSION)-$(BUILD_CODE).pkg + +image: git-$(VERSION)-$(BUILD_CODE).dmg diff --git 
a/.github/macos-installer/assets/etc/gitconfig.osxkeychain b/.github/macos-installer/assets/etc/gitconfig.osxkeychain new file mode 100644 index 00000000000000..788266b3a40a9d --- /dev/null +++ b/.github/macos-installer/assets/etc/gitconfig.osxkeychain @@ -0,0 +1,2 @@ +[credential] + helper = osxkeychain diff --git a/.github/macos-installer/assets/git-components.plist b/.github/macos-installer/assets/git-components.plist new file mode 100644 index 00000000000000..78db36777df3ed --- /dev/null +++ b/.github/macos-installer/assets/git-components.plist @@ -0,0 +1,18 @@ + + + + + + BundleHasStrictIdentifier + + BundleIsRelocatable + + BundleIsVersionChecked + + BundleOverwriteAction + upgrade + RootRelativeBundlePath + git/share/git-gui/lib/Git Gui.app + + + diff --git a/.github/macos-installer/assets/scripts/postinstall b/.github/macos-installer/assets/scripts/postinstall new file mode 100755 index 00000000000000..94056db9b7b864 --- /dev/null +++ b/.github/macos-installer/assets/scripts/postinstall @@ -0,0 +1,62 @@ +#!/bin/bash +INSTALL_DST="$2" +SCALAR_C_CMD="$INSTALL_DST/git/bin/scalar" +SCALAR_DOTNET_CMD="/usr/local/scalar/scalar" +SCALAR_UNINSTALL_SCRIPT="/usr/local/scalar/uninstall_scalar.sh" + +function cleanupScalar() +{ + echo "checking whether Scalar was installed" + if [ ! -f "$SCALAR_C_CMD" ]; then + echo "Scalar not installed; exiting..." + return 0 + fi + echo "Scalar is installed!" + + echo "looking for Scalar.NET" + if [ ! -f "$SCALAR_DOTNET_CMD" ]; then + echo "Scalar.NET not found; exiting..." + return 0 + fi + echo "Scalar.NET found!" + + currentUser=$(echo "show State:/Users/ConsoleUser" | scutil | awk '/Name :/ { print $3 }') + + # Re-register Scalar.NET repositories with the newly-installed Scalar + for repo in $($SCALAR_DOTNET_CMD list); do + ( + PATH="$INSTALL_DST/git/bin:$PATH" + sudo -u "$currentUser" scalar register $repo || \ + echo "warning: skipping re-registration of $repo" + ) + done + + # Uninstall Scalar.NET + echo "removing Scalar.NET" + + # Add /usr/local/bin to path - default install location of Homebrew + PATH="/usr/local/bin:$PATH" + if (sudo -u "$currentUser" brew list --cask scalar); then + # Remove from Homebrew + sudo -u "$currentUser" brew remove --cask scalar || echo "warning: Scalar.NET uninstall via Homebrew completed with code $?" + echo "Scalar.NET uninstalled via Homebrew!" + elif (sudo -u "$currentUser" brew list --cask scalar-azrepos); then + sudo -u "$currentUser" brew remove --cask scalar-azrepos || echo "warning: Scalar.NET with GVFS uninstall via Homebrew completed with code $?" + echo "Scalar.NET with GVFS uninstalled via Homebrew!" + elif [ -f $SCALAR_UNINSTALL_SCRIPT ]; then + # If not installed with Homebrew, manually remove package + sudo -S sh $SCALAR_UNINSTALL_SCRIPT || echo "warning: Scalar.NET uninstall completed with code $?" + echo "Scalar.NET uninstalled!" + else + echo "warning: Scalar.NET uninstall script not found" + fi + + # Re-create the Scalar symlink, in case it was removed by the Scalar.NET uninstall operation + mkdir -p $INSTALL_DST/bin + /bin/ln -Fs "$SCALAR_C_CMD" "$INSTALL_DST/bin/scalar" +} + +# Run Scalar cleanup (will exit if not applicable) +cleanupScalar + +exit 0 \ No newline at end of file diff --git a/.github/macos-installer/assets/uninstall.sh b/.github/macos-installer/assets/uninstall.sh new file mode 100755 index 00000000000000..4fc79fbaa2e652 --- /dev/null +++ b/.github/macos-installer/assets/uninstall.sh @@ -0,0 +1,34 @@ +#!/bin/bash -e +if [ ! 
-r "/usr/local/git" ]; then + echo "Git doesn't appear to be installed via this installer. Aborting" + exit 1 +fi + +if [ "$1" != "--yes" ]; then + echo "This will uninstall git by removing /usr/local/git/, and symlinks" + printf "Type 'yes' if you are sure you wish to continue: " + read response +else + response="yes" +fi + +if [ "$response" == "yes" ]; then + # remove all of the symlinks we've created + pkgutil --files com.git.pkg | grep bin | while read f; do + if [ -L /usr/local/$f ]; then + sudo rm /usr/local/$f + fi + done + + # forget receipts. + pkgutil --packages | grep com.git.pkg | xargs -I {} sudo pkgutil --forget {} + echo "Uninstalled" + + # The guts all go here. + sudo rm -rf /usr/local/git/ +else + echo "Aborted" + exit 1 +fi + +exit 0 diff --git a/.github/scripts/run-esrp-signing.py b/.github/scripts/run-esrp-signing.py new file mode 100644 index 00000000000000..725bf4580f5f1b --- /dev/null +++ b/.github/scripts/run-esrp-signing.py @@ -0,0 +1,135 @@ +import argparse +import json +import os +import glob +import pprint +import subprocess +import sys +import re + +parser = argparse.ArgumentParser(description='Sign binaries for macOS') +parser.add_argument('path', help='Path to file for signing') +parser.add_argument('keycode', help='Platform-specific key code for signing') +parser.add_argument('opcode', help='Platform-specific operation code for signing') +# Setting nargs=argparse.REMAINDER allows us to pass in params that begin with `--` +parser.add_argument('--params', nargs=argparse.REMAINDER, help='Parameters for signing') +args = parser.parse_args() + +esrp_tool = os.path.join("esrp", "tools", "EsrpClient.exe") + +aad_id = os.environ['AZURE_AAD_ID'].strip() +workspace = os.environ['GITHUB_WORKSPACE'].strip() + +source_location = args.path +files = glob.glob(os.path.join(source_location, "*")) + +print("Found files:") +pprint.pp(files) + +auth_json = { + "Version": "1.0.0", + "AuthenticationType": "AAD_CERT", + "TenantId": "72f988bf-86f1-41af-91ab-2d7cd011db47", + "ClientId": f"{aad_id}", + "AuthCert": { + "SubjectName": f"CN={aad_id}.microsoft.com", + "StoreLocation": "LocalMachine", + "StoreName": "My", + "SendX5c" : "true" + }, + "RequestSigningCert": { + "SubjectName": f"CN={aad_id}", + "StoreLocation": "LocalMachine", + "StoreName": "My" + } +} + +input_json = { + "Version": "1.0.0", + "SignBatches": [ + { + "SourceLocationType": "UNC", + "SourceRootDirectory": source_location, + "DestinationLocationType": "UNC", + "DestinationRootDirectory": workspace, + "SignRequestFiles": [], + "SigningInfo": { + "Operations": [ + { + "KeyCode": f"{args.keycode}", + "OperationCode": f"{args.opcode}", + "Parameters": {}, + "ToolName": "sign", + "ToolVersion": "1.0", + } + ] + } + } + ] +} + +# add files to sign +for f in files: + name = os.path.basename(f) + input_json["SignBatches"][0]["SignRequestFiles"].append( + { + "SourceLocation": name, + "DestinationLocation": os.path.join("signed", name), + } + ) + +# add parameters to input.json (e.g. 
enabling the hardened runtime for macOS) +if args.params is not None: + i = 0 + while i < len(args.params): + input_json["SignBatches"][0]["SigningInfo"]["Operations"][0]["Parameters"][args.params[i]] = args.params[i + 1] + i += 2 + +policy_json = { + "Version": "1.0.0", + "Intent": "production release", + "ContentType": "binary", +} + +configs = [ + ("auth.json", auth_json), + ("input.json", input_json), + ("policy.json", policy_json), +] + +for filename, data in configs: + with open(filename, 'w') as fp: + json.dump(data, fp) + +# Run ESRP Client +esrp_out = "esrp_out.json" +result = subprocess.run( + [esrp_tool, "sign", + "-a", "auth.json", + "-i", "input.json", + "-p", "policy.json", + "-o", esrp_out, + "-l", "Verbose"], + capture_output=True, + text=True, + cwd=workspace) + +# Scrub log before printing +log = re.sub(r'^.+Uploading.*to\s*destinationUrl\s*(.+?),.+$', + '***', + result.stdout, + flags=re.IGNORECASE|re.MULTILINE) +print(log) + +if result.returncode != 0: + print("Failed to run ESRPClient.exe") + sys.exit(1) + +if os.path.isfile(esrp_out): + print("ESRP output json:") + with open(esrp_out, 'r') as fp: + pprint.pp(json.load(fp)) + +for file in files: + if os.path.isfile(os.path.join("signed", file)): + print(f"Success!\nSigned {file}") \ No newline at end of file diff --git a/.github/scripts/set-up-esrp.ps1 b/.github/scripts/set-up-esrp.ps1 new file mode 100644 index 00000000000000..ca56266e33f553 --- /dev/null +++ b/.github/scripts/set-up-esrp.ps1 @@ -0,0 +1,12 @@ +# Install ESRP client +az storage blob download --file esrp.zip --auth-mode login --account-name esrpsigningstorage --container signing-resources --name microsoft.esrpclient.1.2.76.nupkg +Expand-Archive -Path esrp.zip -DestinationPath .\esrp + +# Install certificates +az keyvault secret download --vault-name "$env:AZURE_VAULT" --name "$env:AUTH_CERT" --file out.pfx +certutil -f -importpfx out.pfx +Remove-Item out.pfx + +az keyvault secret download --vault-name "$env:AZURE_VAULT" --name "$env:REQUEST_SIGNING_CERT" --file out.pfx +certutil -f -importpfx out.pfx +Remove-Item out.pfx \ No newline at end of file diff --git a/.github/scripts/symlink-git-hardlinks.rb b/.github/scripts/symlink-git-hardlinks.rb new file mode 100644 index 00000000000000..174802ccc85d93 --- /dev/null +++ b/.github/scripts/symlink-git-hardlinks.rb @@ -0,0 +1,19 @@ +#!/usr/bin/env ruby + +install_prefix = ARGV[0] +puts install_prefix +git_binary = File.join(install_prefix, '/usr/local/git/bin/git') + +[ + ['git' , File.join(install_prefix, '/usr/local/git/bin')], + ['../../bin/git', File.join(install_prefix, '/usr/local/git/libexec/git-core')] +].each do |link, path| + Dir.glob(File.join(path, '*')).each do |file| + next if file == git_binary + puts "#{file} #{File.size(file)} == #{File.size(git_binary)}" + next unless File.size(file) == File.size(git_binary) + puts "Symlinking #{file}" + puts `ln -sf #{link} #{file}` + exit $?.exitstatus if $?.exitstatus != 0 + end +end \ No newline at end of file diff --git a/.github/workflows/build-git-installers.yml b/.github/workflows/build-git-installers.yml new file mode 100644 index 00000000000000..0fd5df2888540c --- /dev/null +++ b/.github/workflows/build-git-installers.yml @@ -0,0 +1,840 @@ +name: build-git-installers + +on: + push: + tags: + - 'v[0-9]*vfs*' # matches "vvfs" + +jobs: + # Check prerequisites for the workflow + prereqs: + runs-on: ubuntu-latest + env: + AZ_SUB: ${{ secrets.AZURE_SUBSCRIPTION }} + AZ_CREDS: ${{ secrets.AZURE_CREDENTIALS }} + outputs: + tag_name: ${{ 
steps.tag.outputs.name }} # The full name of the tag, e.g. v2.32.0.vfs.0.0 + tag_version: ${{ steps.tag.outputs.version }} # The version number (without preceding "v"), e.g. 2.32.0.vfs.0.0 + deb_signable: ${{ steps.deb.outputs.signable }} # Whether the credentials needed to sign the .deb package are available + steps: + - name: Determine tag to build + run: | + echo "::set-output name=name::${GITHUB_REF#refs/tags/}" + echo "::set-output name=version::${GITHUB_REF#refs/tags/v}" + id: tag + - name: Determine whether signing certificates are present + run: echo "::set-output name=signable::$([[ $AZ_SUB != '' && $AZ_CREDS != '' ]] && echo 'true' || echo 'false')" + id: deb + - name: Clone git + uses: actions/checkout@v2 + - name: Validate the tag identified with trigger + run: | + die () { + echo "::error::$*" >&2 + exit 1 + } + + # `actions/checkout` only downloads the peeled tag (i.e. the commit) + git fetch origin +$GITHUB_REF:$GITHUB_REF + + # Verify that the tag is annotated + test $(git cat-file -t "$GITHUB_REF") == "tag" || die "Tag ${{ steps.tag.outputs.name }} is not annotated" + + # Verify tag follows rules in GIT-VERSION-GEN (i.e., matches the specified "DEF_VER" in + # GIT-VERSION-FILE) and matches tag determined from trigger + make GIT-VERSION-FILE + test "${{ steps.tag.outputs.version }}" == "$(sed -n 's/^GIT_VERSION = //p'< GIT-VERSION-FILE)" || die "GIT-VERSION-FILE tag does not match ${{ steps.tag.outputs.name }}" + # End check prerequisites for the workflow + + # Build Windows installers (x86_64 installer & portable) + windows_pkg: + runs-on: windows-2019 + needs: prereqs + env: + GPG_OPTIONS: "--batch --yes --no-tty --list-options no-show-photos --verify-options no-show-photos --pinentry-mode loopback" + HOME: "${{github.workspace}}\\home" + USERPROFILE: "${{github.workspace}}\\home" + steps: + - name: Configure user + shell: bash + run: + USER_NAME="${{github.actor}}" && + USER_EMAIL="${{github.actor}}@users.noreply.github.com" && + mkdir -p "$HOME" && + git config --global user.name "$USER_NAME" && + git config --global user.email "$USER_EMAIL" && + echo "PACKAGER=$USER_NAME <$USER_EMAIL>" >>$GITHUB_ENV + - uses: git-for-windows/setup-git-for-windows-sdk@v1 + with: + flavor: build-installers + - name: Clone build-extra + shell: bash + run: | + git clone --filter=blob:none --single-branch -b main https://github.com/git-for-windows/build-extra /usr/src/build-extra + - name: Clone git + shell: bash + run: | + # Since we cannot directly clone a specified tag (as we would a branch with `git clone -b `), + # this clone has to be done manually (via init->fetch->reset). 
+ + tag_name="${{ needs.prereqs.outputs.tag_name }}" && + git -c init.defaultBranch=main init && + git remote add -f origin https://github.com/git-for-windows/git && + git fetch "https://github.com/${{github.repository}}" refs/tags/${tag_name}:refs/tags/${tag_name} && + git reset --hard ${tag_name} + - name: Prepare home directory for code-signing + env: + CODESIGN_P12: ${{secrets.CODESIGN_P12}} + CODESIGN_PASS: ${{secrets.CODESIGN_PASS}} + if: env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' + shell: bash + run: | + cd home && + mkdir -p .sig && + echo -n "$CODESIGN_P12" | tr % '\n' | base64 -d >.sig/codesign.p12 && + echo -n "$CODESIGN_PASS" >.sig/codesign.pass + git config --global alias.signtool '!sh "/usr/src/build-extra/signtool.sh"' + - name: Prepare home directory for GPG signing + if: env.GPGKEY != '' + shell: bash + run: | + # This section ensures that the identity for the GPG key matches the git user identity, otherwise + # signing will fail + + echo '${{secrets.PRIVGPGKEY}}' | tr % '\n' | gpg $GPG_OPTIONS --import && + info="$(gpg --list-keys --with-colons "${GPGKEY%% *}" | cut -d : -f 1,10 | sed -n '/^uid/{s|uid:||p;q}')" && + git config --global user.name "${info% <*}" && + git config --global user.email "<${info#*<}" + env: + GPGKEY: ${{secrets.GPGKEY}} + - name: Build mingw-w64-x86_64-git + env: + GPGKEY: "${{secrets.GPGKEY}}" + shell: bash + run: | + set -x + + # Make sure that there is a `/usr/bin/git` that can be used by `makepkg-mingw` + printf '#!/bin/sh\n\nexec /mingw64/bin/git.exe "$@"\n' >/usr/bin/git && + + # Restrict `PATH` to MSYS2 and to Visual Studio (to let `cv2pdb` find the relevant DLLs) + PATH="/mingw64/bin:/usr/bin:/C/Program Files (x86)/Microsoft Visual Studio 14.0/VC/bin/amd64:/C/Windows/system32" + + type -p mspdb140.dll || exit 1 + + sh -x /usr/src/build-extra/please.sh build-mingw-w64-git --only-64-bit --build-src-pkg -o artifacts HEAD && + if test -n "$GPGKEY" + then + for tar in artifacts/*.tar* + do + /usr/src/build-extra/gnupg-with-gpgkey.sh --detach-sign --no-armor $tar + done + fi && + + b=$PWD/artifacts && + version=${{ needs.prereqs.outputs.tag_name }} && + (cd /usr/src/MINGW-packages/mingw-w64-git && + cp PKGBUILD.$version PKGBUILD && + git commit -s -m "mingw-w64-git: new version ($version)" PKGBUILD && + git bundle create "$b"/MINGW-packages.bundle origin/main..main) + - name: Publish mingw-w64-x86_64-git + uses: actions/upload-artifact@v2 + with: + name: pkg-x86_64 + path: artifacts + windows_artifacts: + runs-on: windows-2019 + needs: [prereqs, windows_pkg] + env: + HOME: "${{github.workspace}}\\home" + strategy: + matrix: + artifact: + - name: installer + fileprefix: Git + - name: portable + fileprefix: PortableGit + fail-fast: false + steps: + - name: Download pkg-x86_64 + uses: actions/download-artifact@v2 + with: + name: pkg-x86_64 + path: pkg-x86_64 + - uses: git-for-windows/setup-git-for-windows-sdk@v1 + with: + flavor: build-installers + - name: Clone build-extra + shell: bash + run: | + git clone --filter=blob:none --single-branch -b main https://github.com/git-for-windows/build-extra /usr/src/build-extra + - name: Prepare home directory for code-signing + env: + CODESIGN_P12: ${{secrets.CODESIGN_P12}} + CODESIGN_PASS: ${{secrets.CODESIGN_PASS}} + if: env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' + shell: bash + run: | + mkdir -p home/.sig && + echo -n "$CODESIGN_P12" | tr % '\n' | base64 -d >home/.sig/codesign.p12 && + echo -n "$CODESIGN_PASS" >home/.sig/codesign.pass && + git config --global alias.signtool '!sh 
"/usr/src/build-extra/signtool.sh"' + - name: Retarget auto-update to microsoft/git + shell: bash + run: | + set -x + + b=/usr/src/build-extra && + + filename=$b/git-update-git-for-windows.config + tr % '\t' >$filename <<-\EOF && + [update] + %fromFork = microsoft/git + EOF + + sed -i -e '/^#include "file-list.iss"/a\ + Source: {#SourcePath}\\..\\git-update-git-for-windows.config; DestDir: {app}\\mingw64\\bin; Flags: replacesameversion; AfterInstall: DeleteFromVirtualStore' \ + -e '/^Type: dirifempty; Name: {app}\\{#MINGW_BITNESS}$/i\ + Type: files; Name: {app}\\{#MINGW_BITNESS}\\bin\\git-update-git-for-windows.config\ + Type: dirifempty; Name: {app}\\{#MINGW_BITNESS}\\bin' \ + $b/installer/install.iss + - name: Set alerts to continue until upgrade is taken + shell: bash + run: | + set -x + + b=/mingw64/bin && + + sed -i -e '6 a use_recently_seen=no' \ + $b/git-update-git-for-windows + - name: Set the installer Publisher to the Git Fundamentals team + shell: bash + run: | + b=/usr/src/build-extra && + sed -i -e 's/^\(AppPublisher=\).*/\1The Git Fundamentals Team at GitHub/' $b/installer/install.iss + - name: Let the installer configure Visual Studio to use the installed Git + shell: bash + run: | + set -x + + b=/usr/src/build-extra && + + sed -i -e '/^ *InstallAutoUpdater();$/a\ + CustomPostInstall();' \ + -e '/^ *UninstallAutoUpdater();$/a\ + CustomPostUninstall();' \ + $b/installer/install.iss && + + cat >>$b/installer/helpers.inc.iss <<\EOF + + procedure CustomPostInstall(); + begin + if not RegWriteStringValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\15.0\TeamFoundation\GitSourceControl','GitPath',ExpandConstant('{app}')) or + not RegWriteStringValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\16.0\TeamFoundation\GitSourceControl','GitPath',ExpandConstant('{app}')) or + not RegWriteStringValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\17.0\TeamFoundation\GitSourceControl','GitPath',ExpandConstant('{app}')) or + not RegWriteStringValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\18.0\TeamFoundation\GitSourceControl','GitPath',ExpandConstant('{app}')) or + not RegWriteStringValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\19.0\TeamFoundation\GitSourceControl','GitPath',ExpandConstant('{app}')) or + not RegWriteStringValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\20.0\TeamFoundation\GitSourceControl','GitPath',ExpandConstant('{app}')) then + LogError('Could not register TeamFoundation\GitSourceControl'); + end; + + procedure CustomPostUninstall(); + begin + if not RegDeleteValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\15.0\TeamFoundation\GitSourceControl','GitPath') or + not RegDeleteValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\16.0\TeamFoundation\GitSourceControl','GitPath') or + not RegDeleteValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\17.0\TeamFoundation\GitSourceControl','GitPath') or + not RegDeleteValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\18.0\TeamFoundation\GitSourceControl','GitPath') or + not RegDeleteValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\19.0\TeamFoundation\GitSourceControl','GitPath') or + not RegDeleteValue(HKEY_CURRENT_USER,'Software\Microsoft\VSCommon\20.0\TeamFoundation\GitSourceControl','GitPath') then + LogError('Could not register TeamFoundation\GitSourceControl'); + end; + EOF + - name: Enable Scalar/C and the auto-updater in the installer by default + shell: bash + run: | + set -x + + b=/usr/src/build-extra && + + sed -i -e "/ChosenOptions:=''/a\\ + if 
(ExpandConstant('{param:components|/}')='/') then begin\n\ + WizardSelectComponents('autoupdate');\n\ + #ifdef WITH_SCALAR\n\ + WizardSelectComponents('scalar');\n\ + #endif\n\ + end;" $b/installer/install.iss + - name: Build 64-bit ${{matrix.artifact.name}} + shell: bash + run: | + set -x + + # Copy the PDB archive to the directory where `--include-pdbs` expects it + b=/usr/src/build-extra && + mkdir -p $b/cached-source-packages && + cp pkg-x86_64/*-pdb* $b/cached-source-packages/ && + + # Build the installer, embedding PDBs + eval $b/please.sh make_installers_from_mingw_w64_git --include-pdbs \ + --version=${{ needs.prereqs.outputs.tag_version }} \ + -o artifacts --${{matrix.artifact.name}} \ + --pkg=pkg-x86_64/mingw-w64-x86_64-git-[0-9]*.tar.xz \ + --pkg=pkg-x86_64/mingw-w64-x86_64-git-doc-html-[0-9]*.tar.xz && + + if test portable = '${{matrix.artifact.name}}' && test -n "$(git config alias.signtool)" + then + git signtool artifacts/PortableGit-*.exe + fi && + openssl dgst -sha256 artifacts/${{matrix.artifact.fileprefix}}-*.exe | sed "s/.* //" >artifacts/sha-256.txt + - name: Verify that .exe files are code-signed + if: env.CODESIGN_P12 != '' && env.CODESIGN_PASS != '' + shell: bash + run: | + PATH=$PATH:"/c/Program Files (x86)/Windows Kits/10/App Certification Kit/" \ + signtool verify //pa artifacts/${{matrix.artifact.fileprefix}}-*.exe + - name: Publish ${{matrix.artifact.name}}-x86_64 + uses: actions/upload-artifact@v2 + with: + name: win-${{matrix.artifact.name}}-x86_64 + path: artifacts + # End build Windows installers + + # Build and sign Mac OSX installers & upload artifacts + osx_build: + runs-on: macos-latest + needs: prereqs + env: + # `gettext` is keg-only + LDFLAGS: -L/usr/local/opt/gettext/lib + CFLAGS: -I/usr/local/opt/gettext/include + # To make use of the catalogs... + XML_CATALOG_FILES: /usr/local/etc/xml/catalog + VERSION: "${{ needs.prereqs.outputs.tag_version }}" + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + path: 'git' + + - name: Install Git dependencies + run: | + set -x + brew install automake asciidoc xmlto docbook + brew link --force gettext + + - name: Build payload + run: | + # Configure the environment + set -x + PATH=/usr/local/bin:$PATH + export CURL_LDFLAGS=$(curl-config --libs) + + # Write to "version" file to force match with trigger payload version + echo "${{ needs.prereqs.outputs.tag_version }}" >>git/version + make -C git -j$(sysctl -n hw.physicalcpu) GIT-VERSION-FILE dist dist-doc + + export GIT_BUILT_FROM_COMMIT=$(gunzip -c git/git-$VERSION.tar.gz | git get-tar-commit-id) || + die "Could not determine commit for build" + + # Extract tarballs + mkdir payload manpages + tar -xvf git/git-$VERSION.tar.gz -C payload + tar -xvf git/git-manpages-$VERSION.tar.gz -C manpages + + # Lay out payload + make -C git/.github/macos-installer V=1 payload + + # This step is necessary because we cannot use the $VERSION + # environment variable or the tag_version output from the prereqs + # job in the upload-artifact task. + mkdir -p build_artifacts + cp -R stage/git-intel-x86_64-$VERSION/ build_artifacts + + # We keep a list of executable files because their executable bits are + # removed when they are zipped, and we need to re-add. 
+ find build_artifacts -type f -a -perm -u=x >executable-files.txt + + - name: Upload macOS artifacts + uses: actions/upload-artifact@v3 + with: + name: tmp.osx-build + path: | + build_artifacts + + - name: Upload list of executable files + uses: actions/upload-artifact@v3 + with: + name: tmp.executable-files + path: | + executable-files.txt + + osx_sign_payload: + # ESRP service requires signing to run on Windows + runs-on: windows-latest + needs: osx_build + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + path: 'git' + + - name: Download unsigned build artifiacts + uses: actions/download-artifact@v3 + with: + name: tmp.osx-build + path: build_artifacts + + - name: Zip unsigned build artifacts + shell: pwsh + run: | + Compress-Archive -Path build_artifacts build_artifacts/build_artifacts.zip + cd build_artifacts + Get-ChildItem -Exclude build_artifacts.zip | Remove-Item -Recurse -Force + + - uses: azure/login@v1 + with: + creds: ${{ secrets.AZURE_CREDENTIALS }} + + - name: Set up ESRP client + shell: pwsh + env: + AZURE_VAULT: ${{ secrets.AZURE_VAULT }} + AUTH_CERT: ${{ secrets.AZURE_VAULT_AUTH_CERT_NAME }} + REQUEST_SIGNING_CERT: ${{ secrets.AZURE_VAULT_REQUEST_SIGNING_CERT_NAME }} + run: | + git\.github\scripts\set-up-esrp.ps1 + + - name: Run ESRP client + shell: pwsh + env: + AZURE_AAD_ID: ${{ secrets.AZURE_AAD_ID }} + APPLE_KEY_CODE: ${{ secrets.APPLE_KEY_CODE }} + APPLE_SIGNING_OP_CODE: ${{ secrets.APPLE_SIGNING_OPERATION_CODE }} + run: | + python git\.github\scripts\run-esrp-signing.py build_artifacts ` + $env:APPLE_KEY_CODE $env:APPLE_SIGNING_OP_CODE ` + --params 'Hardening' '--options=runtime' + + - name: Unzip signed build artifacts + shell: pwsh + run: | + Expand-Archive signed/build_artifacts.zip -DestinationPath signed + Remove-Item signed/build_artifacts.zip + + - name: Upload signed payload + uses: actions/upload-artifact@v3 + with: + name: osx-signed-payload + path: | + signed + + osx_pack: + runs-on: macos-latest + needs: [prereqs, osx_sign_payload] + steps: + - name: Check out repository + uses: actions/checkout@v3 + with: + path: 'git' + + - name: Download signed artifacts + uses: actions/download-artifact@v3 + with: + name: osx-signed-payload + + - name: Download list of executable files + uses: actions/download-artifact@v3 + with: + name: tmp.executable-files + + - name: Build macOS pkg + env: + VERSION: "${{ needs.prereqs.outputs.tag_version }}" + run: | + # Install findutils to use gxargs below + brew install findutils + + # Configure the environment + export CURL_LDFLAGS=$(curl-config --libs) + + # Add executable bits and move build_artifacts into + # the same directory as Makefile (so that executable bits + # will be recognized). + gxargs -r -d '\n' chmod a+x &2 + exit 1 + } + + echo "${{ needs.prereqs.outputs.tag_version }}" >>git/version + make -C git GIT-VERSION-FILE + + VERSION="${{ needs.prereqs.outputs.tag_version }}" + + ARCH="$(dpkg-architecture -q DEB_HOST_ARCH)" + if test -z "$ARCH"; then + die "Could not determine host architecture!" + fi + + PKGNAME="microsoft-git_$VERSION" + PKGDIR="$(dirname $(pwd))/$PKGNAME" + + rm -rf "$PKGDIR" + mkdir -p "$PKGDIR" + + DESTDIR="$PKGDIR" make -C git -j5 V=1 DEVELOPER=1 \ + USE_LIBPCRE=1 \ + NO_CROSS_DIRECTORY_HARDLINKS=1 \ + ASCIIDOC8=1 ASCIIDOC_NO_ROFF=1 \ + ASCIIDOC='TZ=UTC asciidoc' \ + prefix=/usr/local \ + gitexecdir=/usr/local/lib/git-core \ + libexecdir=/usr/local/lib/git-core \ + htmldir=/usr/local/share/doc/git/html \ + install install-doc install-html + + cd .. 
+ mkdir "$PKGNAME/DEBIAN" + + # Based on https://packages.ubuntu.com/xenial/vcs/git + cat >"$PKGNAME/DEBIAN/control" < + Description: Git client built from the https://github.com/microsoft/git repository, + specialized in supporting monorepo scenarios. Includes the Scalar CLI. + EOF + + dpkg-deb --build "$PKGNAME" + + mkdir $GITHUB_WORKSPACE/artifacts + mv "$PKGNAME.deb" $GITHUB_WORKSPACE/artifacts/ + - name: Publish unsigned .deb package + uses: actions/upload-artifact@v2 + with: + name: deb-package-unsigned + path: artifacts/ + ubuntu_sign-artifacts: + runs-on: windows-latest # Must be run on Windows due to ESRP executable OS compatibility + needs: [ubuntu_build, prereqs] + if: needs.prereqs.outputs.deb_signable == 'true' + env: + ARTIFACTS_DIR: artifacts + steps: + - name: Clone repository + uses: actions/checkout@v2 + with: + path: 'git' + - name: Download unsigned packages + uses: actions/download-artifact@v2 + with: + name: deb-package-unsigned + path: unsigned + - uses: azure/login@v1 + with: + creds: ${{ secrets.AZURE_CREDENTIALS }} + - name: Set up ESRP client + shell: pwsh + env: + AZURE_VAULT: ${{ secrets.AZURE_VAULT }} + AUTH_CERT: ${{ secrets.AZURE_VAULT_AUTH_CERT_NAME }} + REQUEST_SIGNING_CERT: ${{ secrets.AZURE_VAULT_REQUEST_SIGNING_CERT_NAME }} + run: | + git\.github\scripts\set-up-esrp.ps1 + - name: Sign package + shell: pwsh + env: + AZURE_AAD_ID: ${{ secrets.AZURE_AAD_ID }} + LINUX_KEY_CODE: ${{ secrets.LINUX_KEY_CODE }} + LINUX_OP_CODE: ${{ secrets.LINUX_OPERATION_CODE }} + run: | + python git\.github\scripts\run-esrp-signing.py unsigned $env:LINUX_KEY_CODE $env:LINUX_OP_CODE + - name: Upload signed artifact + uses: actions/upload-artifact@v2 + with: + name: deb-package-signed + path: signed + # End build & sign Ubuntu package + + # Validate installers + validate-installers: + name: Validate installers + strategy: + matrix: + component: + - os: ubuntu-latest + artifact: deb-package-signed + command: git + - os: macos-latest + artifact: osx-signed-pkg + command: git + - os: windows-latest + artifact: win-installer-x86_64 + command: $PROGRAMFILES\Git\cmd\git.exe + runs-on: ${{ matrix.component.os }} + needs: [prereqs, windows_artifacts, osx_publish_dmg, ubuntu_sign-artifacts] + steps: + - name: Download artifacts + uses: actions/download-artifact@v2 + with: + name: ${{ matrix.component.artifact }} + + - name: Install Windows + if: contains(matrix.component.os, 'windows') + shell: pwsh + run: | + $exePath = Get-ChildItem -Path ./*.exe | %{$_.FullName} + Start-Process -Wait -FilePath "$exePath" -ArgumentList "/SILENT /VERYSILENT /NORESTART /SUPPRESSMSGBOXES /ALLOWDOWNGRADE=1" + + - name: Install Linux + if: contains(matrix.component.os, 'ubuntu') + run: | + debpath=$(find ./*.deb) + sudo apt install $debpath + + - name: Install macOS + if: contains(matrix.component.os, 'macos') + run: | + pkgpath=$(find ./*.pkg) + sudo installer -pkg $pkgpath -target / + + - name: Validate + shell: bash + run: | + "${{ matrix.component.command }}" --version | sed 's/git version //' >actual + echo ${{ needs.prereqs.outputs.tag_version }} >expect + cmp expect actual || exit 1 + # End validate installers + + create-github-release: + runs-on: ubuntu-latest + needs: [validate-installers] + if: | + success() || + (needs.ubuntu_sign-artifacts.result == 'skipped' && + needs.osx_publish_dmg.result == 'success' && + needs.windows_artifacts.result == 'success') + steps: + - name: Download Windows portable installer + uses: actions/download-artifact@v2 + with: + name: win-portable-x86_64 + path: 
win-portable-x86_64 + - name: Download Windows x86_64 installer + uses: actions/download-artifact@v2 + with: + name: win-installer-x86_64 + path: win-installer-x86_64 + - name: Download Mac dmg + uses: actions/download-artifact@v2 + with: + name: osx-dmg + path: osx-dmg + - name: Download Mac pkg + uses: actions/download-artifact@v2 + with: + name: osx-signed-pkg + path: osx-pkg + - name: Download Ubuntu package (signed) + if: needs.prereqs.outputs.deb_signable == 'true' + uses: actions/download-artifact@v2 + with: + name: deb-package-signed + path: deb-package + - name: Download Ubuntu package (unsigned) + if: needs.prereqs.outputs.deb_signable != 'true' + uses: actions/download-artifact@v2 + with: + name: deb-package-unsigned + path: deb-package + - uses: actions/github-script@v4 + with: + script: | + const fs = require('fs'); + const path = require('path'); + + var releaseMetadata = { + owner: context.repo.owner, + repo: context.repo.repo + }; + + // Create the release + var tagName = "${{ needs.prereqs.outputs.tag_name }}"; + var createdRelease = await github.repos.createRelease({ + ...releaseMetadata, + draft: true, + tag_name: tagName, + name: tagName + }); + releaseMetadata.release_id = createdRelease.data.id; + + // Uploads contents of directory to the release created above + async function uploadDirectoryToRelease(directory, includeExtensions=[]) { + return fs.promises.readdir(directory) + .then(async(files) => Promise.all( + files.filter(file => { + return includeExtensions.length==0 || includeExtensions.includes(path.extname(file).toLowerCase()); + }) + .map(async (file) => { + var filePath = path.join(directory, file); + github.repos.uploadReleaseAsset({ + ...releaseMetadata, + name: file, + headers: { + "content-length": (await fs.promises.stat(filePath)).size + }, + data: fs.createReadStream(filePath) + }); + })) + ); + } + + await Promise.all([ + // Upload Windows artifacts + uploadDirectoryToRelease('win-installer-x86_64', ['.exe']), + uploadDirectoryToRelease('win-portable-x86_64', ['.exe']), + + // Upload Mac artifacts + uploadDirectoryToRelease('osx-dmg'), + uploadDirectoryToRelease('osx-pkg'), + + // Upload Ubuntu artifacts + uploadDirectoryToRelease('deb-package') + ]); diff --git a/.github/workflows/monitor-components.yml b/.github/workflows/monitor-components.yml deleted file mode 100644 index 9c78d6d3e92445..00000000000000 --- a/.github/workflows/monitor-components.yml +++ /dev/null @@ -1,86 +0,0 @@ -name: Monitor component updates - -# Git for Windows is a slightly modified subset of MSYS2. Some of its -# components are maintained by Git for Windows, others by MSYS2. To help -# keeping the former up to date, this workflow monitors the Atom/RSS feeds -# and opens new tickets for each new component version. 
- -on: - schedule: - - cron: "23 8,11,14,17 * * *" - workflow_dispatch: - -env: - CHARACTER_LIMIT: 5000 - MAX_AGE: 48h - -jobs: - job: - runs-on: ubuntu-latest - environment: monitor-components - strategy: - matrix: - component: - - label: git - feed: https://github.com/git/git/tags.atom - - label: git-lfs - feed: https://github.com/git-lfs/git-lfs/tags.atom - - label: gcm-core - feed: https://github.com/microsoft/git-credential-manager-core/tags.atom - - label: tig - feed: https://github.com/jonas/tig/tags.atom - - label: cygwin - feed: https://github.com/cygwin/cygwin/releases.atom - title-pattern: ^(?!.*newlib) - - label: msys2-runtime-package - feed: https://github.com/msys2/MSYS2-packages/commits/master/msys2-runtime.atom - - label: msys2-runtime - feed: https://github.com/msys2/msys2-runtime/commits/HEAD.atom - aggregate: true - - label: openssh - feed: https://github.com/openssh/openssh-portable/tags.atom - - label: openssl - feed: https://github.com/openssl/openssl/tags.atom - title-pattern: ^(?!.*alpha) - - label: gnutls - feed: https://gnutls.org/news.atom - - label: heimdal - feed: https://github.com/heimdal/heimdal/tags.atom - - label: git-sizer - feed: https://github.com/github/git-sizer/tags.atom - - label: gitflow - feed: https://github.com/petervanderdoes/gitflow-avh/tags.atom - - label: curl - feed: https://github.com/curl/curl/tags.atom - - label: libgpg-error - feed: https://github.com/gpg/libgpg-error/releases.atom - title-pattern: ^libgpg-error-[0-9\.]*$ - - label: libgcrypt - feed: https://github.com/gpg/libgcrypt/releases.atom - title-pattern: ^libgcrypt-[0-9\.]*$ - - label: gpg - feed: https://github.com/gpg/gnupg/releases.atom - - label: mintty - feed: https://github.com/mintty/mintty/releases.atom - - label: p7zip - feed: https://sourceforge.net/projects/p7zip/rss?path=/p7zip - - label: bash - feed: https://git.savannah.gnu.org/cgit/bash.git/atom/?h=master - aggregate: true - - label: perl - feed: https://github.com/Perl/perl5/tags.atom - title-pattern: ^(?!.*(5\.[0-9]+[13579]|RC)) - - label: pcre2 - feed: https://github.com/PCRE2Project/pcre2/tags.atom - fail-fast: false - steps: - - uses: git-for-windows/rss-to-issues@v0 - with: - feed: ${{matrix.component.feed}} - prefix: "[New ${{matrix.component.label}} version]" - labels: component-update - github-token: ${{ secrets.MONITOR_COMPONENTS_PAT }} - character-limit: ${{ env.CHARACTER_LIMIT }} - max-age: ${{ env.MAX_AGE }} - aggregate: ${{matrix.component.aggregate}} - title-pattern: ${{matrix.component.title-pattern}} diff --git a/.github/workflows/release-apt-get.yml b/.github/workflows/release-apt-get.yml new file mode 100644 index 00000000000000..454cdde2a6e50d --- /dev/null +++ b/.github/workflows/release-apt-get.yml @@ -0,0 +1,92 @@ +name: "release-apt-get" +on: + release: + types: [released] + + workflow_dispatch: + inputs: + release: + description: 'Release Id' + required: true + default: 'latest' + +jobs: + release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: azure/login@v1 + with: + creds: ${{ secrets.AZURE_CREDENTIALS }} + + - name: "Download Repo Client" + env: + AZ_SUB: ${{ secrets.AZURE_SUBSCRIPTION }} + run: | + az storage blob download --subscription "$AZ_SUB" --account-name esrpsigningstorage -c signing-resources -n azure-repoapi-client_2.0.1_amd64.deb -f repoclient.deb --auth-mode login + + - name: "Install Repo Client" + run: | + sudo apt-get install python3-adal --yes + sudo dpkg -i repoclient.deb + rm repoclient.deb + + - name: "Configure Repo Client" + uses: 
actions/github-script@v3 + env: + AZURE_AAD_ID: ${{ secrets.AZURE_AAD_ID }} + AAD_CLIENT_SECRET: ${{ secrets.AAD_CLIENT_SECRET }} + with: + script: | + for (const key of ['AZURE_AAD_ID', 'AAD_CLIENT_SECRET']) { + if (!process.env[key]) throw new Error(`Required env var ${key} is missing!`) + } + const config = { + AADResource: 'https://microsoft.onmicrosoft.com/945999e9-da09-4b5b-878f-b66c414602c0', + AADTenant: '72f988bf-86f1-41af-91ab-2d7cd011db47', + AADAuthorityUrl: 'https://login.microsoftonline.com', + server: 'azure-apt-cat.cloudapp.net', + port: '443', + AADClientId: process.env.AZURE_AAD_ID, + AADClientSecret: process.env.AAD_CLIENT_SECRET, + repositoryId: '' + } + const fs = require('fs') + fs.writeFileSync('config.json', JSON.stringify(config, null, 2)) + + - name: "Get Release Asset" + id: get-asset + env: + RELEASE: ${{ github.event.inputs.release }} + uses: actions/github-script@v3 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + const { data } = await github.repos.getRelease({ + owner: context.repo.owner, + repo: context.repo.repo, + release_id: process.env.RELEASE || 'latest' + }) + const assets = data.assets.filter(asset => asset.name.endsWith('.deb')) + if (assets.length !== 1) { + throw new Error(`Unexpected number of .deb assets: ${assets.length}`) + } + const fs = require('fs') + const buffer = await github.repos.getReleaseAsset({ + headers: { + accept: 'application/octet-stream' + }, + owner: context.repo.owner, + repo: context.repo.repo, + asset_id: assets[0].id + }) + console.log(buffer) + fs.writeFileSync(assets[0].name, Buffer.from(buffer.data)) + core.setOutput('name', assets[0].name) + + - name: "Publish to apt feed" + env: + RELEASE: ${{ github.event.inputs.release }} + run: | + repoclient -v v3 -c config.json package add --check --wait 300 ${{steps.get-asset.outputs.name}} -r ${{ secrets.HIRSUTE_REPO_ID }} diff --git a/.github/workflows/release-homebrew.yml b/.github/workflows/release-homebrew.yml new file mode 100644 index 00000000000000..a41ff6127f592d --- /dev/null +++ b/.github/workflows/release-homebrew.yml @@ -0,0 +1,30 @@ +name: Update Homebrew Tap +on: + release: + types: [released] + +jobs: + release: + runs-on: ubuntu-latest + steps: + - id: version + name: Compute version number + run: | + echo "::set-output name=result::$(echo $GITHUB_REF | sed -e "s/^refs\/tags\/v//")" + - id: hash + name: Compute release asset hash + uses: mjcheetham/asset-hash@v1 + with: + asset: /git-(.*)\.pkg/ + hash: sha256 + token: ${{ secrets.GITHUB_TOKEN }} + - name: Update scalar Cask + uses: mjcheetham/update-homebrew@v1.1 + with: + token: ${{ secrets.HOMEBREW_TOKEN }} + tap: microsoft/git + name: microsoft-git + type: cask + version: ${{ steps.version.outputs.result }} + sha256: ${{ steps.hash.outputs.result }} + alwaysUsePullRequest: false diff --git a/.github/workflows/release-winget.yml b/.github/workflows/release-winget.yml new file mode 100644 index 00000000000000..c1f0c5ab282618 --- /dev/null +++ b/.github/workflows/release-winget.yml @@ -0,0 +1,23 @@ +name: "release-winget" +on: + release: + types: [released] + +jobs: + release: + runs-on: windows-latest + steps: + - name: Publish manifest with winget-create + run: | + # Get correct release asset + $github = Get-Content '${{ github.event_path }}' | ConvertFrom-Json + $asset = $github.release.assets | Where-Object -Property name -match '64-bit.exe$' + + # Remove 'v' and 'vfs' from the version + $github.release.tag_name -match '\d.*' + $version = $Matches[0] -replace ".vfs","" + + # Download and run 
wingetcreate + Invoke-WebRequest https://aka.ms/wingetcreate/latest -OutFile wingetcreate.exe + .\wingetcreate.exe update Microsoft.Git -u $asset.browser_download_url -v $version -o manifests -t "${{ secrets.WINGET_TOKEN }}" -s + shell: powershell diff --git a/.github/workflows/scalar-functional-tests.yml b/.github/workflows/scalar-functional-tests.yml new file mode 100644 index 00000000000000..1defa314429186 --- /dev/null +++ b/.github/workflows/scalar-functional-tests.yml @@ -0,0 +1,220 @@ +name: Scalar Functional Tests + +env: + SCALAR_REPOSITORY: microsoft/scalar + SCALAR_REF: main + DEBUG_WITH_TMATE: false + SCALAR_TEST_SKIP_VSTS_INFO: true + +on: + push: + branches: [ vfs-*, tentative/vfs-* ] + pull_request: + branches: [ vfs-*, features/* ] + +jobs: + scalar: + name: "Scalar Functional Tests" + + strategy: + fail-fast: false + matrix: + # Order by runtime (in descending order) + os: [windows-2019, macos-10.15, ubuntu-18.04, ubuntu-20.04] + # Scalar.NET used to be tested using `features: [false, experimental]` + # But currently, Scalar/C ignores `feature.scalar` altogether, so let's + # save some electrons and run only one of them... + features: [ignored] + exclude: + # The built-in FSMonitor is not (yet) supported on Linux + - os: ubuntu-18.04 + features: experimental + - os: ubuntu-20.04 + features: experimental + runs-on: ${{ matrix.os }} + + env: + BUILD_FRAGMENT: bin/Release/netcoreapp3.1 + GIT_FORCE_UNTRACKED_CACHE: 1 + + steps: + - name: Check out Git's source code + uses: actions/checkout@v2 + + - name: Setup build tools on Windows + if: runner.os == 'Windows' + uses: git-for-windows/setup-git-for-windows-sdk@v1 + + - name: Provide a minimal `install` on Windows + if: runner.os == 'Windows' + shell: bash + run: | + test -x /usr/bin/install || + tr % '\t' >/usr/bin/install <<-\EOF + #!/bin/sh + + cmd=cp + while test $# != 0 + do + %case "$1" in + %-d) cmd="mkdir -p";; + %-m) shift;; # ignore mode + %*) break;; + %esac + %shift + done + + exec $cmd "$@" + EOF + + - name: Install build dependencies for Git (Linux) + if: runner.os == 'Linux' + run: | + sudo apt-get update + sudo apt-get -q -y install libssl-dev libcurl4-openssl-dev gettext + + - name: Build and install Git + shell: bash + env: + NO_TCLTK: Yup + run: | + # We do require a VFS version + def_ver="$(sed -n 's/DEF_VER=\(.*vfs.*\)/\1/p' GIT-VERSION-GEN)" + test -n "$def_ver" + + # Ensure that `git version` reflects DEF_VER + case "$(git describe --match "v[0-9]*vfs*" HEAD)" in + ${def_ver%%.vfs.*}.vfs.*) ;; # okay, we can use this + *) git -c user.name=ci -c user.email=ci@github tag -m for-testing ${def_ver}.NNN.g$(git rev-parse --short HEAD);; + esac + + SUDO= + extra= + case "${{ runner.os }}" in + Windows) + extra=DESTDIR=/c/Progra~1/Git + cygpath -aw "/c/Program Files/Git/cmd" >>$GITHUB_PATH + ;; + Linux) + SUDO=sudo + extra=prefix=/usr + ;; + macOS) + SUDO=sudo + extra=prefix=/usr/local + ;; + esac + + $SUDO make -j5 $extra install + + - name: Ensure that we use the built Git and Scalar + shell: bash + run: | + type -p git + git version + case "$(git version)" in *.vfs.*) echo Good;; *) exit 1;; esac + type -p scalar + scalar version + case "$(scalar version 2>&1)" in *.vfs.*) echo Good;; *) exit 1;; esac + + - name: Check out Scalar's source code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # Indicate full history so Nerdbank.GitVersioning works. 
+ path: scalar + repository: ${{ env.SCALAR_REPOSITORY }} + ref: ${{ env.SCALAR_REF }} + + - name: Setup .NET Core + uses: actions/setup-dotnet@v1 + with: + dotnet-version: 3.1.302 + + - name: Install dependencies + run: dotnet restore + working-directory: scalar + env: + DOTNET_NOLOGO: 1 + + - name: Build + working-directory: scalar + run: dotnet build --configuration Release --no-restore -p:UseAppHost=true # Force generation of executable on macOS. + + - name: Setup platform (Linux) + if: runner.os == 'Linux' + run: | + echo "BUILD_PLATFORM=${{ runner.os }}" >>$GITHUB_ENV + echo "TRACE2_BASENAME=Trace2.${{ github.run_id }}__${{ github.run_number }}__${{ matrix.os }}__${{ matrix.features }}" >>$GITHUB_ENV + + - name: Setup platform (Mac) + if: runner.os == 'macOS' + run: | + echo 'BUILD_PLATFORM=Mac' >>$GITHUB_ENV + echo "TRACE2_BASENAME=Trace2.${{ github.run_id }}__${{ github.run_number }}__${{ matrix.os }}__${{ matrix.features }}" >>$GITHUB_ENV + + - name: Setup platform (Windows) + if: runner.os == 'Windows' + run: | + echo "BUILD_PLATFORM=${{ runner.os }}" >>$env:GITHUB_ENV + echo 'BUILD_FILE_EXT=.exe' >>$env:GITHUB_ENV + echo "TRACE2_BASENAME=Trace2.${{ github.run_id }}__${{ github.run_number }}__${{ matrix.os }}__${{ matrix.features }}" >>$env:GITHUB_ENV + + - name: Configure feature.scalar + run: git config --global feature.scalar ${{ matrix.features }} + + - id: functional_test + name: Functional test + timeout-minutes: 60 + working-directory: scalar + shell: bash + run: | + export GIT_TRACE2_EVENT="$PWD/$TRACE2_BASENAME/Event" + export GIT_TRACE2_PERF="$PWD/$TRACE2_BASENAME/Perf" + export GIT_TRACE2_EVENT_BRIEF=true + export GIT_TRACE2_PERF_BRIEF=true + mkdir -p "$TRACE2_BASENAME" + mkdir -p "$TRACE2_BASENAME/Event" + mkdir -p "$TRACE2_BASENAME/Perf" + git version --build-options + cd ../out + Scalar.FunctionalTests/$BUILD_FRAGMENT/Scalar.FunctionalTests$BUILD_FILE_EXT --test-scalar-on-path --test-git-on-path --timeout=300000 --full-suite + + - name: Force-stop FSMonitor daemons and Git processes (Windows) + if: runner.os == 'Windows' && (success() || failure()) + shell: bash + run: | + set -x + wmic process get CommandLine,ExecutablePath,HandleCount,Name,ParentProcessID,ProcessID + wmic process where "CommandLine Like '%fsmonitor--daemon %run'" delete + wmic process where "ExecutablePath Like '%git.exe'" delete + + - id: trace2_zip_unix + if: runner.os != 'Windows' && ( success() || failure() ) && ( steps.functional_test.conclusion == 'success' || steps.functional_test.conclusion == 'failure' ) + name: Zip Trace2 Logs (Unix) + shell: bash + working-directory: scalar + run: zip -q -r $TRACE2_BASENAME.zip $TRACE2_BASENAME/ + + - id: trace2_zip_windows + if: runner.os == 'Windows' && ( success() || failure() ) && ( steps.functional_test.conclusion == 'success' || steps.functional_test.conclusion == 'failure' ) + name: Zip Trace2 Logs (Windows) + working-directory: scalar + run: Compress-Archive -DestinationPath ${{ env.TRACE2_BASENAME }}.zip -Path ${{ env.TRACE2_BASENAME }} + + - name: Archive Trace2 Logs + if: ( success() || failure() ) && ( steps.trace2_zip_unix.conclusion == 'success' || steps.trace2_zip_windows.conclusion == 'success' ) + uses: actions/upload-artifact@v2 + with: + name: ${{ env.TRACE2_BASENAME }}.zip + path: scalar/${{ env.TRACE2_BASENAME }}.zip + retention-days: 3 + + # The GitHub Action `action-tmate` allows developers to connect to the running agent + # using SSH (it will be a `tmux` session; on Windows agents it will be inside the MSYS2 + # environment in 
`C:\msys64`, therefore it can be slightly tricky to interact with + # Git for Windows, which runs a slightly incompatible MSYS2 runtime). + - name: action-tmate + if: env.DEBUG_WITH_TMATE == 'true' && failure() + uses: mxschmitt/action-tmate@v3 + with: + limit-access-to-actor: true diff --git a/.gitignore b/.gitignore index 6ded10067a9e97..3fa9955cd31e0f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /fuzz-pack-headers /fuzz-pack-idx /GIT-BUILD-OPTIONS +/GIT-BUILT-FROM-COMMIT /GIT-CFLAGS /GIT-LDFLAGS /GIT-PREFIX @@ -77,6 +78,7 @@ /git-gc /git-get-tar-commit-id /git-grep +/git-gvfs-helper /git-hash-object /git-help /git-hook @@ -173,6 +175,7 @@ /git-unpack-file /git-unpack-objects /git-update-index +/git-update-microsoft-git /git-update-ref /git-update-server-info /git-upload-archive diff --git a/BRANCHES.md b/BRANCHES.md new file mode 100644 index 00000000000000..364158375e7d55 --- /dev/null +++ b/BRANCHES.md @@ -0,0 +1,59 @@ +Branches used in this repo +========================== + +The document explains the branching structure that we are using in the VFSForGit repository as well as the forking strategy that we have adopted for contributing. + +Repo Branches +------------- + +1. `vfs-#` + + These branches are used to track the specific version that match Git for Windows with the VFSForGit specific patches on top. When a new version of Git for Windows is released, the VFSForGit patches will be rebased on that windows version and a new gvfs-# branch created to create pull requests against. + + #### Examples + + ``` + vfs-2.27.0 + vfs-2.30.0 + ``` + + The versions of git for VFSForGit are based on the Git for Windows versions. v2.20.0.vfs.1 will correspond with the v2.20.0.windows.1 with the VFSForGit specific patches applied to the windows version. + +2. `vfs-#-exp` + + These branches are for releasing experimental features to early adopters. They + should contain everything within the corresponding `vfs-#` branch; if the base + branch updates, then merge into the `vfs-#-exp` branch as well. + +Tags +---- + +We are using annotated tags to build the version number for git. The build will look back through the commit history to find the first tag matching `v[0-9]*vfs*` and build the git version number using that tag. + +Full releases are of the form `v2.XX.Y.vfs.Z.W` where `v2.XX.Y` comes from the +upstream version and `Z.W` are custom updates within our fork. Specifically, +the `.Z` value represents the "compatibility level" with VFS for Git. Only +increase this version when making a breaking change with a released version +of VFS for Git. The `.W` version is used for minor updates between major +versions. + +Experimental releases are of the form `v2.XX.Y.vfs.Z.W.exp`. The `.exp` +suffix indicates that experimental features are available. The rest of the +version string comes from the full release tag. These versions will only +be made available as pre-releases on the releases page, never a full release. + +Forking +------- + +A personal fork of this repository and a branch in that repository should be used for development. + +These branches should be based on the latest vfs-# branch. If there are work in progress pull requests that you have based on a previous version branch when a new version branch is created, you will need to move your patches to the new branch to get them in that latest version. 
+ +#### Example + +``` +git clone +git remote add ms https://github.com/Microsoft/git.git +git checkout -b my-changes ms/vfs-2.20.0 --no-track +git push -fu origin HEAD +``` diff --git a/Documentation/config.txt b/Documentation/config.txt index 98f697b52d47ca..6e38ece0f82ebd 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -437,6 +437,8 @@ include::config/gui.txt[] include::config/guitool.txt[] +include::config/gvfs.txt[] + include::config/help.txt[] include::config/http.txt[] diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index d52fc67af13efc..63418376f82031 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -111,6 +111,14 @@ Version 2 uses an opaque string so that the monitor can return something that can be used to determine what files have changed without race conditions. +core.virtualFilesystem:: + If set, the value of this variable is used as a command which + will identify all files and directories that are present in + the working directory. Git will only track and update files + listed in the virtual file system. Using the virtual file system + will supersede the sparse-checkout settings which will be ignored. + See the "virtual file system" section of linkgit:githooks[5]. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time @@ -728,6 +736,55 @@ core.multiPackIndex:: single index. See linkgit:git-multi-pack-index[1] for more information. Defaults to true. +core.gvfs:: + Enable the features needed for GVFS. This value can be set to true + to indicate all features should be turned on or the bit values listed + below can be used to turn on specific features. ++ +-- + GVFS_SKIP_SHA_ON_INDEX:: + Bit value 1 + Disables the calculation of the sha when writing the index + GVFS_MISSING_OK:: + Bit value 4 + Normally git write-tree ensures that the objects referenced by the + directory exist in the object database. This option disables this check. + GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT:: + Bit value 8 + When marking entries to remove from the index and the working + directory this option will take into account what the + skip-worktree bit was set to so that if the entry has the + skip-worktree bit set it will not be removed from the working + directory. This will allow virtualized working directories to + detect the change to HEAD and use the new commit tree to show + the files that are in the working directory. + GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK:: + Bit value 16 + While performing a fetch with a virtual file system we know + that there will be missing objects and we don't want to download + them just because of the reachability of the commits. We also + don't want to download a pack file with commits, trees, and blobs + since these will be downloaded on demand. This flag will skip the + checks on the reachability of objects during a fetch as well as + the upload pack so that extraneous objects don't get downloaded. + GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS:: + Bit value 64 + With a virtual file system we only know the file size before any + CRLF or smudge/clean filters processing is done on the client. + To prevent file corruption due to truncation or expansion with + garbage at the end, these filters must not run when the file + is first accessed and brought down to the client. Git.exe can't + currently tell the first access vs subsequent accesses so this + flag just blocks them from occurring at all. 
+ GVFS_PREFETCH_DURING_FETCH:: + Bit value 128 + While performing a `git fetch` command, use the gvfs-helper to + perform a "prefetch" of commits and trees. +-- + +core.useGvfsHelper:: + TODO + core.sparseCheckout:: Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1] for more information. @@ -749,3 +806,12 @@ core.abbrev:: If set to "no", no abbreviation is made and the object names are shown in their full length. The minimum length is 4. + +core.configWriteLockTimeoutMS:: + When processes try to write to the config concurrently, it is likely + that one process "wins" and the other process(es) fail to lock the + config file. By configuring a timeout larger than zero, Git can be + told to try to lock the config again a couple times within the + specified timeout. If the timeout is configure to zero (which is the + default), Git will fail immediately when the config is already + locked. diff --git a/Documentation/config/gvfs.txt b/Documentation/config/gvfs.txt new file mode 100644 index 00000000000000..6ab221ded36c91 --- /dev/null +++ b/Documentation/config/gvfs.txt @@ -0,0 +1,5 @@ +gvfs.cache-server:: + TODO + +gvfs.sharedcache:: + TODO diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt index 75f3a2d1054146..c65da20a93136f 100644 --- a/Documentation/config/index.txt +++ b/Documentation/config/index.txt @@ -1,3 +1,9 @@ +index.deleteSparseDirectories:: + When enabled, the cone mode sparse-checkout feature will delete + directories that are outside of the sparse-checkout cone, unless + such a directory contains an untracked, non-ignored file. Defaults + to true. + index.recordEndOfIndexEntries:: Specifies whether the index file should include an "End Of Index Entry" section. This reduces index load time on multiprocessor diff --git a/Documentation/config/status.txt b/Documentation/config/status.txt index 0fc704ab80b223..af043d7e26f269 100644 --- a/Documentation/config/status.txt +++ b/Documentation/config/status.txt @@ -75,3 +75,25 @@ status.submoduleSummary:: the --ignore-submodules=dirty command-line option or the 'git submodule summary' command, which shows a similar output but does not honor these settings. + +status.deserializePath:: + EXPERIMENTAL, Pathname to a file containing cached status results + generated by `--serialize`. This will be overridden by + `--deserialize=` on the command line. If the cache file is + invalid or stale, git will fall-back and compute status normally. + +status.deserializeWait:: + EXPERIMENTAL, Specifies what `git status --deserialize` should do + if the serialization cache file is stale and whether it should + fall-back and compute status normally. This will be overridden by + `--deserialize-wait=` on the command line. ++ +-- +* `fail` - cause git to exit with an error when the status cache file +is stale; this is intended for testing and debugging. +* `block` - cause git to spin and periodically retry the cache file +every 100 ms; this is intended to help coordinate with another git +instance concurrently computing the cache file. +* `no` - to immediately fall-back if cache file is stale. This is the default. +* `` - time (in tenths of a second) to spin and retry. +-- diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt index 54a4b29b473cc4..f6196b47bb4bf4 100644 --- a/Documentation/git-status.txt +++ b/Documentation/git-status.txt @@ -149,6 +149,21 @@ ignored, then the directory is not shown, but all contents are shown. threshold. See also linkgit:git-diff[1] `--find-renames`. 
+--serialize[=]:: + (EXPERIMENTAL) Serialize raw status results to a file or stdout + in a format suitable for use by `--deserialize`. If a path is + given, serialize data will be written to that path *and* normal + status output will be written to stdout. If path is omitted, + only binary serialization data will be written to stdout. + +--deserialize[=]:: + (EXPERIMENTAL) Deserialize raw status results from a file or + stdin rather than scanning the worktree. If `` is omitted + and `status.deserializePath` is unset, input is read from stdin. +--no-deserialize:: + (EXPERIMENTAL) Disable implicit deserialization of status results + from the value of `status.deserializePath`. + ...:: See the 'pathspec' entry in linkgit:gitglossary[7]. @@ -421,6 +436,26 @@ quoted as explained for the configuration variable `core.quotePath` (see linkgit:git-config[1]). +SERIALIZATION and DESERIALIZATION (EXPERIMENTAL) +------------------------------------------------ + +The `--serialize` option allows git to cache the result of a +possibly time-consuming status scan to a binary file. A local +service/daemon watching file system events could use this to +periodically pre-compute a fresh status result. + +Interactive users could then use `--deserialize` to simply +(and immediately) print the last-known-good result without +waiting for the status scan. + +The binary serialization file format includes some worktree state +information allowing `--deserialize` to reject the cached data +and force a normal status scan if, for example, the commit, branch, +or status modes/options change. The format cannot, however, indicate +when the cached data is otherwise stale -- that coordination belongs +to the task driving the serializations. + + CONFIGURATION ------------- diff --git a/Documentation/git-update-microsoft-git.txt b/Documentation/git-update-microsoft-git.txt new file mode 100644 index 00000000000000..724bfc172f8ab7 --- /dev/null +++ b/Documentation/git-update-microsoft-git.txt @@ -0,0 +1,24 @@ +git-update-microsoft-git(1) +=========================== + +NAME +---- +git-update-microsoft-git - Update the installed version of Git + + +SYNOPSIS +-------- +[verse] +'git update-microsoft-git' + +DESCRIPTION +----------- +This version of Git is based on the Microsoft fork of Git, which +has custom capabilities focused on supporting monorepos. This +command checks for the latest release of that fork and installs +it on your machine. + + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index a16e62bc8c8ea7..135f0bd34f232b 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -698,6 +698,26 @@ and "0" meaning they were not. Only one parameter should be set to "1" when the hook runs. The hook running passing "1", "1" should not be possible. +virtualFilesystem +~~~~~~~~~~~~~~~~~~ + +"Virtual File System" allows populating the working directory sparsely. +The projection data is typically automatically generated by an external +process. Git will limit what files it checks for changes as well as which +directories are checked for untracked files based on the path names given. +Git will also only update those files listed in the projection. + +The hook is invoked when the configuration option core.virtualFilesystem +is set. It takes one argument, a version (currently 1). + +The hook should output to stdout the list of all files in the working +directory that git should track. 
The paths are relative to the root +of the working directory and are separated by a single NUL. Full paths +('dir1/a.txt') as well as directories are supported (ie 'dir1/'). + +The exit status determines whether git will use the data from the +hook. On error, git will abort the command with an error message. + SEE ALSO -------- linkgit:git-hook[1] diff --git a/Documentation/scalar.txt b/Documentation/scalar.txt index f33436c7f65ff9..0424d020d9b9f4 100644 --- a/Documentation/scalar.txt +++ b/Documentation/scalar.txt @@ -8,7 +8,9 @@ scalar - A tool for managing large Git repositories SYNOPSIS -------- [verse] -scalar clone [--single-branch] [--branch ] [--full-clone] [] +scalar clone [--single-branch] [--branch ] [--full-clone] + [--local-cache-path ] [--cache-server-url ] + [] scalar list scalar register [] scalar unregister [] @@ -16,6 +18,7 @@ scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) scalar reconfigure [ --all | ] scalar diagnose [] scalar delete +scalar cache-server ( --get | --set | --list [] ) [] DESCRIPTION ----------- @@ -84,6 +87,17 @@ cloning. If the HEAD at the remote did not point at any branch when A sparse-checkout is initialized by default. This behavior can be turned off via `--full-clone`. +--local-cache-path :: + Override the path to the local cache root directory; Pre-fetched objects + are stored into a repository-dependent subdirectory of that path. ++ +The default is `:\.scalarCache` on Windows (on the same drive as the +clone), and `~/.scalarCache` on macOS. + +--cache-server-url :: + Retrieve missing objects from the specified remote, which is expected to + understand the GVFS protocol. + List ~~~~ @@ -157,6 +171,27 @@ delete :: This subcommand lets you delete an existing Scalar enlistment from your local file system, unregistering the repository. +Cache-server +~~~~~~~~~~~~ + +cache-server ( --get | --set | --list [] ) []:: + This command lets you query or set the GVFS-enabled cache server used + to fetch missing objects. + +--get:: + This is the default command mode: query the currently-configured cache + server URL, if any. + +--list:: + Access the `gvfs/info` endpoint of the specified remote (default: + `origin`) to figure out which cache servers are available, if any. ++ +In contrast to the `--get` command mode (which only accesses the local +repository), this command mode triggers a request via the network that +potentially requires authentication. If authentication is required, the +configured credential helper is employed (see linkgit:git-credential[1] +for details). + SEE ALSO -------- linkgit:git-clone[1], linkgit:git-maintenance[1]. diff --git a/Documentation/technical/read-object-protocol.txt b/Documentation/technical/read-object-protocol.txt new file mode 100644 index 00000000000000..a893b46e7c28a9 --- /dev/null +++ b/Documentation/technical/read-object-protocol.txt @@ -0,0 +1,102 @@ +Read Object Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The read-object process enables Git to read all missing blobs with a +single process invocation for the entire life of a single Git command. +This is achieved by using a packet format (pkt-line, see technical/ +protocol-common.txt) based protocol over standard input and standard +output as follows. All packets, except for the "*CONTENT" packets and +the "0000" flush packet, are considered text and therefore are +terminated by a LF. + +Git starts the process when it encounters the first missing object that +needs to be retrieved. 
After the process is started, Git sends a welcome +message ("git-read-object-client"), a list of supported protocol version +numbers, and a flush packet. Git expects to read a welcome response +message ("git-read-object-server"), exactly one protocol version number +from the previously sent list, and a flush packet. All further +communication will be based on the selected version. + +The remaining protocol description below documents "version=1". Please +note that "version=42" in the example below does not exist and is only +there to illustrate how the protocol would look with more than one +version. + +After the version negotiation Git sends a list of all capabilities that +it supports and a flush packet. Git expects to read a list of desired +capabilities, which must be a subset of the supported capabilities list, +and a flush packet as response: +------------------------ +packet: git> git-read-object-client +packet: git> version=1 +packet: git> version=42 +packet: git> 0000 +packet: git< git-read-object-server +packet: git< version=1 +packet: git< 0000 +packet: git> capability=get +packet: git> capability=have +packet: git> capability=put +packet: git> capability=not-yet-invented +packet: git> 0000 +packet: git< capability=get +packet: git< 0000 +------------------------ +The only supported capability in version 1 is "get". + +Afterwards Git sends a list of "key=value" pairs terminated with a flush +packet. The list will contain at least the command (based on the +supported capabilities) and the sha1 of the object to retrieve. Please +note, that the process must not send any response before it received the +final flush packet. + +When the process receives the "get" command, it should make the requested +object available in the git object store and then return success. Git will +then check the object store again and this time find it and proceed. +------------------------ +packet: git> command=get +packet: git> sha1=0a214a649e1b3d5011e14a3dc227753f2bd2be05 +packet: git> 0000 +------------------------ + +The process is expected to respond with a list of "key=value" pairs +terminated with a flush packet. If the process does not experience +problems then the list must contain a "success" status. +------------------------ +packet: git< status=success +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content, it +is expected to respond with an "error" status. +------------------------ +packet: git< status=error +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content as +well as any future content for the lifetime of the Git process, then it +is expected to respond with an "abort" status at any point in the +protocol. +------------------------ +packet: git< status=abort +packet: git< 0000 +------------------------ + +Git neither stops nor restarts the process in case the "error"/"abort" +status is set. + +If the process dies during the communication or does not adhere to the +protocol then Git will stop the process and restart it with the next +object that needs to be processed. + +After the read-object process has processed an object it is expected to +wait for the next "key=value" list containing a command. Git will close +the command pipe on exit. The process is expected to detect EOF and exit +gracefully on its own. Git will wait until the process has stopped. 
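+
+To make the handshake and request/response flow described above concrete,
+here is a minimal sketch of a read-object process written in Python. It is
+only an illustration (not the contrib demo mentioned below): it advertises
+nothing but the "get" capability, and its fetch_object() helper is a
+placeholder for whatever mechanism actually makes the requested object
+available in the object store.
+------------------------
+#!/usr/bin/env python3
+import sys
+
+def read_pkt_line(stream=sys.stdin.buffer):
+    size = stream.read(4)
+    if len(size) < 4:
+        sys.exit(0)        # EOF: Git closed the pipe, exit gracefully
+    n = int(size, 16)
+    if n == 0:
+        return None        # "0000" flush packet
+    return stream.read(n - 4).rstrip(b"\n").decode()
+
+def write_pkt_line(text, stream=sys.stdout.buffer):
+    data = (text + "\n").encode()
+    stream.write(b"%04x" % (len(data) + 4) + data)
+
+def flush(stream=sys.stdout.buffer):
+    stream.write(b"0000")
+    stream.flush()
+
+def read_until_flush():
+    lines = []
+    while (line := read_pkt_line()) is not None:
+        lines.append(line)
+    return lines
+
+def fetch_object(sha1):
+    # Placeholder: a real process would download the object here and
+    # store it in the repository object database before returning.
+    return True
+
+# Handshake: welcome message and version negotiation.
+hello = read_until_flush()
+if "git-read-object-client" not in hello or "version=1" not in hello:
+    sys.exit(1)
+write_pkt_line("git-read-object-server")
+write_pkt_line("version=1")
+flush()
+
+# Capability negotiation: advertise only "get".
+read_until_flush()
+write_pkt_line("capability=get")
+flush()
+
+# Main loop: read one "key=value" request, reply with a status list.
+while True:
+    request = dict(line.split("=", 1) for line in read_until_flush())
+    ok = request.get("command") == "get" and fetch_object(request.get("sha1"))
+    write_pkt_line("status=success" if ok else "status=error")
+    flush()
+------------------------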
+ +A long running read-object process demo implementation can be found in +`contrib/long-running-read-object/example.pl` located in the Git core +repository. If you develop your own long running process then the +`GIT_TRACE_PACKET` environment variables can be very helpful for +debugging (see linkgit:git[1]). diff --git a/Documentation/technical/status-serialization-format.txt b/Documentation/technical/status-serialization-format.txt new file mode 100644 index 00000000000000..475ae814495581 --- /dev/null +++ b/Documentation/technical/status-serialization-format.txt @@ -0,0 +1,107 @@ +Git status serialization format +=============================== + +Git status serialization enables git to dump the results of a status scan +to a binary file. This file can then be loaded by later status invocations +to print the cached status results. + +The file contains the essential fields from: +() the index +() the "struct wt_status" for the overall results +() the contents of "struct wt_status_change_data" for tracked changed files +() the list of untracked and ignored files + +Version 1 Format: +================= + +The V1 file begins with a required header section followed by optional +sections for each type of item (changed, untracked, ignored). Individual +item sections are only present if necessary. Each item section begins +with an item-type header with the number of items in the section. + +Each "line" in the format is encoded using pkt-line with a final LF. +Flush packets are used to terminate sections. + +----------------- +PKT-LINE("version" SP "1") + +[] +[] +[] +----------------- + + +V1 Header +--------- + +The v1-header-section fields are taken directly from "struct wt_status". +Each field is printed on a separate pkt-line. Lines for NULL string +values are omitted. All integers are printed with "%d". OIDs are +printed in hex. + +v1-header-section = + + PKT-LINE() + +v1-index-headers = PKT-LINE("index_mtime" SP SP LF) + +v1-wt-status-headers = PKT-LINE("is_initial" SP LF) + [ PKT-LINE("branch" SP LF) ] + [ PKT-LINE("reference" SP LF) ] + PKT-LINE("show_ignored_files" SP LF) + PKT-LINE("show_untracked_files" SP LF) + PKT-LINE("show_ignored_directory" SP LF) + [ PKT-LINE("ignore_submodule_arg" SP LF) ] + PKT-LINE("detect_rename" SP LF) + PKT-LINE("rename_score" SP LF) + PKT-LINE("rename_limit" SP LF) + PKT-LINE("detect_break" SP LF) + PKT-LINE("sha1_commit" SP LF) + PKT-LINE("committable" SP LF) + PKT-LINE("workdir_dirty" SP LF) + + +V1 Changed Items +---------------- + +The v1-changed-item-section lists all of the changed items with one +item per pkt-line. Each pkt-line contains: a binary block of data +from "struct wt_status_serialize_data_fixed" in a fixed header where +integers are in network byte order and OIDs are in raw (non-hex) form. +This is followed by one or two raw pathnames (not c-quoted) with NUL +terminators (both NULs are always present even if there is no rename). + +v1-changed-item-section = PKT-LINE("changed" SP LF) + [ PKT-LINE( LF) ]+ + PKT-LINE() + +changed_item = + + + + + + + + + + + + NUL + [ ] + NUL + + +V1 Untracked and Ignored Items +------------------------------ + +These sections are simple lists of pathnames. They ARE NOT +c-quoted. 
+ +v1-untracked-item-section = PKT-LINE("untracked" SP LF) + [ PKT-LINE( LF) ]+ + PKT-LINE() + +v1-ignored-item-section = PKT-LINE("ignored" SP LF) + [ PKT-LINE( LF) ]+ + PKT-LINE() diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index a3eb6eca7abfb5..902d11c2821a87 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.38.0 +DEF_VER=v2.38.0.vfs.0.0 LF=' ' @@ -12,10 +12,15 @@ if test -f version then VN=$(cat version) || VN="$DEF_VER" elif test -d ${GIT_DIR:-.git} -o -f .git && - VN=$(git describe --match "v[0-9]*" HEAD 2>/dev/null) && + VN=$(git describe --match "v[0-9]*vfs*" HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) + if test "${VN%%.vfs.*}" != "${DEF_VER%%.vfs.*}" + then + echo "Found version $VN, which is not based on $DEF_VER" >&2 + exit 1 + fi git update-index -q --refresh test -z "$(git diff-index --name-only HEAD --)" || VN="$VN-dirty" ;; diff --git a/Makefile b/Makefile index 5d0de405ae0ebc..b8548e77d1cf78 100644 --- a/Makefile +++ b/Makefile @@ -402,6 +402,10 @@ include shared.mak # Define GIT_USER_AGENT if you want to change how git identifies itself during # network interactions. The default is "git/$(GIT_VERSION)". # +# Define GIT_BUILT_FROM_COMMIT if you want to force the commit hash identified +# in 'git version --build-options' to a specific value. The default is the +# commit hash of the current HEAD. +# # Define DEFAULT_HELP_FORMAT to "man", "info" or "html" # (defaults to "man") if you want to have a different default when # "git help" is called without a parameter specifying the format. @@ -973,6 +977,8 @@ LIB_OBJS += gettext.o LIB_OBJS += gpg-interface.o LIB_OBJS += graph.o LIB_OBJS += grep.o +LIB_OBJS += gvfs.o +LIB_OBJS += gvfs-helper-client.o LIB_OBJS += hash-lookup.o LIB_OBJS += hashmap.o LIB_OBJS += help.o @@ -1120,6 +1126,7 @@ LIB_OBJS += utf8.o LIB_OBJS += varint.o LIB_OBJS += version.o LIB_OBJS += versioncmp.o +LIB_OBJS += virtualfilesystem.o LIB_OBJS += walker.o LIB_OBJS += wildmatch.o LIB_OBJS += worktree.o @@ -1127,6 +1134,8 @@ LIB_OBJS += wrapper.o LIB_OBJS += write-or-die.o LIB_OBJS += ws.o LIB_OBJS += wt-status.o +LIB_OBJS += wt-status-deserialize.o +LIB_OBJS += wt-status-serialize.o LIB_OBJS += xdiff-interface.o LIB_OBJS += zlib.o @@ -1243,6 +1252,7 @@ BUILTIN_OBJS += builtin/tag.o BUILTIN_OBJS += builtin/unpack-file.o BUILTIN_OBJS += builtin/unpack-objects.o BUILTIN_OBJS += builtin/update-index.o +BUILTIN_OBJS += builtin/update-microsoft-git.o BUILTIN_OBJS += builtin/update-ref.o BUILTIN_OBJS += builtin/update-server-info.o BUILTIN_OBJS += builtin/upload-archive.o @@ -1503,6 +1513,9 @@ else endif BASIC_CFLAGS += $(CURL_CFLAGS) + PROGRAM_OBJS += gvfs-helper.o + TEST_PROGRAMS_NEED_X += test-gvfs-protocol + REMOTE_CURL_PRIMARY = git-remote-http$X REMOTE_CURL_ALIASES = git-remote-https$X git-remote-ftp$X git-remote-ftps$X REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES) @@ -2223,6 +2236,15 @@ GIT-USER-AGENT: FORCE echo '$(GIT_USER_AGENT_SQ)' >GIT-USER-AGENT; \ fi +GIT_BUILT_FROM_COMMIT = $(eval GIT_BUILT_FROM_COMMIT := $$(shell \ + GIT_CEILING_DIRECTORIES="$$(CURDIR)/.." 
\ + git rev-parse -q --verify HEAD 2>/dev/null))$(GIT_BUILT_FROM_COMMIT) +GIT-BUILT-FROM-COMMIT: FORCE + @if test x'$(GIT_BUILT_FROM_COMMIT)' != x"`cat GIT-BUILT-FROM-COMMIT 2>/dev/null`" ; then \ + echo >&2 " * new built-from commit"; \ + echo '$(GIT_BUILT_FROM_COMMIT)' >GIT-BUILT-FROM-COMMIT; \ + fi + ifdef DEFAULT_HELP_FORMAT BASIC_CFLAGS += -DDEFAULT_HELP_FORMAT='"$(DEFAULT_HELP_FORMAT)"' endif @@ -2337,13 +2359,11 @@ PAGER_ENV_CQ_SQ = $(subst ','\'',$(PAGER_ENV_CQ)) pager.sp pager.s pager.o: EXTRA_CPPFLAGS = \ -DPAGER_ENV='$(PAGER_ENV_CQ_SQ)' -version.sp version.s version.o: GIT-VERSION-FILE GIT-USER-AGENT +version.sp version.s version.o: GIT-VERSION-FILE GIT-USER-AGENT GIT-BUILT-FROM-COMMIT version.sp version.s version.o: EXTRA_CPPFLAGS = \ '-DGIT_VERSION="$(GIT_VERSION)"' \ '-DGIT_USER_AGENT=$(GIT_USER_AGENT_CQ_SQ)' \ - '-DGIT_BUILT_FROM_COMMIT="$(shell \ - GIT_CEILING_DIRECTORIES="$(CURDIR)/.." \ - git rev-parse -q --verify HEAD 2>/dev/null)"' + '-DGIT_BUILT_FROM_COMMIT="$(GIT_BUILT_FROM_COMMIT)"' $(BUILT_INS): git$X $(QUIET_BUILT_IN)$(RM) $@ && \ @@ -2584,6 +2604,7 @@ GIT_OBJS += git.o .PHONY: git-objs git-objs: $(GIT_OBJS) +SCALAR_OBJS := json-parser.o SCALAR_OBJS += scalar.o .PHONY: scalar-objs scalar-objs: $(SCALAR_OBJS) @@ -2681,7 +2702,7 @@ gettext.sp gettext.s gettext.o: GIT-PREFIX gettext.sp gettext.s gettext.o: EXTRA_CPPFLAGS = \ -DGIT_LOCALE_PATH='"$(localedir_relative_SQ)"' -http-push.sp http.sp http-walker.sp remote-curl.sp imap-send.sp: SP_EXTRA_FLAGS += \ +http-push.sp http.sp http-walker.sp remote-curl.sp imap-send.sp gvfs-helper.sp: SP_EXTRA_FLAGS += \ -DCURL_DISABLE_TYPECHECK pack-revindex.sp: SP_EXTRA_FLAGS += -Wno-memcpy-max-count @@ -2732,10 +2753,14 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o GIT-LDFLAGS $(GITLIBS $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS) -scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) +scalar$X: $(SCALAR_OBJS) GIT-LDFLAGS $(GITLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \ $(filter %.o,$^) $(LIBS) +git-gvfs-helper$X: gvfs-helper.o http.o GIT-LDFLAGS $(GITLIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \ + $(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS) + $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ @@ -3424,7 +3449,7 @@ dist: git-archive$(X) configure @$(MAKE) -C git-gui TARDIR=../.dist-tmp-dir/git-gui dist-version ./git-archive --format=tar \ $(GIT_ARCHIVE_EXTRA_FILES) \ - --prefix=$(GIT_TARNAME)/ HEAD^{tree} > $(GIT_TARNAME).tar + --prefix=$(GIT_TARNAME)/ HEAD > $(GIT_TARNAME).tar @$(RM) -r .dist-tmp-dir gzip -f -9 $(GIT_TARNAME).tar diff --git a/README.md b/README.md index e2314c5a313c08..f92e1e08ddc99d 100644 --- a/README.md +++ b/README.md @@ -1,147 +1,169 @@ -Git for Windows -=============== - -[![Open in Visual Studio Code](https://img.shields.io/static/v1?logo=visualstudiocode&label=&message=Open%20in%20Visual%20Studio%20Code&labelColor=2c2c32&color=007acc&logoColor=007acc)](https://open.vscode.dev/git-for-windows/git) -[![Build status](https://github.com/git-for-windows/git/workflows/CI/badge.svg)](https://github.com/git-for-windows/git/actions?query=branch%3Amain+event%3Apush) -[![Join the chat at https://gitter.im/git-for-windows/git](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/git-for-windows/git?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) - -This is [Git for Windows](http://git-for-windows.github.io/), the Windows port -of 
[Git](http://git-scm.com/). - -The Git for Windows project is run using a [governance -model](http://git-for-windows.github.io/governance-model.html). If you -encounter problems, you can report them as [GitHub -issues](https://github.com/git-for-windows/git/issues), discuss them on Git -for Windows' [Google Group](http://groups.google.com/group/git-for-windows), -and [contribute bug -fixes](https://github.com/git-for-windows/git/wiki/How-to-participate). - -To build Git for Windows, please either install [Git for Windows' -SDK](https://gitforwindows.org/#download-sdk), start its `git-bash.exe`, `cd` -to your Git worktree and run `make`, or open the Git worktree as a folder in -Visual Studio. - -To verify that your build works, use one of the following methods: - -- If you want to test the built executables within Git for Windows' SDK, - prepend `/bin-wrappers` to the `PATH`. -- Alternatively, run `make install` in the Git worktree. -- If you need to test this in a full installer, run `sdk build - git-and-installer`. -- You can also "install" Git into an existing portable Git via `make install - DESTDIR=` where `` refers to the top-level directory of the - portable Git. In this instance, you will want to prepend that portable Git's - `/cmd` directory to the `PATH`, or test by running that portable Git's - `git-bash.exe` or `git-cmd.exe`. -- If you built using a recent Visual Studio, you can use the menu item - `Build>Install git` (you will want to click on `Project>CMake Settings for - Git` first, then click on `Edit JSON` and then point `installRoot` to the - `mingw64` directory of an already-unpacked portable Git). - - As in the previous bullet point, you will then prepend `/cmd` to the `PATH` - or run using the portable Git's `git-bash.exe` or `git-cmd.exe`. -- If you want to run the built executables in-place, but in a CMD instead of - inside a Bash, you can run a snippet like this in the `git-bash.exe` window - where Git was built (ensure that the `EOF` line has no leading spaces), and - then paste into the CMD window what was put in the clipboard: - - ```sh - clip.exe < -including full documentation and Git related tools. - -See [Documentation/gittutorial.txt][] to get started, then see -[Documentation/giteveryday.txt][] for a useful minimum set of commands, and -`Documentation/git-.txt` for documentation of each command. -If git has been correctly installed, then the tutorial can also be -read with `man gittutorial` or `git help tutorial`, and the -documentation of each command with `man git-` or `git help -`. - -CVS users may also want to read [Documentation/gitcvs-migration.txt][] -(`man gitcvs-migration` or `git help cvs-migration` if git is -installed). - -The user discussion and development of core Git take place on the Git -mailing list -- everyone is welcome to post bug reports, feature -requests, comments and patches to git@vger.kernel.org (read -[Documentation/SubmittingPatches][] for instructions on patch submission -and [Documentation/CodingGuidelines][]). - -Those wishing to help with error message, usage and informational message -string translations (localization l10) should see [po/README.md][] -(a `po` file is a Portable Object file that holds the translations). - -To subscribe to the list, send an email with just "subscribe git" in -the body to majordomo@vger.kernel.org (not the Git list). The mailing -list archives are available at , - and other archival sites. -The core git mailing list is plain text (no HTML!). 
- -Issues which are security relevant should be disclosed privately to -the Git Security mailing list . - -The maintainer frequently sends the "What's cooking" reports that -list the current status of various development topics to the mailing -list. The discussion following them give a good reference for -project status, development direction and remaining tasks. - -The name "git" was given by Linus Torvalds when he wrote the very -first version. He described the tool as "the stupid content tracker" -and the name as (depending on your mood): - - - random three-letter combination that is pronounceable, and not - actually used by any common UNIX command. The fact that it is a - mispronunciation of "get" may or may not be relevant. - - stupid. contemptible and despicable. simple. Take your pick from the - dictionary of slang. - - "global information tracker": you're in a good mood, and it actually - works for you. Angels sing, and a light suddenly fills the room. - - "goddamn idiotic truckload of sh*t": when it breaks - -[INSTALL]: INSTALL -[Documentation/gittutorial.txt]: Documentation/gittutorial.txt -[Documentation/giteveryday.txt]: Documentation/giteveryday.txt -[Documentation/gitcvs-migration.txt]: Documentation/gitcvs-migration.txt -[Documentation/SubmittingPatches]: Documentation/SubmittingPatches -[Documentation/CodingGuidelines]: Documentation/CodingGuidelines -[po/README.md]: po/README.md +This project welcomes contributions and suggestions. Most contributions require you to agree to a +Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us +the rights to use your contribution. For details, visit . + +When you submit a pull request, a CLA-bot will automatically determine whether you need to provide +a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions +provided by the bot. You will only need to do this once across all repos using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or +contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. diff --git a/apply.c b/apply.c index f86a01b26080dd..b0453ac2ab7d93 100644 --- a/apply.c +++ b/apply.c @@ -3353,6 +3353,24 @@ static int checkout_target(struct index_state *istate, { struct checkout costate = CHECKOUT_INIT; + /* + * Do not checkout the entry if the skipworktree bit is set + * + * Both callers of this method (check_preimage and load_current) + * check for the existance of the file before calling this + * method so we know that the file doesn't exist at this point + * and we don't need to perform that check again here. + * We just need to check the skip-worktree and return. + * + * This is to prevent git from creating a file in the + * working directory that has the skip-worktree bit on, + * then updating the index from the patch and not keeping + * the working directory version up to date with what it + * changed the index version to be. 
+ */ + if (ce_skip_worktree(ce)) + return 0; + costate.refresh_cache = 1; costate.istate = istate; if (checkout_entry(ce, &costate, NULL, NULL) || diff --git a/builtin.h b/builtin.h index 8901a34d6bf424..88597024d369eb 100644 --- a/builtin.h +++ b/builtin.h @@ -231,6 +231,7 @@ int cmd_tag(int argc, const char **argv, const char *prefix); int cmd_unpack_file(int argc, const char **argv, const char *prefix); int cmd_unpack_objects(int argc, const char **argv, const char *prefix); int cmd_update_index(int argc, const char **argv, const char *prefix); +int cmd_update_microsoft_git(int argc, const char **argv, const char *prefix); int cmd_update_ref(int argc, const char **argv, const char *prefix); int cmd_update_server_info(int argc, const char **argv, const char *prefix); int cmd_upload_archive(int argc, const char **argv, const char *prefix); diff --git a/builtin/add.c b/builtin/add.c index 76d5ad1f5da190..9a10501558ac79 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -47,6 +47,7 @@ static int chmod_pathspec(struct pathspec *pathspec, char flip, int show_only) int err; if (!include_sparse && + !core_virtualfilesystem && (ce_skip_worktree(ce) || !path_in_sparse_checkout(ce->name, &the_index))) continue; @@ -97,7 +98,8 @@ static void update_callback(struct diff_queue_struct *q, struct diff_filepair *p = q->queue[i]; const char *path = p->one->path; - if (!include_sparse && !path_in_sparse_checkout(path, &the_index)) + if (!include_sparse && !core_virtualfilesystem && + !path_in_sparse_checkout(path, &the_index)) continue; switch (fix_unmerged_status(p, data)) { @@ -215,8 +217,9 @@ static int refresh(int verbose, const struct pathspec *pathspec) if (!seen[i]) { const char *path = pathspec->items[i].original; - if (matches_skip_worktree(pathspec, i, &skip_worktree_seen) || - !path_in_sparse_checkout(path, &the_index)) { + if (!core_virtualfilesystem && + (matches_skip_worktree(pathspec, i, &skip_worktree_seen) || + !path_in_sparse_checkout(path, &the_index))) { string_list_append(&only_match_skip_worktree, pathspec->items[i].original); } else { @@ -226,7 +229,11 @@ static int refresh(int verbose, const struct pathspec *pathspec) } } - if (only_match_skip_worktree.nr) { + /* + * When using a virtual filesystem, we might re-add a path + * that is currently virtual and we want that to succeed. + */ + if (!core_virtualfilesystem && only_match_skip_worktree.nr) { advise_on_updating_sparse_paths(&only_match_skip_worktree); ret = 1; } @@ -644,7 +651,11 @@ int cmd_add(int argc, const char **argv, const char *prefix) if (seen[i]) continue; - if (!include_sparse && + /* + * When using a virtual filesystem, we might re-add a path + * that is currently virtual and we want that to succeed. 
+ */ + if (!include_sparse && !core_virtualfilesystem && matches_skip_worktree(&pathspec, i, &skip_worktree_seen)) { string_list_append(&only_match_skip_worktree, pathspec.items[i].original); @@ -668,7 +679,6 @@ int cmd_add(int argc, const char **argv, const char *prefix) } } - if (only_match_skip_worktree.nr) { advise_on_updating_sparse_paths(&only_match_skip_worktree); exit_status = 1; diff --git a/builtin/checkout.c b/builtin/checkout.c index 0490a19fdab2b1..25b0eb91346faa 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -14,6 +14,7 @@ #include "lockfile.h" #include "merge-recursive.h" #include "object-store.h" +#include "packfile.h" #include "parse-options.h" #include "refs.h" #include "remote.h" @@ -986,8 +987,16 @@ static void update_refs_for_switch(const struct checkout_opts *opts, remove_branch_state(the_repository, !opts->quiet); strbuf_release(&msg); if (!opts->quiet && - (new_branch_info->path || (!opts->force_detach && !strcmp(new_branch_info->name, "HEAD")))) + (new_branch_info->path || (!opts->force_detach && !strcmp(new_branch_info->name, "HEAD")))) { + unsigned long nr_unpack_entry_at_start; + + trace2_region_enter("tracking", "report_tracking", the_repository); + nr_unpack_entry_at_start = get_nr_unpack_entry(); report_tracking(new_branch_info); + trace2_data_intmax("tracking", NULL, "report_tracking/nr_unpack_entries", + (intmax_t)(get_nr_unpack_entry() - nr_unpack_entry_at_start)); + trace2_region_leave("tracking", "report_tracking", the_repository); + } } static int add_pending_uninteresting_ref(const char *refname, diff --git a/builtin/commit.c b/builtin/commit.c index cf0e24d3abf25c..59064d2178f207 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -160,6 +160,122 @@ static int opt_parse_porcelain(const struct option *opt, const char *arg, int un return 0; } +static int do_serialize = 0; +static char *serialize_path = NULL; + +static int reject_implicit = 0; +static int do_implicit_deserialize = 0; +static int do_explicit_deserialize = 0; +static char *deserialize_path = NULL; + +static enum wt_status_deserialize_wait implicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; +static enum wt_status_deserialize_wait explicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; + +/* + * --serialize | --serialize= + * + * Request that we serialize status output rather than or in addition to + * printing in any of the established formats. + * + * Without a path, we write binary serialization data to stdout (and omit + * the normal status output). + * + * With a path, we write binary serialization data to the and then + * write normal status output. + */ +static int opt_parse_serialize(const struct option *opt, const char *arg, int unset) +{ + enum wt_status_format *value = (enum wt_status_format *)opt->value; + if (unset || !arg) + *value = STATUS_FORMAT_SERIALIZE_V1; + + if (arg) { + free(serialize_path); + serialize_path = xstrdup(arg); + } + + if (do_explicit_deserialize) + die("cannot mix --serialize and --deserialize"); + do_implicit_deserialize = 0; + + do_serialize = 1; + return 0; +} + +/* + * --deserialize | --deserialize= | + * --no-deserialize + * + * Request that we deserialize status data from some existing resource + * rather than performing a status scan. + * + * The input source can come from stdin or a path given here -- or be + * inherited from the config settings. 
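+ *
+ * A path given on the command line takes precedence over a path
+ * inherited from the status.deserializePath config setting.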
+ */ +static int opt_parse_deserialize(const struct option *opt, const char *arg, int unset) +{ + if (unset) { + do_implicit_deserialize = 0; + do_explicit_deserialize = 0; + } else { + if (do_serialize) + die("cannot mix --serialize and --deserialize"); + if (arg) { + /* override config or stdin */ + free(deserialize_path); + deserialize_path = xstrdup(arg); + } + if (!deserialize_path || !*deserialize_path) + do_explicit_deserialize = 1; /* read stdin */ + else if (wt_status_deserialize_access(deserialize_path, R_OK) == 0) + do_explicit_deserialize = 1; /* can read from this file */ + else { + /* + * otherwise, silently fallback to the normal + * collection scan + */ + do_implicit_deserialize = 0; + do_explicit_deserialize = 0; + } + } + + return 0; +} + +static enum wt_status_deserialize_wait parse_dw(const char *arg) +{ + int tenths; + + if (!strcmp(arg, "fail")) + return DESERIALIZE_WAIT__FAIL; + else if (!strcmp(arg, "block")) + return DESERIALIZE_WAIT__BLOCK; + else if (!strcmp(arg, "no")) + return DESERIALIZE_WAIT__NO; + + /* + * Otherwise, assume it is a timeout in tenths of a second. + * If it contains a bogus value, atol() will return zero + * which is OK. + */ + tenths = atol(arg); + if (tenths < 0) + tenths = DESERIALIZE_WAIT__NO; + return tenths; +} + +static int opt_parse_deserialize_wait(const struct option *opt, + const char *arg, + int unset) +{ + if (unset) + explicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; + else + explicit_deserialize_wait = parse_dw(arg); + + return 0; +} + static int opt_parse_m(const struct option *opt, const char *arg, int unset) { struct strbuf *buf = opt->value; @@ -1152,6 +1268,8 @@ static void handle_untracked_files_arg(struct wt_status *s) s->show_untracked_files = SHOW_NORMAL_UNTRACKED_FILES; else if (!strcmp(untracked_files_arg, "all")) s->show_untracked_files = SHOW_ALL_UNTRACKED_FILES; + else if (!strcmp(untracked_files_arg,"complete")) + s->show_untracked_files = SHOW_COMPLETE_UNTRACKED_FILES; /* * Please update $__git_untracked_file_modes in * git-completion.bash when you add new options @@ -1437,6 +1555,28 @@ static int git_status_config(const char *k, const char *v, void *cb) s->relative_paths = git_config_bool(k, v); return 0; } + if (!strcmp(k, "status.deserializepath")) { + /* + * Automatically assume deserialization if this is + * set in the config and the file exists. Do not + * complain if the file does not exist, because we + * silently fall back to normal mode. 
+ */ + if (v && *v && access(v, R_OK) == 0) { + do_implicit_deserialize = 1; + deserialize_path = xstrdup(v); + } else { + reject_implicit = 1; + } + return 0; + } + if (!strcmp(k, "status.deserializewait")) { + if (!v || !*v) + implicit_deserialize_wait = DESERIALIZE_WAIT__UNSET; + else + implicit_deserialize_wait = parse_dw(v); + return 0; + } if (!strcmp(k, "status.showuntrackedfiles")) { if (!v) return config_error_nonbool(k); @@ -1477,7 +1617,8 @@ int cmd_status(int argc, const char **argv, const char *prefix) static const char *rename_score_arg = (const char *)-1; static struct wt_status s; unsigned int progress_flag = 0; - int fd; + int try_deserialize; + int fd = -1; struct object_id oid; static struct option builtin_status_options[] = { OPT__VERBOSE(&verbose, N_("be verbose")), @@ -1492,6 +1633,15 @@ int cmd_status(int argc, const char **argv, const char *prefix) OPT_CALLBACK_F(0, "porcelain", &status_format, N_("version"), N_("machine-readable output"), PARSE_OPT_OPTARG, opt_parse_porcelain), + { OPTION_CALLBACK, 0, "serialize", &status_format, + N_("path"), N_("serialize raw status data to path or stdout"), + PARSE_OPT_OPTARG | PARSE_OPT_NONEG, opt_parse_serialize }, + { OPTION_CALLBACK, 0, "deserialize", NULL, + N_("path"), N_("deserialize raw status data from file"), + PARSE_OPT_OPTARG, opt_parse_deserialize }, + { OPTION_CALLBACK, 0, "deserialize-wait", NULL, + N_("fail|block|no"), N_("how to wait if status cache file is invalid"), + PARSE_OPT_OPTARG, opt_parse_deserialize_wait }, OPT_SET_INT(0, "long", &status_format, N_("show status in long format (default)"), STATUS_FORMAT_LONG), @@ -1536,10 +1686,53 @@ int cmd_status(int argc, const char **argv, const char *prefix) s.show_untracked_files == SHOW_NO_UNTRACKED_FILES) die(_("Unsupported combination of ignored and untracked-files arguments")); + if (s.show_untracked_files == SHOW_COMPLETE_UNTRACKED_FILES && + s.show_ignored_mode == SHOW_NO_IGNORED) + die(_("Complete Untracked only supported with ignored files")); + parse_pathspec(&s.pathspec, 0, PATHSPEC_PREFER_FULL, prefix, argv); + /* + * If we want to try to deserialize status data from a cache file, + * we need to re-order the initialization code. The problem is that + * this makes for a very nasty diff and causes merge conflicts as we + * carry it forward. And it easy to mess up the merge, so we + * duplicate some code here to hopefully reduce conflicts. + */ + try_deserialize = (!do_serialize && + (do_implicit_deserialize || do_explicit_deserialize)); + + /* + * Disable deserialize when verbose is set because it causes us to + * print diffs for each modified file, but that requires us to have + * the index loaded and we don't want to do that (at least not now for + * this seldom used feature). My fear is that would further tangle + * the merge conflict with upstream. + * + * TODO Reconsider this in the future. + */ + if (try_deserialize && verbose) { + trace2_data_string("status", the_repository, "deserialize/reject", + "args/verbose"); + try_deserialize = 0; + } + + if (try_deserialize) + goto skip_init; + /* + * If we implicitly received a status cache pathname from the config + * and the file does not exist, we silently reject it and do the normal + * status "collect". Fake up some trace2 messages to reflect this and + * assist post-processors know this case is different. 
+ */ + if (!do_serialize && reject_implicit) { + trace2_cmd_mode("implicit-deserialize"); + trace2_data_string("status", the_repository, "deserialize/reject", + "status-cache/access"); + } + enable_fscache(0); if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) @@ -1554,6 +1747,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) else fd = -1; +skip_init: s.is_initial = get_oid(s.reference, &oid) ? 1 : 0; if (!s.is_initial) oidcpy(&s.oid_commit, &oid); @@ -1570,6 +1764,36 @@ int cmd_status(int argc, const char **argv, const char *prefix) s.rename_score = parse_rename_score(&rename_score_arg); } + if (try_deserialize) { + int result; + enum wt_status_deserialize_wait dw = implicit_deserialize_wait; + if (explicit_deserialize_wait != DESERIALIZE_WAIT__UNSET) + dw = explicit_deserialize_wait; + if (dw == DESERIALIZE_WAIT__UNSET) + dw = DESERIALIZE_WAIT__NO; + + if (s.relative_paths) + s.prefix = prefix; + + trace2_cmd_mode("deserialize"); + result = wt_status_deserialize(&s, deserialize_path, dw); + if (result == DESERIALIZE_OK) + return 0; + if (dw == DESERIALIZE_WAIT__FAIL) + die(_("Rejected status serialization cache")); + + /* deserialize failed, so force the initialization we skipped above. */ + enable_fscache(1); + read_cache_preload(&s.pathspec); + refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL); + + if (use_optional_locks()) + fd = hold_locked_index(&index_lock, 0); + else + fd = -1; + } + + trace2_cmd_mode("collect"); wt_status_collect(&s); if (0 <= fd) @@ -1578,6 +1802,17 @@ int cmd_status(int argc, const char **argv, const char *prefix) if (s.relative_paths) s.prefix = prefix; + if (serialize_path) { + int fd_serialize = xopen(serialize_path, + O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd_serialize < 0) + die_errno(_("could not serialize to '%s'"), + serialize_path); + trace2_cmd_mode("serialize"); + wt_status_serialize_v1(fd_serialize, &s); + close(fd_serialize); + } + wt_status_print(&s); wt_status_collect_free_buffers(&s); diff --git a/builtin/fetch.c b/builtin/fetch.c index a0fca93bb6a63e..442452702d4df4 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -13,6 +13,8 @@ #include "string-list.h" #include "remote.h" #include "transport.h" +#include "gvfs.h" +#include "gvfs-helper-client.h" #include "run-command.h" #include "parse-options.h" #include "sigchain.h" @@ -1130,6 +1132,13 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, struct check_connected_options opt = CHECK_CONNECTED_INIT; rm = ref_map; + + /* + * Before checking connectivity, be really sure we have the + * latest pack-files loaded into memory. 
+ */ + reprepare_packed_git(the_repository); + if (check_connected(iterate_ref_map, &rm, &opt)) { rc = error(_("%s did not send all necessary objects\n"), url); goto abort; @@ -2219,6 +2228,9 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) } } + if (core_gvfs & GVFS_PREFETCH_DURING_FETCH) + gh_client__prefetch(0, NULL); + if (negotiate_only) { struct oidset acked_commits = OIDSET_INIT; struct oidset_iter iter; diff --git a/builtin/gc.c b/builtin/gc.c index effe7ac15661ce..e08d6b44b7aebe 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -12,6 +12,7 @@ #include "builtin.h" #include "repository.h" +#include "gvfs.h" #include "config.h" #include "tempfile.h" #include "lockfile.h" @@ -610,6 +611,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (quiet) strvec_push(&repack, "-q"); + if ((!auto_gc || (auto_gc && gc_auto_threshold > 0)) && gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("'git gc' is not supported on a GVFS repo")); + if (auto_gc) { /* * Auto-gc should be least intrusive as possible. @@ -1002,6 +1006,8 @@ static int write_loose_object_to_stdin(const struct object_id *oid, return ++(d->count) > d->batch_size; } +static const char *object_dir = NULL; + static int pack_loose(struct maintenance_run_opts *opts) { struct repository *r = the_repository; @@ -1009,11 +1015,14 @@ static int pack_loose(struct maintenance_run_opts *opts) struct write_loose_object_data data; struct child_process pack_proc = CHILD_PROCESS_INIT; + if (!object_dir) + object_dir = r->objects->odb->path; + /* * Do not start pack-objects process * if there are no loose objects. */ - if (!for_each_loose_file_in_objdir(r->objects->odb->path, + if (!for_each_loose_file_in_objdir(object_dir, bail_on_loose, NULL, NULL, NULL)) return 0; @@ -1023,7 +1032,7 @@ static int pack_loose(struct maintenance_run_opts *opts) strvec_push(&pack_proc.args, "pack-objects"); if (opts->quiet) strvec_push(&pack_proc.args, "--quiet"); - strvec_pushf(&pack_proc.args, "%s/pack/loose", r->objects->odb->path); + strvec_pushf(&pack_proc.args, "%s/pack/loose", object_dir); pack_proc.in = -1; @@ -1036,7 +1045,7 @@ static int pack_loose(struct maintenance_run_opts *opts) data.count = 0; data.batch_size = 50000; - for_each_loose_file_in_objdir(r->objects->odb->path, + for_each_loose_file_in_objdir(object_dir, write_loose_object_to_stdin, NULL, NULL, @@ -1276,6 +1285,8 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) char *lock_path = xstrfmt("%s/maintenance", r->objects->odb->path); if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) { + struct stat st; + struct strbuf lock_dot_lock = STRBUF_INIT; /* * Another maintenance command is running. * @@ -1286,6 +1297,25 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) if (!opts->auto_flag && !opts->quiet) warning(_("lock file '%s' exists, skipping maintenance"), lock_path); + + /* + * Check timestamp on .lock file to see if we should + * delete it to recover from a fail state. 
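+		 *
+		 * A lock file more than six hours old is assumed to be left
+		 * over from a failed run, so it is deleted to let future
+		 * maintenance runs proceed.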
+ */ + strbuf_addstr(&lock_dot_lock, lock_path); + strbuf_addstr(&lock_dot_lock, ".lock"); + if (lstat(lock_dot_lock.buf, &st)) + warning_errno(_("unable to stat '%s'"), lock_dot_lock.buf); + else { + if (st.st_mtime < time(NULL) - (6 * 60 * 60)) { + if (unlink(lock_dot_lock.buf)) + warning_errno(_("unable to delete stale lock file")); + else + warning(_("deleted stale lock file")); + } + } + + strbuf_release(&lock_dot_lock); free(lock_path); return 0; } @@ -1414,6 +1444,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) { int i; struct maintenance_run_opts opts; + const char *tmp_obj_dir = NULL; struct option builtin_maintenance_run_options[] = { OPT_BOOL(0, "auto", &opts.auto_flag, N_("run tasks based on the state of the repository")), @@ -1447,6 +1478,18 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) if (argc != 0) usage_with_options(builtin_maintenance_run_usage, builtin_maintenance_run_options); + + /* + * To enable the VFS for Git/Scalar shared object cache, use + * the gvfs.sharedcache config option to redirect the + * maintenance to that location. + */ + if (!git_config_get_value("gvfs.sharedcache", &tmp_obj_dir) && + tmp_obj_dir) { + object_dir = xstrdup(tmp_obj_dir); + setenv(DB_ENVIRONMENT, object_dir, 1); + } + return maintenance_run_tasks(&opts); } @@ -1528,9 +1571,13 @@ static int maintenance_unregister(int argc, const char **argv, const char *prefi struct option options[] = { OPT_END(), }; - int rc; + const char *key = "maintenance.repo"; + int rc = 0; struct child_process config_unset = CHILD_PROCESS_INIT; char *maintpath = get_maintpath(); + int found = 0; + struct string_list_item *item; + const struct string_list *list = git_config_get_value_multi(key); argc = parse_options(argc, argv, prefix, options, builtin_maintenance_unregister_usage, 0); @@ -1538,11 +1585,21 @@ static int maintenance_unregister(int argc, const char **argv, const char *prefi usage_with_options(builtin_maintenance_unregister_usage, options); - config_unset.git_cmd = 1; - strvec_pushl(&config_unset.args, "config", "--global", "--unset", - "--fixed-value", "maintenance.repo", maintpath, NULL); + for_each_string_list_item(item, list) { + if (!strcmp(maintpath, item->string)) { + found = 1; + break; + } + } + + if (found) { + config_unset.git_cmd = 1; + strvec_pushl(&config_unset.args, "config", "--global", "--unset", + "--fixed-value", key, maintpath, NULL); + + rc = run_command(&config_unset); + } - rc = run_command(&config_unset); free(maintpath); return rc; } diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 6648f2daef5cef..46a61b7cba8bf9 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -801,7 +801,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, if (startup_info->have_repository) { read_lock(); collision_test_needed = - has_object_file_with_flags(oid, OBJECT_INFO_QUICK); + has_object_file_with_flags(oid, OBJECT_INFO_FOR_PREFETCH); read_unlock(); } diff --git a/builtin/reset.c b/builtin/reset.c index 0d471411ffe398..8eb5cfb22d91b6 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -28,6 +28,8 @@ #include "dir.h" #include "strbuf.h" #include "quote.h" +#include "dir.h" +#include "entry.h" #define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000) @@ -141,9 +143,47 @@ static void update_index_from_diff(struct diff_queue_struct *q, for (i = 0; i < q->nr; i++) { int pos; + int respect_skip_worktree = 1; struct diff_filespec *one = q->queue[i]->one; + struct diff_filespec *two = 
q->queue[i]->two; int is_in_reset_tree = one->mode && !is_null_oid(&one->oid); + int is_missing = !(one->mode && !is_null_oid(&one->oid)); + int was_missing = !two->mode && is_null_oid(&two->oid); struct cache_entry *ce; + struct cache_entry *ceBefore; + struct checkout state = CHECKOUT_INIT; + + /* + * When using the virtual filesystem feature, the cache entries that are + * added here will not have the skip-worktree bit set. + * + * Without this code there is data that is lost because the files that + * would normally be in the working directory are not there and show as + * deleted for the next status or in the case of added files just disappear. + * We need to create the previous version of the files in the working + * directory so that they will have the right content and the next + * status call will show modified or untracked files correctly. + */ + if (core_virtualfilesystem && !file_exists(two->path)) + { + pos = cache_name_pos(two->path, strlen(two->path)); + if ((pos >= 0 && ce_skip_worktree(active_cache[pos])) && + (is_missing || !was_missing)) + { + state.force = 1; + state.refresh_cache = 1; + state.istate = &the_index; + ceBefore = make_cache_entry(&the_index, two->mode, + &two->oid, two->path, + 0, 0); + if (!ceBefore) + die(_("make_cache_entry failed for path '%s'"), + two->path); + + checkout_entry(ceBefore, &state, NULL, NULL); + respect_skip_worktree = 0; + } + } if (!is_in_reset_tree && !intent_to_add) { remove_file_from_cache(one->path); @@ -162,8 +202,14 @@ static void update_index_from_diff(struct diff_queue_struct *q, * to properly construct the reset sparse directory. */ pos = cache_name_pos(one->path, strlen(one->path)); - if ((pos >= 0 && ce_skip_worktree(active_cache[pos])) || - (pos < 0 && !path_in_sparse_checkout(one->path, &the_index))) + + /* + * Do not add the SKIP_WORKTREE bit back if we populated the + * file on purpose in a virtual filesystem scenario. + */ + if (respect_skip_worktree && + ((pos >= 0 && ce_skip_worktree(active_cache[pos])) || + (pos < 0 && !path_in_sparse_checkout(one->path, &the_index)))) ce->ce_flags |= CE_SKIP_WORKTREE; if (!ce) diff --git a/builtin/rm.c b/builtin/rm.c index b6ba859fe42571..bbf219baf0a7e0 100644 --- a/builtin/rm.c +++ b/builtin/rm.c @@ -304,7 +304,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix) for (i = 0; i < active_nr; i++) { const struct cache_entry *ce = active_cache[i]; - if (!include_sparse && + if (!include_sparse && !core_virtualfilesystem && (ce_skip_worktree(ce) || !path_in_sparse_checkout(ce->name, &the_index))) continue; @@ -341,7 +341,11 @@ int cmd_rm(int argc, const char **argv, const char *prefix) *original ? original : "."); } - if (only_match_skip_worktree.nr) { + /* + * When using a virtual filesystem, we might re-add a path + * that is currently virtual and we want that to succeed. 
+ */ + if (!core_virtualfilesystem && only_match_skip_worktree.nr) { advise_on_updating_sparse_paths(&only_match_skip_worktree); ret = 1; } diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c index 287716db68e419..d82f02597ace98 100644 --- a/builtin/sparse-checkout.c +++ b/builtin/sparse-checkout.c @@ -106,7 +106,7 @@ static int sparse_checkout_list(int argc, const char **argv, const char *prefix) static void clean_tracked_sparse_directories(struct repository *r) { - int i, was_full = 0; + int i, value, was_full = 0; struct strbuf path = STRBUF_INIT; size_t pathlen; struct string_list_item *item; @@ -122,6 +122,13 @@ static void clean_tracked_sparse_directories(struct repository *r) !r->index->sparse_checkout_patterns->use_cone_patterns) return; + /* + * Users can disable this behavior. + */ + if (!repo_config_get_bool(r, "index.deletesparsedirectories", &value) && + !value) + return; + /* * Use the sparse index as a data structure to assist finding * directories that are safe to delete. This conversion to a diff --git a/builtin/update-index.c b/builtin/update-index.c index b62249905f1b80..7c01696690f254 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -5,6 +5,7 @@ */ #define USE_THE_INDEX_COMPATIBILITY_MACROS #include "cache.h" +#include "gvfs.h" #include "bulk-checkin.h" #include "config.h" #include "lockfile.h" @@ -1170,7 +1171,13 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) argc = parse_options_end(&ctx); getline_fn = nul_term_line ? strbuf_getline_nul : strbuf_getline_lf; + if (mark_skip_worktree_only && gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("modifying the skip worktree bit is not supported on a GVFS repo")); + if (preferred_index_format) { + if (preferred_index_format != 4 && gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("changing the index version is not supported on a GVFS repo")); + if (preferred_index_format < INDEX_FORMAT_LB || INDEX_FORMAT_UB < preferred_index_format) die("index-version %d not in range: %d..%d", @@ -1211,6 +1218,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix) end_odb_transaction(); if (split_index > 0) { + if (gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die(_("split index is not supported on a GVFS repo")); + if (git_config_get_split_index() == 0) warning(_("core.splitIndex is set to false; " "remove or change it, if you really want to " diff --git a/builtin/update-microsoft-git.c b/builtin/update-microsoft-git.c new file mode 100644 index 00000000000000..f943b808615fd7 --- /dev/null +++ b/builtin/update-microsoft-git.c @@ -0,0 +1,73 @@ +#include "builtin.h" +#include "repository.h" +#include "parse-options.h" +#include "run-command.h" +#include "strvec.h" + +#if defined(GIT_WINDOWS_NATIVE) +/* + * On Windows, run 'git update-git-for-windows' which + * is installed by the installer, based on the script + * in git-for-windows/build-extra. + */ +static int platform_specific_upgrade(void) +{ + int res; + struct strvec args = STRVEC_INIT; + + strvec_push(&args, "git-update-git-for-windows"); + res = run_command_v_opt(args.v, 0); + strvec_clear(&args); + return res; +} +#elif defined(__APPLE__) +/* + * On macOS, we expect the user to have the microsoft-git + * cask installed via Homebrew. We check using these + * commands: + * + * 1. 'brew update' to get latest versions. + * 2. 'brew upgrade --cask microsoft-git' to get the + * latest version. 
+ */ +static int platform_specific_upgrade(void) +{ + int res; + struct strvec args = STRVEC_INIT; + + printf("Updating Homebrew with 'brew update'\n"); + + strvec_pushl(&args, "brew", "update", NULL); + res = run_command_v_opt(args.v, 0); + strvec_clear(&args); + + if (res) { + error(_("'brew update' failed; is brew installed?")); + return 1; + } + + printf("Upgrading microsoft-git with 'brew upgrade --cask microsoft-git'\n"); + strvec_pushl(&args, "brew", "upgrade", "--cask", "microsoft-git", NULL); + res = run_command_v_opt(args.v, 0); + strvec_clear(&args); + + return res; +} +#else +static int platform_specific_upgrade(void) +{ + error(_("update-microsoft-git is not supported on this platform")); + return 1; +} +#endif + +static const char builtin_update_microsoft_git_usage[] = + N_("git update-microsoft-git"); + +int cmd_update_microsoft_git(int argc, const char **argv, const char *prefix) +{ + if (argc == 2 && !strcmp(argv[1], "-h")) + usage(builtin_update_microsoft_git_usage); + + return platform_specific_upgrade(); +} diff --git a/cache-tree.c b/cache-tree.c index c97111cccf2eda..af11919fdd7c91 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -1,4 +1,5 @@ #include "cache.h" +#include "gvfs.h" #include "lockfile.h" #include "tree.h" #include "tree-walk.h" @@ -224,7 +225,7 @@ static void discard_unused_subtrees(struct cache_tree *it) } } -int cache_tree_fully_valid(struct cache_tree *it) +static int cache_tree_fully_valid_1(struct cache_tree *it) { int i; if (!it) @@ -232,7 +233,7 @@ int cache_tree_fully_valid(struct cache_tree *it) if (it->entry_count < 0 || !has_object_file(&it->oid)) return 0; for (i = 0; i < it->subtree_nr; i++) { - if (!cache_tree_fully_valid(it->down[i]->cache_tree)) + if (!cache_tree_fully_valid_1(it->down[i]->cache_tree)) return 0; } return 1; @@ -243,6 +244,17 @@ static int must_check_existence(const struct cache_entry *ce) return !(has_promisor_remote() && ce_skip_worktree(ce)); } +int cache_tree_fully_valid(struct cache_tree *it) +{ + int result; + + trace2_region_enter("cache_tree", "fully_valid", NULL); + result = cache_tree_fully_valid_1(it); + trace2_region_leave("cache_tree", "fully_valid", NULL); + + return result; +} + static int update_one(struct cache_tree *it, struct cache_entry **cache, int entries, @@ -252,7 +264,8 @@ static int update_one(struct cache_tree *it, int flags) { struct strbuf buffer; - int missing_ok = flags & WRITE_TREE_MISSING_OK; + int missing_ok = gvfs_config_is_set(GVFS_MISSING_OK) ? 
+ WRITE_TREE_MISSING_OK : (flags & WRITE_TREE_MISSING_OK); int dryrun = flags & WRITE_TREE_DRY_RUN; int repair = flags & WRITE_TREE_REPAIR; int to_invalidate = 0; @@ -421,7 +434,29 @@ static int update_one(struct cache_tree *it, continue; strbuf_grow(&buffer, entlen + 100); - strbuf_addf(&buffer, "%o %.*s%c", mode, entlen, path + baselen, '\0'); + + switch (mode) { + case 0100644: + strbuf_add(&buffer, "100644 ", 7); + break; + case 0100664: + strbuf_add(&buffer, "100664 ", 7); + break; + case 0100755: + strbuf_add(&buffer, "100755 ", 7); + break; + case 0120000: + strbuf_add(&buffer, "120000 ", 7); + break; + case 0160000: + strbuf_add(&buffer, "160000 ", 7); + break; + default: + strbuf_addf(&buffer, "%o ", mode); + break; + } + strbuf_add(&buffer, path + baselen, entlen); + strbuf_addch(&buffer, '\0'); strbuf_add(&buffer, oid->hash, the_hash_algo->rawsz); #if DEBUG_CACHE_TREE @@ -815,14 +850,14 @@ void prime_cache_tree(struct repository *r, { struct strbuf tree_path = STRBUF_INIT; - trace2_region_enter("cache-tree", "prime_cache_tree", the_repository); + trace2_region_enter("cache-tree", "prime_cache_tree", r); cache_tree_free(&istate->cache_tree); istate->cache_tree = cache_tree(); prime_cache_tree_rec(r, istate->cache_tree, tree, &tree_path); strbuf_release(&tree_path); istate->cache_changed |= CACHE_TREE_CHANGED; - trace2_region_leave("cache-tree", "prime_cache_tree", the_repository); + trace2_region_leave("cache-tree", "prime_cache_tree", r); } /* diff --git a/cache.h b/cache.h index e7344004f5c7e0..94d4726d424e3b 100644 --- a/cache.h +++ b/cache.h @@ -240,7 +240,7 @@ static inline unsigned create_ce_flags(unsigned stage) #define ce_namelen(ce) ((ce)->ce_namelen) #define ce_size(ce) cache_entry_size(ce_namelen(ce)) #define ce_stage(ce) ((CE_STAGEMASK & (ce)->ce_flags) >> CE_STAGESHIFT) -#define ce_uptodate(ce) ((ce)->ce_flags & CE_UPTODATE) +#define ce_uptodate(ce) (((ce)->ce_flags & CE_UPTODATE) || ((ce)->ce_flags & CE_FSMONITOR_VALID)) #define ce_skip_worktree(ce) ((ce)->ce_flags & CE_SKIP_WORKTREE) #define ce_mark_uptodate(ce) ((ce)->ce_flags |= CE_UPTODATE) #define ce_intent_to_add(ce) ((ce)->ce_flags & CE_INTENT_TO_ADD) @@ -631,6 +631,30 @@ void set_git_work_tree(const char *tree); #define ALTERNATE_DB_ENVIRONMENT "GIT_ALTERNATE_OBJECT_DIRECTORIES" void setup_work_tree(void); + +/* + * discover_git_directory_reason() is similar to discover_git_directory(), + * except it returns an enum value instead. It is important to note that + * a zero-valued return here is actually GIT_DIR_NONE, which is different + * from discover_git_directory. + */ +enum discovery_result { + GIT_DIR_NONE = 0, + GIT_DIR_EXPLICIT, + GIT_DIR_DISCOVERED, + GIT_DIR_BARE, + /* these are errors */ + GIT_DIR_HIT_CEILING = -1, + GIT_DIR_HIT_MOUNT_POINT = -2, + GIT_DIR_INVALID_GITFILE = -3, + GIT_DIR_INVALID_OWNERSHIP = -4, + GIT_DIR_DISALLOWED_BARE = -5, + GIT_DIR_INVALID_FORMAT = -6, + GIT_DIR_CWD_FAILURE = -7, +}; +enum discovery_result discover_git_directory_reason(struct strbuf *commondir, + struct strbuf *gitdir); + /* * Find the commondir and gitdir of the repository that contains the current * working directory, without changing the working directory or other global @@ -639,8 +663,12 @@ void setup_work_tree(void); * both have the same result appended to the buffer. The return value is * either 0 upon success and non-zero if no repository was found. 
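+ * (Clarifying note: the inline discover_git_directory() wrapper below maps
+ * the enum to this convention, i.e. it treats GIT_DIR_NONE and the negative
+ * error values as "no repository was found" and any positive result as
+ * success.)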
*/ -int discover_git_directory(struct strbuf *commondir, - struct strbuf *gitdir); +static inline int discover_git_directory(struct strbuf *commondir, + struct strbuf *gitdir) +{ + return discover_git_directory_reason(commondir, gitdir) <= 0; +} + const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); char *prefix_path(const char *prefix, int len, const char *path); @@ -809,6 +837,7 @@ int strcmp_offset(const char *s1, const char *s2, size_t *first_change); int index_dir_exists(struct index_state *istate, const char *name, int namelen); void adjust_dirname_case(struct index_state *istate, char *name); struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase); +struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase); /* * Searches for an entry defined by name and namelen in the given index. @@ -1074,9 +1103,14 @@ enum fsync_method { extern enum fsync_method fsync_method; extern int core_preload_index; +extern const char *core_virtualfilesystem; +extern int core_gvfs; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; +extern int core_use_gvfs_helper; +extern const char *gvfs_cache_server_url; +extern struct strbuf gvfs_shared_cache_pathname; extern int core_apply_sparse_checkout; extern int core_sparse_checkout_cone; @@ -1094,6 +1128,8 @@ int use_optional_locks(void); extern char comment_line_char; extern int auto_comment_line_char; +extern int core_virtualize_objects; + enum log_refs_config { LOG_REFS_UNSET = -1, LOG_REFS_NONE = 0, diff --git a/compat/mingw.c b/compat/mingw.c index 969534681a5c6e..d63bbabf9b6017 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -3969,6 +3969,8 @@ int wmain(int argc, const wchar_t **wargv) SetConsoleCtrlHandler(handle_ctrl_c, TRUE); + trace2_initialize_clock(); + maybe_redirect_std_handles(); adjust_symlink_flags(); fsync_object_files = 1; diff --git a/config.c b/config.c index 7176f800e1607b..2ec76f6ada59d0 100644 --- a/config.c +++ b/config.c @@ -7,6 +7,7 @@ */ #include "cache.h" #include "date.h" +#include "gvfs.h" #include "branch.h" #include "config.h" #include "environment.h" @@ -23,6 +24,7 @@ #include "color.h" #include "refs.h" #include "worktree.h" +#include "transport.h" struct config_source { struct config_source *prev; @@ -1729,8 +1731,22 @@ int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.gvfs")) { + gvfs_load_config_value(value); + return 0; + } + + if (!strcmp(var, "core.usegvfshelper")) { + core_use_gvfs_helper = git_config_bool(var, value); + return 0; + } + if (!strcmp(var, "core.sparsecheckout")) { - core_apply_sparse_checkout = git_config_bool(var, value); + /* virtual file system relies on the sparse checkout logic so force it on */ + if (core_virtualfilesystem) + core_apply_sparse_checkout = 1; + else + core_apply_sparse_checkout = git_config_bool(var, value); return 0; } @@ -1759,6 +1775,11 @@ int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.virtualizeobjects")) { + core_virtualize_objects = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. 
*/ return platform_core_config(var, value, cb); } @@ -1862,6 +1883,35 @@ static int git_default_mailmap_config(const char *var, const char *value) return 0; } +static int git_default_gvfs_config(const char *var, const char *value) +{ + if (!strcmp(var, "gvfs.cache-server")) { + const char *v2 = NULL; + + if (!git_config_string(&v2, var, value) && v2 && *v2) + gvfs_cache_server_url = transport_anonymize_url(v2); + free((char*)v2); + return 0; + } + + if (!strcmp(var, "gvfs.sharedcache") && value && *value) { + strbuf_setlen(&gvfs_shared_cache_pathname, 0); + strbuf_addstr(&gvfs_shared_cache_pathname, value); + if (strbuf_normalize_path(&gvfs_shared_cache_pathname) < 0) { + /* + * Pretend it wasn't set. This will cause us to + * fallback to ".git/objects" effectively. + */ + strbuf_release(&gvfs_shared_cache_pathname); + return 0; + } + strbuf_trim_trailing_dir_sep(&gvfs_shared_cache_pathname); + return 0; + } + + return 0; +} + int git_default_config(const char *var, const char *value, void *cb) { if (starts_with(var, "core.")) @@ -1911,6 +1961,9 @@ int git_default_config(const char *var, const char *value, void *cb) if (starts_with(var, "sparse.")) return git_default_sparse_config(var, value); + if (starts_with(var, "gvfs.")) + return git_default_gvfs_config(var, value); + /* Add other config variables here and to Documentation/config.txt. */ return 0; } @@ -2789,6 +2842,44 @@ int git_config_get_max_percent_split_change(void) return -1; /* default value */ } +int git_config_get_virtualfilesystem(void) +{ + /* Run only once. */ + static int virtual_filesystem_result = -1; + if (virtual_filesystem_result >= 0) + return virtual_filesystem_result; + + if (git_config_get_pathname("core.virtualfilesystem", &core_virtualfilesystem)) + core_virtualfilesystem = getenv("GIT_VIRTUALFILESYSTEM_TEST"); + + if (core_virtualfilesystem && !*core_virtualfilesystem) + core_virtualfilesystem = NULL; + + if (core_virtualfilesystem) { + /* + * Some git commands spawn helpers and redirect the index to a different + * location. These include "difftool -d" and the sequencer + * (i.e. `git rebase -i`, `git cherry-pick` and `git revert`) and others. + * In those instances we don't want to update their temporary index with + * our virtualization data. + */ + char *default_index_file = xstrfmt("%s/%s", the_repository->gitdir, "index"); + int should_run_hook = !strcmp(default_index_file, the_repository->index_file); + + free(default_index_file); + if (should_run_hook) { + /* virtual file system relies on the sparse checkout logic so force it on */ + core_apply_sparse_checkout = 1; + virtual_filesystem_result = 1; + return 1; + } + core_virtualfilesystem = NULL; + } + + virtual_filesystem_result = 0; + return 0; +} + int git_config_get_index_threads(int *dest) { int is_bool, val; @@ -3202,6 +3293,7 @@ int git_config_set_multivar_in_file_gently(const char *config_filename, const char *value_pattern, unsigned flags) { + static unsigned long timeout_ms = ULONG_MAX; int fd = -1, in_fd = -1; int ret; struct lock_file lock = LOCK_INIT; @@ -3222,11 +3314,16 @@ int git_config_set_multivar_in_file_gently(const char *config_filename, if (!config_filename) config_filename = filename_buf = git_pathdup("config"); + if ((long)timeout_ms < 0 && + git_config_get_ulong("core.configWriteLockTimeoutMS", &timeout_ms)) + timeout_ms = 0; + /* * The lock serves a purpose in addition to locking: the new * contents of .git/config will be written into it. 
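+	 * (Added note: the lock acquisition below honors the
+	 * core.configWriteLockTimeoutMS value read above; for example, a
+	 * value of 5000 lets a competing writer retry for up to five
+	 * seconds before giving up with CONFIG_NO_LOCK, instead of
+	 * failing immediately.)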
	 */
-	fd = hold_lock_file_for_update(&lock, config_filename, 0);
+	fd = hold_lock_file_for_update_timeout(&lock, config_filename, 0,
+					       timeout_ms);
 	if (fd < 0) {
 		error_errno(_("could not lock config file %s"), config_filename);
 		ret = CONFIG_NO_LOCK;
 	}
diff --git a/config.h b/config.h
index fe5b7a1ae639b5..e4ebca2970ef3b 100644
--- a/config.h
+++ b/config.h
@@ -614,6 +614,7 @@ int git_config_get_pathname(const char *key, const char **dest);
 int git_config_get_index_threads(int *dest);
 int git_config_get_split_index(void);
 int git_config_get_max_percent_split_change(void);
+int git_config_get_virtualfilesystem(void);
 
 /* This dies if the configured or default date is in the future */
 int git_config_get_expiry(const char *key, const char **output);
diff --git a/connected.c b/connected.c
index 74a20cb32e7c7b..9cde030ece78e3 100644
--- a/connected.c
+++ b/connected.c
@@ -1,4 +1,5 @@
 #include "cache.h"
+#include "gvfs.h"
 #include "object-store.h"
 #include "run-command.h"
 #include "sigchain.h"
@@ -30,6 +31,26 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
 	struct transport *transport;
 	size_t base_len;
 
+	/*
+	 * When running a virtual file system, there will be objects that
+	 * are missing locally, and we do not want to download a bunch of
+	 * commits, trees, and blobs just to make sure everything is
+	 * reachable locally. This option therefore skips the reachability
+	 * checks below that use rev-list. It stops the check that runs
+	 * before upload-pack to determine whether there is anything to
+	 * fetch; returning zero for that first check also prevents the
+	 * upload-pack from happening. It likewise skips the check, after
+	 * the fetch is finished, that all of the objects were downloaded
+	 * in the pack file. This allows the fetch to run and get the
+	 * latest tip commit ids for all the branches in the fetch, without
+	 * pulling down commits, trees, or blobs via upload-pack.
+ */ + if (gvfs_config_is_set(GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK)) + return 0; + if (core_virtualize_objects) + return 0; + if (!opt) opt = &defaults; transport = opt->transport; diff --git a/contrib/buildsystems/CMakeLists.txt b/contrib/buildsystems/CMakeLists.txt index 4f366765f75f19..59a13278331a26 100644 --- a/contrib/buildsystems/CMakeLists.txt +++ b/contrib/buildsystems/CMakeLists.txt @@ -643,7 +643,7 @@ if(NOT CURL_FOUND) add_compile_definitions(NO_CURL) message(WARNING "git-http-push and git-http-fetch will not be built") else() - list(APPEND PROGRAMS_BUILT git-http-fetch git-http-push git-imap-send git-remote-http) + list(APPEND PROGRAMS_BUILT git-http-fetch git-http-push git-imap-send git-remote-http git-gvfs-helper) if(CURL_VERSION_STRING VERSION_GREATER_EQUAL 7.34.0) add_compile_definitions(USE_CURL_FOR_IMAP_SEND) endif() @@ -792,7 +792,7 @@ target_link_libraries(git-sh-i18n--envsubst common-main) add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c) target_link_libraries(git-shell common-main) -add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c) +add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c ${CMAKE_SOURCE_DIR}/json-parser.c) target_link_libraries(scalar common-main) if(CURL_FOUND) @@ -811,6 +811,9 @@ if(CURL_FOUND) add_executable(git-http-push ${CMAKE_SOURCE_DIR}/http-push.c) target_link_libraries(git-http-push http_obj common-main ${CURL_LIBRARIES} ${EXPAT_LIBRARIES}) endif() + + add_executable(git-gvfs-helper ${CMAKE_SOURCE_DIR}/gvfs-helper.c) + target_link_libraries(git-gvfs-helper http_obj common-main ${CURL_LIBRARIES} ) endif() parse_makefile_for_executables(git_builtin_extra "BUILT_INS") @@ -1020,6 +1023,20 @@ set(wrapper_scripts set(wrapper_test_scripts test-fake-ssh test-tool) +if(CURL_FOUND) + list(APPEND wrapper_test_scripts test-gvfs-protocol) + + add_executable(test-gvfs-protocol ${CMAKE_SOURCE_DIR}/t/helper/test-gvfs-protocol.c) + target_link_libraries(test-gvfs-protocol common-main) + + if(MSVC) + set_target_properties(test-gvfs-protocol + PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_BINARY_DIR}/t/helper) + set_target_properties(test-gvfs-protocol + PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_BINARY_DIR}/t/helper) + endif() +endif() + foreach(script ${wrapper_scripts}) file(STRINGS ${CMAKE_SOURCE_DIR}/wrap-for-bin.sh content NEWLINE_CONSUME) diff --git a/contrib/completion/git-completion.bash b/contrib/completion/git-completion.bash index ba5c395d2d804f..4bb5b28fd266d3 100644 --- a/contrib/completion/git-completion.bash +++ b/contrib/completion/git-completion.bash @@ -1655,7 +1655,7 @@ _git_clone () esac } -__git_untracked_file_modes="all no normal" +__git_untracked_file_modes="all no normal complete" _git_commit () { diff --git a/contrib/long-running-read-object/example.pl b/contrib/long-running-read-object/example.pl new file mode 100644 index 00000000000000..b8f37f836a813c --- /dev/null +++ b/contrib/long-running-read-object/example.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. 
+# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "/host_repo/.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! + exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . "\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? "status=error" : "status=success"); + packet_flush(); + } else { + die "bad command '$command'"; + } +} diff --git a/contrib/scalar/docs/faq.md b/contrib/scalar/docs/faq.md new file mode 100644 index 00000000000000..a14f78a996d5d5 --- /dev/null +++ b/contrib/scalar/docs/faq.md @@ -0,0 +1,51 @@ +Frequently Asked Questions +========================== + +Using Scalar +------------ + +### I don't want a sparse clone, I want every file after I clone! + +Run `scalar clone --full-clone ` to initialize your repo to include +every file. You can switch to a sparse-checkout later by running +`git sparse-checkout init --cone`. + +### I already cloned without `--full-clone`. How do I get everything? + +Run `git sparse-checkout disable`. + +Scalar Design Decisions +----------------------- + +There may be many design decisions within Scalar that are confusing at first +glance. 
Some of them may cause friction when you use Scalar with your existing
+repos and existing habits.
+
+> Scalar has the most benefit when users design repositories
+> with efficient patterns.
+
+For example: Scalar uses the sparse-checkout feature to limit the size of the
+working directory within a large monorepo. It is designed to work efficiently
+with monorepos that are highly componentized, allowing most developers to
+need many fewer files in their daily work.
+
+### Why does `scalar clone` create a `<repo>/src` folder?
+
+Scalar uses a file system watcher to keep track of changes under this `src` folder.
+Any activity in this folder is assumed to be important to Git operations. By
+creating the `src` folder, we are making it easy for your build system to
+create output folders outside the `src` directory. We commonly see systems
+create folders for build outputs and package downloads. Scalar itself creates
+these folders during its builds.
+
+Your build system may create build artifacts such as `.obj` or `.lib` files
+next to your source code. These are commonly "hidden" from Git using
+`.gitignore` files. Having such artifacts in your source tree creates
+additional work for Git because it needs to look at these files and match them
+against the `.gitignore` patterns.
+
+By following the `src` pattern Scalar tries to establish, and placing your build
+intermediates and outputs parallel with the `src` folder rather than inside it,
+you can help optimize Git command performance for developers in the repository
+by limiting the number of files Git needs to consider for many common
+operations.
diff --git a/contrib/scalar/docs/getting-started.md b/contrib/scalar/docs/getting-started.md
new file mode 100644
index 00000000000000..d5125330320d2c
--- /dev/null
+++ b/contrib/scalar/docs/getting-started.md
@@ -0,0 +1,109 @@
+Getting Started
+===============
+
+Registering existing Git repos
+------------------------------
+
+To add a repository to the list of registered repos, run `scalar register [<path>]`.
+If `<path>` is not provided, then the "current repository" is discovered from
+the working directory by scanning the parent paths for a path containing a `.git`
+folder, possibly inside a `src` folder.
+
+To see which repositories are currently tracked by the service, run
+`scalar list`.
+
+Run `scalar unregister [<path>]` to remove the repo from this list.
+
+Creating a new Scalar clone
+---------------------------
+
+The `clone` verb creates a local enlistment of a remote repository using the
+partial clone feature available e.g. on GitHub, or using the
+[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md),
+as supported by Azure Repos.
+
+```
+scalar clone [options] <url> [<dir>]
+```
+
+Create a local copy of the repository at `<url>`. If specified, create the `<dir>`
+directory and place the repository there. Otherwise, the last section of the `<url>`
+will be used for `<dir>`.
+
+At the end, the repo is located at `<dir>/src`. By default, the sparse-checkout
+feature is enabled and the only files present are those in the root of your
+Git repository. Use `git sparse-checkout set` to expand the set of directories
+you want to see, or `git sparse-checkout disable` to expand to all files. You
+can explore the subdirectories outside your sparse-checkout specification using
+`git ls-tree HEAD`.
+
+### Sparse Repo Mode
+
+By default, Scalar reduces your working directory to only the files at the
+root of the repository. You need to add the folders you care about to build up
+to your working set.
+
+* `scalar clone <url>`
+  * Please choose the **Clone with HTTPS** option in the `Clone Repository` dialog in Azure Repos, not **Clone with SSH**.
+* `cd <dir>\src`
+* At this point, your `src` directory only contains files that appear in your root
+  tree. No folders are populated.
+* Set the directory list for your sparse-checkout using:
+  1. `git sparse-checkout set <dir1> <dir2> ...`
+  2. `git sparse-checkout set --stdin < dir-list.txt`
+* Run git commands as you normally would.
+* To fully populate your working directory, run `git sparse-checkout disable`.
+
+If instead you want to start with all files on-disk, you can clone with the
+`--full-clone` option. To enable sparse-checkout after the fact, run
+`git sparse-checkout init --cone`. This will initialize your sparse-checkout
+patterns to only match the files at root.
+
+If you are unfamiliar with what directories are available in the repository,
+then you can run `git ls-tree -d --name-only HEAD` to discover the directories
+at root, or `git ls-tree -d --name-only HEAD <path>` to discover the directories
+in `<path>`.
+
+### Options
+
+These options allow a user to customize their initial enlistment.
+
+* `--full-clone`: If specified, do not initialize the sparse-checkout feature.
+  All files will be present in your `src` directory. This behaves very similarly
+  to a Git partial clone in that blobs are downloaded on demand. However, it
+  will use the GVFS protocol to download all Git objects.
+
+* `--cache-server-url=<url>`: If specified, set the intended cache server to
+  the specified `<url>`. All object queries will use the GVFS protocol to this
+  `<url>` instead of the origin remote. If the remote supplies a list of
+  cache servers via the `/gvfs/config` endpoint, then the `clone` command
+  will select a nearby cache server from that list.
+
+* `--branch=<ref>`: Specify the branch to checkout after clone.
+
+* `--local-cache-path=<path>`: Use this option to override the path for the
+  local Scalar cache. If not specified, then Scalar will select a default
+  path to share objects with your other enlistments. On Windows, this path
+  is a subdirectory of `<drive>:\.scalarCache\`. On Mac, this path is a
+  subdirectory of `~/.scalarCache/`. The default cache path is recommended so
+  multiple enlistments of the same remote repository share objects on the
+  same device.
+
+### Advanced Options
+
+The options below are not intended for use by a typical user. These are
+usually used by build machines to create a temporary enlistment that
+operates on a single commit.
+
+* `--single-branch`: Use this option to only download metadata for the branch
+  that will be checked out. This is helpful for build machines that target
+  a remote with many branches. Any `git fetch` commands after the clone will
+  still ask for all branches.
+
+Removing a Scalar Clone
+-----------------------
+
+Since the `scalar clone` command sets up a file-system watcher (when available),
+that watcher could prevent deleting the enlistment. Run `scalar delete <enlistment>`
+from outside of your enlistment to unregister the enlistment from the filesystem
+watcher and delete the enlistment at `<enlistment>`.
diff --git a/contrib/scalar/docs/index.md b/contrib/scalar/docs/index.md
new file mode 100644
index 00000000000000..4f56e2b0ebbac6
--- /dev/null
+++ b/contrib/scalar/docs/index.md
@@ -0,0 +1,54 @@
+Scalar: Enabling Git at Scale
+=============================
+
+Scalar is a tool that helps Git scale to some of the largest Git repositories.
+It achieves this by enabling some advanced Git features, such as: + +* *Partial clone:* reduces time to get a working repository by not + downloading all Git objects right away. + +* *Background prefetch:* downloads Git object data from all remotes every + hour, reducing the amount of time for foreground `git fetch` calls. + +* *Sparse-checkout:* limits the size of your working directory. + +* *File system monitor:* tracks the recently modified files and eliminates + the need for Git to scan the entire worktree. + +* *Commit-graph:* accelerates commit walks and reachability calculations, + speeding up commands like `git log`. + +* *Multi-pack-index:* enables fast object lookups across many pack-files. + +* *Incremental repack:* Repacks the packed Git data into fewer pack-file + without disrupting concurrent commands by using the multi-pack-index. + +By running `scalar register` in any Git repo, Scalar will automatically enable +these features for that repo (except partial clone) and start running suggested +maintenance in the background using +[the `git maintenance` feature](https://git-scm.com/docs/git-maintenance). + +Repos cloned with the `scalar clone` command use partial clone or the +[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md) +to significantly reduce the amount of data required to get started +using a repository. By delaying all blob downloads until they are required, +Scalar allows you to work with very large repositories quickly. The GVFS +protocol allows a network of _cache servers_ to serve objects with lower +latency and higher throughput. The cache servers also reduce load on the +central server. + +Documentation +------------- + +* [Getting Started](getting-started.md): Get started with Scalar. + Includes `scalar register`, `scalar unregister`, `scalar clone`, and + `scalar delete`. + +* [Troubleshooting](troubleshooting.md): + Collect diagnostic information or update custom settings. Includes + `scalar diagnose` and `scalar cache-server`. + +* [The Philosophy of Scalar](philosophy.md): Why does Scalar work the way + it does, and how do we make decisions about its future? + +* [Frequently Asked Questions](faq.md) diff --git a/contrib/scalar/docs/philosophy.md b/contrib/scalar/docs/philosophy.md new file mode 100644 index 00000000000000..e3dfa025a2504c --- /dev/null +++ b/contrib/scalar/docs/philosophy.md @@ -0,0 +1,71 @@ +The Philosophy of Scalar +======================== + +The team building Scalar has **opinions** about Git performance. Scalar +takes out the guesswork by automatically configuring your Git repositories +to take advantage of the latest and greatest features. It is difficult to +say that these are the absolute best settings for every repository, but +these settings do work for some of the largest repositories in the world. + +Scalar intends to do very little more than the standard Git client. We +actively implement new features into Git instead of Scalar, then update +Scalar only to configure those new settings. In particular, we ported +features like background maintenance to Git to make Scalar simpler and +make Git more powerful. + +Scalar ships inside [a custom version of Git][microsoft-git], but we are +working to make it available in other forks of Git. 
The only feature +that is not intended to ever reach the standard Git client is Scalar's use +of [the GVFS Protocol][gvfs-protocol], which is essentially an older +version of [Git's partial clone feature](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/) +that was available first in Azure Repos. Services such as GitHub support +only partial clone instead of the GVFS protocol because that is the +standard adopted by the Git project. If your hosting service supports +partial clone, then we absolutely recommend it as a way to greatly speed +up your clone and fetch times and to reduce how much disk space your Git +repository requires. Scalar will help with this! + +If you don't use the GVFS Protocol, then most of the value of Scalar can +be found in the core Git client. However, most of the advanced features +that really optimize Git's performance are off by default for compatibility +reasons. To really take advantage of Git's latest and greatest features, +you either need to study the [`git config` documentation](https://git-scm.com/docs/git-config) +and regularly read [the Git release notes](https://github.com/git/git/tree/master/Documentation/RelNotes). +Even if you do all that work and customize your Git settings on your machines, +you likely will want to share those settings with other team members. +Or, you can just use Scalar! + +Using `scalar register` on an existing Git repository will give you these +benefits: + +* Additional compression of your `.git/index` file. +* Hourly background `git fetch` operations, keeping you in-sync with your + remotes. +* Advanced data structures, such as the `commit-graph` and `multi-pack-index` + are updated automatically in the background. +* If using macOS or Windows, then Scalar configures Git's builtin File System + Monitor, providing faster commands such as `git status` or `git add`. + +Additionally, if you use `scalar clone` to create a new repository, then +you will automatically get these benefits: + +* Use Git's partial clone feature to only download the files you need for + your current checkout. +* Use Git's [sparse-checkout feature][sparse-checkout] to minimize the + number of files required in your working directory. + [Read more about sparse-checkout here.][sparse-checkout-blog] +* Create the Git repository inside `/src` to make it easy to + place build artifacts outside of the Git repository, such as in + `/bin` or `/packages`. + +We also admit that these **opinions** can always be improved! If you have +an idea of how to improve our setup, consider +[creating an issue](https://github.com/microsoft/scalar/issues/new) or +contributing a pull request! Some [existing](https://github.com/microsoft/scalar/issues/382) +[issues](https://github.com/microsoft/scalar/issues/388) have already +improved our configuration settings and roadmap! 
+ +[gvfs-protocol]: https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md +[microsoft-git]: https://github.com/microsoft/git +[sparse-checkout]: https://git-scm.com/docs/git-sparse-checkout +[sparse-checkout-blog]: https://github.blog/2020-01-17-bring-your-monorepo-down-to-size-with-sparse-checkout/ diff --git a/contrib/scalar/docs/troubleshooting.md b/contrib/scalar/docs/troubleshooting.md new file mode 100644 index 00000000000000..c54d2438f22523 --- /dev/null +++ b/contrib/scalar/docs/troubleshooting.md @@ -0,0 +1,40 @@ +Troubleshooting +=============== + +Diagnosing Issues +----------------- + +The `scalar diagnose` command collects logs and config details for the current +repository. The resulting zip file helps root-cause issues. + +When run inside your repository, creates a zip file containing several important +files for that repository. This includes: + +* Configuration files from your `.git` folder, such as the `config` file, + `index`, `hooks`, and `refs`. + +* A summary of your Git object database, including the number of loose objects + and the names and sizes of pack-files. + +As the `diagnose` command completes, it provides the path of the resulting +zip file. This zip can be attached to bug reports to make the analysis easier. + +Modifying Configuration Values +------------------------------ + +The Scalar-specific configuration is only available for repos using the +GVFS protocol. + +### Cache Server URL + +When using an enlistment cloned with `scalar clone` and the GVFS protocol, +you will have a value called the cache server URL. Cache servers are a feature +of the GVFS protocol to provide low-latency access to the on-demand object +requests. This modifies the `gvfs.cache-server` setting in your local Git config +file. + +Run `scalar cache-server --get` to see the current cache server. + +Run `scalar cache-server --list` to see the available cache server URLs. + +Run `scalar cache-server --set=` to set your cache server to ``. diff --git a/convert.c b/convert.c index 95e6a5244fc26c..7707038b1580b3 100644 --- a/convert.c +++ b/convert.c @@ -1,4 +1,5 @@ #include "cache.h" +#include "gvfs.h" #include "config.h" #include "object-store.h" #include "attr.h" @@ -548,6 +549,9 @@ static int crlf_to_git(struct index_state *istate, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -587,6 +591,9 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf, if (!will_convert_lf_to_crlf(&stats, crlf_action)) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? 
*/ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -698,6 +705,9 @@ static int apply_single_file_filter(const char *path, const char *src, size_t le struct async async; struct filter_params params; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("Filter \"%s\" not supported when running under GVFS", cmd); + memset(&async, 0, sizeof(async)); async.proc = filter_buffer_or_fd; async.data = ¶ms; @@ -1109,6 +1119,9 @@ static int ident_to_git(const char *src, size_t len, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -1156,6 +1169,9 @@ static int ident_to_worktree(const char *src, size_t len, if (!cnt) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? */ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -1605,6 +1621,9 @@ static int lf_to_crlf_filter_fn(struct stream_filter *filter, size_t count, o = 0; struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* * We may be holding onto the CR to see if it is followed by a * LF, in which case we would need to go to the main loop. @@ -1849,6 +1868,9 @@ static int ident_filter_fn(struct stream_filter *filter, struct ident_filter *ident = (struct ident_filter *)filter; static const char head[] = "$Id"; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + if (!input) { /* drain upon eof */ switch (ident->state) { diff --git a/credential.c b/credential.c index f6389a50684a6e..2c981321ca2b12 100644 --- a/credential.c +++ b/credential.c @@ -286,6 +286,8 @@ static int run_credential_helper(struct credential *c, else helper.no_stdout = 1; + helper.trace2_child_class = "cred"; + if (start_command(&helper) < 0) return -1; diff --git a/csum-file.c b/csum-file.c index 59ef3398ca2b01..36e581588764b6 100644 --- a/csum-file.c +++ b/csum-file.c @@ -45,7 +45,8 @@ void hashflush(struct hashfile *f) unsigned offset = f->offset; if (offset) { - the_hash_algo->update_fn(&f->ctx, f->buffer, offset); + if (!f->skip_hash) + the_hash_algo->update_fn(&f->ctx, f->buffer, offset); flush(f, f->buffer, offset); f->offset = 0; } @@ -64,7 +65,16 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, int fd; hashflush(f); - the_hash_algo->final_fn(f->buffer, &f->ctx); + + /* + * If we skip the hash function, be sure to create an empty hash + * for the results. 
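+	 * (Added note: callers that set skip_hash, using the hashfile purely
+	 * as a buffered writer, therefore receive an all-zero trailing
+	 * checksum rather than a real one.)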
+ */ + if (f->skip_hash) + memset(f->buffer, 0, the_hash_algo->rawsz); + else + the_hash_algo->final_fn(f->buffer, &f->ctx); + if (result) hashcpy(result, f->buffer); if (flags & CSUM_HASH_IN_STREAM) @@ -158,6 +168,7 @@ static struct hashfile *hashfd_internal(int fd, const char *name, f->buffer_len = buffer_len; f->buffer = xmalloc(buffer_len); f->check_buffer = NULL; + f->skip_hash = 0; return f; } diff --git a/csum-file.h b/csum-file.h index 0d29f528fbcb51..1dfd21d72fdd93 100644 --- a/csum-file.h +++ b/csum-file.h @@ -20,6 +20,13 @@ struct hashfile { size_t buffer_len; unsigned char *buffer; unsigned char *check_buffer; + + /* + * If set to 1, skip_hash indicates that we should + * not actually compute the hash for this hashfile and + * instead only use it as a buffered write. + */ + int skip_hash; }; /* Checkpoint */ diff --git a/diagnose.c b/diagnose.c index 8f2656989666b6..52c4df114d5d04 100644 --- a/diagnose.c +++ b/diagnose.c @@ -7,6 +7,7 @@ #include "strvec.h" #include "object-store.h" #include "packfile.h" +#include "config.h" struct archive_dir { const char *path; @@ -102,6 +103,39 @@ static unsigned char get_dtype(struct dirent *e, struct strbuf *path) return dtype; } +static void dir_stats(struct strbuf *buf, const char *path) +{ + DIR *dir = opendir(path); + struct dirent *e; + struct stat e_stat; + struct strbuf file_path = STRBUF_INIT; + size_t base_path_len; + + if (!dir) + return; + + strbuf_addstr(buf, "Contents of "); + strbuf_add_absolute_path(buf, path); + strbuf_addstr(buf, ":\n"); + + strbuf_add_absolute_path(&file_path, path); + strbuf_addch(&file_path, '/'); + base_path_len = file_path.len; + + while ((e = readdir(dir)) != NULL) + if (!is_dot_or_dotdot(e->d_name) && e->d_type == DT_REG) { + strbuf_setlen(&file_path, base_path_len); + strbuf_addstr(&file_path, e->d_name); + if (!stat(file_path.buf, &e_stat)) + strbuf_addf(buf, "%-70s %16"PRIuMAX"\n", + e->d_name, + (uintmax_t)e_stat.st_size); + } + + strbuf_release(&file_path); + closedir(dir); +} + static int count_files(struct strbuf *path) { DIR *dir = opendir(path->buf); @@ -214,7 +248,8 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) struct strvec archiver_args = STRVEC_INIT; char **argv_copy = NULL; int stdout_fd = -1, archiver_fd = -1; - struct strbuf buf = STRBUF_INIT; + char *cache_server_url = NULL, *shared_cache = NULL; + struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; int res, i; struct archive_dir archive_dirs[] = { { ".git", 0 }, @@ -249,6 +284,13 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) get_version_info(&buf, 1); strbuf_addf(&buf, "Repository root: %s\n", the_repository->worktree); + + git_config_get_string("gvfs.cache-server", &cache_server_url); + git_config_get_string("gvfs.sharedCache", &shared_cache); + strbuf_addf(&buf, "Cache Server: %s\nLocal Cache: %s\n\n", + cache_server_url ? cache_server_url : "None", + shared_cache ? 
shared_cache : "None"); + get_disk_info(&buf); write_or_die(stdout_fd, buf.buf, buf.len); strvec_pushf(&archiver_args, @@ -279,6 +321,41 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) } } + if (shared_cache) { + strbuf_reset(&buf); + strbuf_addf(&path, "%s/pack", shared_cache); + strbuf_reset(&buf); + strbuf_addstr(&buf, "--add-virtual-file=packs-cached.txt:"); + dir_stats(&buf, path.buf); + strvec_push(&archiver_args, buf.buf); + + strbuf_reset(&buf); + strbuf_addstr(&buf, "--add-virtual-file=objects-cached.txt:"); + loose_objs_stats(&buf, shared_cache); + strvec_push(&archiver_args, buf.buf); + + strbuf_reset(&path); + strbuf_addf(&path, "%s/info", shared_cache); + if (is_directory(path.buf)) { + DIR *dir = opendir(path.buf); + struct dirent *e; + + while ((e = readdir(dir))) { + if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name)) + continue; + + strbuf_reset(&buf); + strbuf_addf(&buf, "--add-virtual-file=info/%s:", e->d_name); + if (strbuf_read_file(&buf, path.buf, 0) < 0) { + res = error_errno(_("could not read '%s'"), path.buf); + goto diagnose_cleanup; + } + strvec_push(&archiver_args, buf.buf); + } + closedir(dir); + } + } + strvec_pushl(&archiver_args, "--prefix=", oid_to_hex(the_hash_algo->empty_tree), "--", NULL); @@ -292,10 +369,13 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) goto diagnose_cleanup; } - fprintf(stderr, "\n" - "Diagnostics complete.\n" - "All of the gathered info is captured in '%s'\n", - zip_path->buf); + strbuf_reset(&buf); + strbuf_addf(&buf, "\n" + "Diagnostics complete.\n" + "All of the gathered info is captured in '%s'\n", + zip_path->buf); + write_or_die(stdout_fd, buf.buf, buf.len); + write_or_die(2, buf.buf, buf.len); diagnose_cleanup: if (archiver_fd >= 0) { @@ -306,6 +386,8 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode) free(argv_copy); strvec_clear(&archiver_args); strbuf_release(&buf); + free(cache_server_url); + free(shared_cache); return res; } diff --git a/diff.c b/diff.c index 648f6717a5597c..56d552bc8363b5 100644 --- a/diff.c +++ b/diff.c @@ -3922,6 +3922,13 @@ static int reuse_worktree_file(struct index_state *istate, if (!FAST_WORKING_DIRECTORY && !want_file && has_object_pack(oid)) return 0; + /* + * If this path does not match our sparse-checkout definition, + * then the file will not be in the working directory. + */ + if (!path_in_sparse_checkout(name, istate)) + return 0; + /* * Similarly, if we'd have to convert the file contents anyway, that * makes the optimization not worthwhile. diff --git a/dir.c b/dir.c index 08d560657486b0..c4c00c2ad03c74 100644 --- a/dir.c +++ b/dir.c @@ -6,6 +6,7 @@ * Junio Hamano, 2005-2006 */ #include "cache.h" +#include "virtualfilesystem.h" #include "config.h" #include "dir.h" #include "object-store.h" @@ -1414,6 +1415,17 @@ enum pattern_match_result path_matches_pattern_list( int result = NOT_MATCHED; size_t slash_pos; + /* + * The virtual file system data is used to prevent git from traversing + * any part of the tree that is not in the virtual file system. Return + * 1 to exclude the entry if it is not found in the virtual file system, + * else fall through to the regular excludes logic as it may further exclude. 
+ */ + if (*dtype == DT_UNKNOWN) + *dtype = resolve_dtype(DT_UNKNOWN, istate, pathname, pathlen); + if (is_excluded_from_virtualfilesystem(pathname, pathlen, *dtype) > 0) + return 1; + if (!pl->use_cone_patterns) { pattern = last_matching_pattern_from_list(pathname, pathlen, basename, dtype, pl, istate); @@ -1503,6 +1515,13 @@ static int path_in_sparse_checkout_1(const char *path, enum pattern_match_result match = UNDECIDED; const char *end, *slash; + /* + * When using a virtual filesystem, there aren't really patterns + * to follow, but be extra careful to skip this check. + */ + if (core_virtualfilesystem) + return 1; + /* * We default to accepting a path if the path is empty, there are no * patterns, or the patterns are of the wrong type. @@ -1757,8 +1776,20 @@ struct path_pattern *last_matching_pattern(struct dir_struct *dir, int is_excluded(struct dir_struct *dir, struct index_state *istate, const char *pathname, int *dtype_p) { - struct path_pattern *pattern = - last_matching_pattern(dir, istate, pathname, dtype_p); + struct path_pattern *pattern; + + /* + * The virtual file system data is used to prevent git from traversing + * any part of the tree that is not in the virtual file system. Return + * 1 to exclude the entry if it is not found in the virtual file system, + * else fall through to the regular excludes logic as it may further exclude. + */ + if (*dtype_p == DT_UNKNOWN) + *dtype_p = resolve_dtype(DT_UNKNOWN, istate, pathname, strlen(pathname)); + if (is_excluded_from_virtualfilesystem(pathname, strlen(pathname), *dtype_p) > 0) + return 1; + + pattern = last_matching_pattern(dir, istate, pathname, dtype_p); if (pattern) return pattern->flags & PATTERN_FLAG_NEGATIVE ? 0 : 1; return 0; @@ -2344,6 +2375,8 @@ static enum path_treatment treat_path(struct dir_struct *dir, ignore_case); if (dtype != DT_DIR && has_path_in_index) return path_none; + if (is_excluded_from_virtualfilesystem(path->buf, path->len, dtype) > 0) + return path_excluded; /* * When we are looking at a directory P in the working tree, @@ -2548,6 +2581,8 @@ static void add_path_to_appropriate_result_list(struct dir_struct *dir, /* add the path to the appropriate result list */ switch (state) { case path_excluded: + if (is_excluded_from_virtualfilesystem(path->buf, path->len, DT_DIR) > 0) + break; if (dir->flags & DIR_SHOW_IGNORED) dir_add_name(dir, istate, path->buf, path->len); else if ((dir->flags & DIR_SHOW_IGNORED_TOO) || @@ -3094,6 +3129,8 @@ static int cmp_icase(char a, char b) { if (a == b) return 0; + if (is_dir_sep(a)) + return is_dir_sep(b) ? 
0 : -1; if (ignore_case) return toupper(a) - toupper(b); return a - b; diff --git a/environment.c b/environment.c index 18d042b467d26a..2289175605be76 100644 --- a/environment.c +++ b/environment.c @@ -72,9 +72,12 @@ int grafts_replace_parents = 1; int core_apply_sparse_checkout; int core_sparse_checkout_cone; int sparse_expect_files_outside_of_patterns; +int core_gvfs; +const char *core_virtualfilesystem; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; +int core_virtualize_objects; enum log_refs_config log_all_ref_updates = LOG_REFS_UNSET; #ifndef PROTECT_HFS_DEFAULT @@ -86,6 +89,9 @@ int protect_hfs = PROTECT_HFS_DEFAULT; #define PROTECT_NTFS_DEFAULT 1 #endif int protect_ntfs = PROTECT_NTFS_DEFAULT; +int core_use_gvfs_helper; +const char *gvfs_cache_server_url; +struct strbuf gvfs_shared_cache_pathname = STRBUF_INIT; /* * The character that begins a commented line in user-editable file diff --git a/git.c b/git.c index da411c53822a18..f591a6ef8be5db 100644 --- a/git.c +++ b/git.c @@ -1,10 +1,13 @@ #include "builtin.h" +#include "gvfs.h" #include "config.h" #include "exec-cmd.h" #include "help.h" #include "run-command.h" #include "alias.h" #include "shallow.h" +#include "dir.h" +#include "hook.h" #define RUN_SETUP (1<<0) #define RUN_SETUP_GENTLY (1<<1) @@ -17,6 +20,7 @@ #define SUPPORT_SUPER_PREFIX (1<<4) #define DELAY_PAGER_CONFIG (1<<5) #define NO_PARSEOPT (1<<6) /* parse-options is not used */ +#define BLOCK_ON_GVFS_REPO (1<<7) /* command not allowed in GVFS repos */ struct cmd_struct { const char *cmd; @@ -417,6 +421,68 @@ static int handle_alias(int *argcp, const char ***argv) return ret; } +/* Runs pre/post-command hook */ +static struct strvec sargv = STRVEC_INIT; +static int run_post_hook = 0; +static int exit_code = -1; + +static int run_pre_command_hook(const char **argv) +{ + char *lock; + int ret = 0; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + + /* + * Ensure the global pre/post command hook is only called for + * the outer command and not when git is called recursively + * or spawns multiple commands (like with the alias command) + */ + lock = getenv("COMMAND_HOOK_LOCK"); + if (lock && !strcmp(lock, "true")) + return 0; + setenv("COMMAND_HOOK_LOCK", "true", 1); + + /* call the hook proc */ + strvec_pushv(&sargv, argv); + strvec_pushf(&sargv, "--git-pid=%"PRIuMAX, (uintmax_t)getpid()); + strvec_pushv(&opt.args, sargv.v); + ret = run_hooks_opt("pre-command", &opt); + + if (!ret) + run_post_hook = 1; + return ret; +} + +static int run_post_command_hook(void) +{ + char *lock; + int ret = 0; + struct run_hooks_opt opt = RUN_HOOKS_OPT_INIT; + + /* + * Only run post_command if pre_command succeeded in this process + */ + if (!run_post_hook) + return 0; + lock = getenv("COMMAND_HOOK_LOCK"); + if (!lock || strcmp(lock, "true")) + return 0; + + strvec_pushv(&opt.args, sargv.v); + strvec_pushf(&opt.args, "--exit_code=%u", exit_code); + ret = run_hooks_opt("post-command", &opt); + + run_post_hook = 0; + strvec_clear(&sargv); + setenv("COMMAND_HOOK_LOCK", "false", 1); + return ret; +} + +static void post_command_hook_atexit(void) +{ + run_post_command_hook(); +} + static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status, help; @@ -457,18 +523,26 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) if (!help && p->option & NEED_WORK_TREE) setup_work_tree(); + if (!help && p->option & BLOCK_ON_GVFS_REPO && 
gvfs_config_is_set(GVFS_BLOCK_COMMANDS)) + die("'git %s' is not supported on a GVFS repo", p->cmd); + + if (run_pre_command_hook(argv)) + die("pre-command hook aborted command"); + trace_argv_printf(argv, "trace: built-in: git"); trace2_cmd_name(p->cmd); trace2_cmd_list_config(); trace2_cmd_list_env_vars(); validate_cache_entries(the_repository->index); - status = p->fn(argc, argv, prefix); + exit_code = status = p->fn(argc, argv, prefix); validate_cache_entries(the_repository->index); if (status) return status; + run_post_command_hook(); + /* Somebody closed stdout? */ if (fstat(fileno(stdout), &st)) return 0; @@ -537,7 +611,7 @@ static struct cmd_struct commands[] = { { "for-each-ref", cmd_for_each_ref, RUN_SETUP }, { "for-each-repo", cmd_for_each_repo, RUN_SETUP_GENTLY }, { "format-patch", cmd_format_patch, RUN_SETUP }, - { "fsck", cmd_fsck, RUN_SETUP }, + { "fsck", cmd_fsck, RUN_SETUP | BLOCK_ON_GVFS_REPO}, { "fsck-objects", cmd_fsck, RUN_SETUP }, { "fsmonitor--daemon", cmd_fsmonitor__daemon, SUPPORT_SUPER_PREFIX | RUN_SETUP }, { "gc", cmd_gc, RUN_SETUP }, @@ -578,7 +652,7 @@ static struct cmd_struct commands[] = { { "pack-refs", cmd_pack_refs, RUN_SETUP }, { "patch-id", cmd_patch_id, RUN_SETUP_GENTLY | NO_PARSEOPT }, { "pickaxe", cmd_blame, RUN_SETUP }, - { "prune", cmd_prune, RUN_SETUP }, + { "prune", cmd_prune, RUN_SETUP | BLOCK_ON_GVFS_REPO}, { "prune-packed", cmd_prune_packed, RUN_SETUP }, { "pull", cmd_pull, RUN_SETUP | NEED_WORK_TREE }, { "push", cmd_push, RUN_SETUP }, @@ -590,7 +664,7 @@ static struct cmd_struct commands[] = { { "remote", cmd_remote, RUN_SETUP }, { "remote-ext", cmd_remote_ext, NO_PARSEOPT }, { "remote-fd", cmd_remote_fd, NO_PARSEOPT }, - { "repack", cmd_repack, RUN_SETUP }, + { "repack", cmd_repack, RUN_SETUP | BLOCK_ON_GVFS_REPO }, { "replace", cmd_replace, RUN_SETUP }, { "rerere", cmd_rerere, RUN_SETUP }, { "reset", cmd_reset, RUN_SETUP }, @@ -610,13 +684,14 @@ static struct cmd_struct commands[] = { { "stash", cmd_stash, RUN_SETUP | NEED_WORK_TREE }, { "status", cmd_status, RUN_SETUP | NEED_WORK_TREE }, { "stripspace", cmd_stripspace }, - { "submodule--helper", cmd_submodule__helper, RUN_SETUP | SUPPORT_SUPER_PREFIX | NO_PARSEOPT }, + { "submodule--helper", cmd_submodule__helper, RUN_SETUP | SUPPORT_SUPER_PREFIX | NO_PARSEOPT | BLOCK_ON_GVFS_REPO }, { "switch", cmd_switch, RUN_SETUP | NEED_WORK_TREE }, { "symbolic-ref", cmd_symbolic_ref, RUN_SETUP }, { "tag", cmd_tag, RUN_SETUP | DELAY_PAGER_CONFIG }, { "unpack-file", cmd_unpack_file, RUN_SETUP | NO_PARSEOPT }, { "unpack-objects", cmd_unpack_objects, RUN_SETUP | NO_PARSEOPT }, { "update-index", cmd_update_index, RUN_SETUP }, + { "update-microsoft-git", cmd_update_microsoft_git }, { "update-ref", cmd_update_ref, RUN_SETUP }, { "update-server-info", cmd_update_server_info, RUN_SETUP }, { "upload-archive", cmd_upload_archive, NO_PARSEOPT }, @@ -628,7 +703,7 @@ static struct cmd_struct commands[] = { { "verify-tag", cmd_verify_tag, RUN_SETUP }, { "version", cmd_version }, { "whatchanged", cmd_whatchanged, RUN_SETUP }, - { "worktree", cmd_worktree, RUN_SETUP }, + { "worktree", cmd_worktree, RUN_SETUP | BLOCK_ON_GVFS_REPO }, { "write-tree", cmd_write_tree, RUN_SETUP }, }; @@ -749,13 +824,16 @@ static void execv_dashed_external(const char **argv) */ trace_argv_printf(cmd.args.v, "trace: exec:"); + if (run_pre_command_hook(cmd.args.v)) + die("pre-command hook aborted command"); + /* * If we fail because the command is not found, it is * OK to return. 
Otherwise, we just pass along the status code, * or our usual generic code if we were not even able to exec * the program. */ - status = run_command(&cmd); + exit_code = status = run_command(&cmd); /* * If the child process ran and we are now going to exit, emit a @@ -766,6 +844,8 @@ static void execv_dashed_external(const char **argv) exit(status); else if (errno != ENOENT) exit(128); + + run_post_command_hook(); } static int run_argv(int *argcp, const char ***argv) @@ -873,6 +953,7 @@ int cmd_main(int argc, const char **argv) } trace_command_performance(argv); + atexit(post_command_hook_atexit); /* * "git-xxxx" is the same as "git xxxx", but we obviously: @@ -902,10 +983,14 @@ int cmd_main(int argc, const char **argv) } else { /* The user didn't specify a command; give them help */ commit_pager_choice(); + if (run_pre_command_hook(argv)) + die("pre-command hook aborted command"); printf(_("usage: %s\n\n"), git_usage_string); list_common_cmds_help(); printf("\n%s\n", _(git_more_info_string)); - exit(1); + exit_code = 1; + run_post_command_hook(); + exit(exit_code); } cmd = argv[0]; diff --git a/gvfs-helper-client.c b/gvfs-helper-client.c new file mode 100644 index 00000000000000..e6fbb6ed1a43c3 --- /dev/null +++ b/gvfs-helper-client.c @@ -0,0 +1,568 @@ +#include "cache.h" +#include "strvec.h" +#include "trace2.h" +#include "oidset.h" +#include "object.h" +#include "object-store.h" +#include "gvfs-helper-client.h" +#include "sub-process.h" +#include "sigchain.h" +#include "pkt-line.h" +#include "quote.h" +#include "packfile.h" + +static struct oidset gh_client__oidset_queued = OIDSET_INIT; +static unsigned long gh_client__oidset_count; + +struct gh_server__process { + struct subprocess_entry subprocess; /* must be first */ + unsigned int supported_capabilities; +}; + +static int gh_server__subprocess_map_initialized; +static struct hashmap gh_server__subprocess_map; +static struct object_directory *gh_client__chosen_odb; + +/* + * The "objects" capability has verbs: "get" and "post" and "prefetch". + */ +#define CAP_OBJECTS (1u<<1) +#define CAP_OBJECTS_NAME "objects" + +#define CAP_OBJECTS__VERB_GET1_NAME "get" +#define CAP_OBJECTS__VERB_POST_NAME "post" +#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch" + +static int gh_client__start_fn(struct subprocess_entry *subprocess) +{ + static int versions[] = {1, 0}; + static struct subprocess_capability capabilities[] = { + { CAP_OBJECTS_NAME, CAP_OBJECTS }, + { NULL, 0 } + }; + + struct gh_server__process *entry = (struct gh_server__process *)subprocess; + + return subprocess_handshake(subprocess, "gvfs-helper", versions, + NULL, capabilities, + &entry->supported_capabilities); +} + +/* + * Send the queued OIDs in the OIDSET to gvfs-helper for it to + * fetch from the cache-server or main Git server using "/gvfs/objects" + * POST semantics. + * + * objects.post LF + * ( LF)* + * + * + */ +static int gh_client__send__objects_post(struct child_process *process) +{ + struct oidset_iter iter; + struct object_id *oid; + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." 
CAP_OBJECTS__VERB_POST_NAME "\n")); + if (err) + return err; + + oidset_iter_init(&gh_client__oidset_queued, &iter); + while ((oid = oidset_iter_next(&iter))) { + err = packet_write_fmt_gently(process->in, "%s\n", + oid_to_hex(oid)); + if (err) + return err; + } + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + +/* + * Send the given OID to gvfs-helper for it to fetch from the + * cache-server or main Git server using "/gvfs/objects" GET + * semantics. + * + * This ignores any queued OIDs. + * + * objects.get LF + * LF + * + * + */ +static int gh_client__send__objects_get(struct child_process *process, + const struct object_id *oid) +{ + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_GET1_NAME "\n")); + if (err) + return err; + + err = packet_write_fmt_gently(process->in, "%s\n", + oid_to_hex(oid)); + if (err) + return err; + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + +/* + * Send a request to gvfs-helper to prefetch packfiles from either the + * cache-server or the main Git server using "/gvfs/prefetch". + * + * objects.prefetch LF + * [ LF] + * + */ +static int gh_client__send__objects_prefetch(struct child_process *process, + timestamp_t seconds_since_epoch) +{ + int err; + + /* + * We assume that all of the packet_ routines call error() + * so that we don't have to. + */ + + err = packet_write_fmt_gently( + process->in, + (CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n")); + if (err) + return err; + + if (seconds_since_epoch) { + err = packet_write_fmt_gently(process->in, "%" PRItime "\n", + seconds_since_epoch); + if (err) + return err; + } + + err = packet_flush_gently(process->in); + if (err) + return err; + + return 0; +} + +/* + * Update the loose object cache to include the newly created + * object. + */ +static void gh_client__update_loose_cache(const char *line) +{ + const char *v1_oid; + struct object_id oid; + + if (!skip_prefix(line, "loose ", &v1_oid)) + BUG("update_loose_cache: invalid line '%s'", line); + + if (get_oid_hex(v1_oid, &oid)) + BUG("update_loose_cache: invalid line '%s'", line); + + odb_loose_cache_add_new_oid(gh_client__chosen_odb, &oid); +} + +/* + * Update the packed-git list to include the newly created packfile. + */ +static void gh_client__update_packed_git(const char *line) +{ + struct strbuf path = STRBUF_INIT; + const char *v1_filename; + struct packed_git *p; + int is_local; + + if (!skip_prefix(line, "packfile ", &v1_filename)) + BUG("update_packed_git: invalid line '%s'", line); + + /* + * ODB[0] is the local .git/objects. All others are alternates. + */ + is_local = (gh_client__chosen_odb == the_repository->objects->odb); + + strbuf_addf(&path, "%s/pack/%s", + gh_client__chosen_odb->path, v1_filename); + strbuf_strip_suffix(&path, ".pack"); + strbuf_addstr(&path, ".idx"); + + p = add_packed_git(path.buf, path.len, is_local); + if (p) + install_packed_git_and_mru(the_repository, p); +} + +/* + * CAP_OBJECTS verbs return the same format response: + * + * + * * + * + * + * + * Where: + * + * ::= odb SP LF + * + * ::= / + * + * ::= packfile SP LF + * + * ::= loose SP LF + * + * ::= ok LF + * / partial LF + * / error SP LF + * + * Note that `gvfs-helper` controls how/if it chunks the request when + * it talks to the cache-server and/or main Git server. 
So it is + * possible for us to receive many packfiles and/or loose objects *AND + * THEN* get a hard network error or a 404 on an individual object. + * + * If we get a partial result, we can let the caller try to continue + * -- for example, maybe an immediate request for a tree object was + * grouped with a queued request for a blob. The tree-walk *might* be + * able to continue and let the 404 blob be handled later. + */ +static int gh_client__objects__receive_response( + struct child_process *process, + enum gh_client__created *p_ghc, + int *p_nr_loose, int *p_nr_packfile) +{ + enum gh_client__created ghc = GHC__CREATED__NOTHING; + const char *v1; + char *line; + int len; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + + while (1) { + /* + * Warning: packet_read_line_gently() calls die() + * despite the _gently moniker. + */ + len = packet_read_line_gently(process->out, NULL, &line); + if ((len < 0) || !line) + break; + + if (starts_with(line, "odb")) { + /* trust that this matches what we expect */ + } + + else if (starts_with(line, "packfile")) { + gh_client__update_packed_git(line); + ghc |= GHC__CREATED__PACKFILE; + nr_packfile++; + } + + else if (starts_with(line, "loose")) { + gh_client__update_loose_cache(line); + ghc |= GHC__CREATED__LOOSE; + nr_loose++; + } + + else if (starts_with(line, "ok")) + ; + else if (starts_with(line, "partial")) + ; + else if (skip_prefix(line, "error ", &v1)) { + error("gvfs-helper error: '%s'", v1); + err = -1; + } + } + + *p_ghc = ghc; + *p_nr_loose = nr_loose; + *p_nr_packfile = nr_packfile; + + return err; +} + +/* + * Select the preferred ODB for fetching missing objects. + * This should be the alternate with the same directory + * name as set in `gvfs.sharedCache`. + * + * Fallback to .git/objects if necessary. + */ +static void gh_client__choose_odb(void) +{ + struct object_directory *odb; + + if (gh_client__chosen_odb) + return; + + prepare_alt_odb(the_repository); + gh_client__chosen_odb = the_repository->objects->odb; + + if (!gvfs_shared_cache_pathname.len) + return; + + for (odb = the_repository->objects->odb->next; odb; odb = odb->next) { + if (!strcmp(odb->path, gvfs_shared_cache_pathname.buf)) { + gh_client__chosen_odb = odb; + return; + } + } +} + +static struct gh_server__process *gh_client__find_long_running_process( + unsigned int cap_needed) +{ + struct gh_server__process *entry; + struct strvec argv = STRVEC_INIT; + struct strbuf quoted = STRBUF_INIT; + + gh_client__choose_odb(); + + /* + * TODO decide what defaults we want. + */ + strvec_push(&argv, "gvfs-helper"); + strvec_push(&argv, "--fallback"); + strvec_push(&argv, "--cache-server=trust"); + strvec_pushf(&argv, "--shared-cache=%s", + gh_client__chosen_odb->path); + strvec_push(&argv, "server"); + + sq_quote_argv_pretty("ed, argv.v); + + /* + * Find an existing long-running process with the above command + * line -or- create a new long-running process for this and + * subsequent requests. 
+ */ + if (!gh_server__subprocess_map_initialized) { + gh_server__subprocess_map_initialized = 1; + hashmap_init(&gh_server__subprocess_map, + (hashmap_cmp_fn)cmd2process_cmp, NULL, 0); + entry = NULL; + } else + entry = (struct gh_server__process *)subprocess_find_entry( + &gh_server__subprocess_map, quoted.buf); + + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->supported_capabilities = 0; + + if (subprocess_start_strvec(&gh_server__subprocess_map, + &entry->subprocess, 1, + &argv, gh_client__start_fn)) + FREE_AND_NULL(entry); + } + + if (entry && + (entry->supported_capabilities & cap_needed) != cap_needed) { + error("gvfs-helper: does not support needed capabilities"); + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + strvec_clear(&argv); + strbuf_release("ed); + + return entry; +} + +void gh_client__queue_oid(const struct object_id *oid) +{ + /* + * Keep this trace as a printf only, so that it goes to the + * perf log, but not the event log. It is useful for interactive + * debugging, but generates way too much (unuseful) noise for the + * database. + */ + if (trace2_is_enabled()) + trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid)); + + if (!oidset_insert(&gh_client__oidset_queued, oid)) + gh_client__oidset_count++; +} + +/* + * This routine should actually take a "const struct oid_array *" + * rather than the component parts, but fetch_objects() uses + * this model (because of the call in sha1-file.c). + */ +void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr) +{ + int k; + + for (k = 0; k < oid_nr; k++) + gh_client__queue_oid(&oids[k]); +} + +/* + * Bulk fetch all of the queued OIDs in the OIDSET. + */ +int gh_client__drain_queue(enum gh_client__created *p_ghc) +{ + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + + *p_ghc = GHC__CREATED__NOTHING; + + if (!gh_client__oidset_count) + return 0; + + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/post", the_repository); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_post(process); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_data_intmax("gh-client", the_repository, + "objects/post/nr_objects", gh_client__oidset_count); + trace2_region_leave("gh-client", "objects/post", the_repository); + + oidset_clear(&gh_client__oidset_queued); + gh_client__oidset_count = 0; + + return err; +} + +/* + * Get exactly 1 object immediately. + * Ignore any queued objects. + */ +int gh_client__get_immediate(const struct object_id *oid, + enum gh_client__created *p_ghc) +{ + struct gh_server__process *entry; + struct child_process *process; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + + /* + * Keep this trace as a printf only, so that it goes to the + * perf log, but not the event log. It is useful for interactive + * debugging, but generates way too much (unuseful) noise for the + * database. 
+ */ + if (trace2_is_enabled()) + trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid)); + + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/get", the_repository); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_get(process, oid); + if (!err) + err = gh_client__objects__receive_response( + process, p_ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_region_leave("gh-client", "objects/get", the_repository); + + return err; +} + +/* + * Ask gvfs-helper to prefetch commits-and-trees packfiles since a + * given timestamp. + * + * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for + * the last received prefetch and ask for ones newer than that. + */ +int gh_client__prefetch(timestamp_t seconds_since_epoch, + int *nr_packfiles_received) +{ + struct gh_server__process *entry; + struct child_process *process; + enum gh_client__created ghc; + int nr_loose = 0; + int nr_packfile = 0; + int err = 0; + + entry = gh_client__find_long_running_process(CAP_OBJECTS); + if (!entry) + return -1; + + trace2_region_enter("gh-client", "objects/prefetch", the_repository); + trace2_data_intmax("gh-client", the_repository, "prefetch/since", + seconds_since_epoch); + + process = &entry->subprocess.process; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = gh_client__send__objects_prefetch(process, seconds_since_epoch); + if (!err) + err = gh_client__objects__receive_response( + process, &ghc, &nr_loose, &nr_packfile); + + sigchain_pop(SIGPIPE); + + if (err) { + subprocess_stop(&gh_server__subprocess_map, + (struct subprocess_entry *)entry); + FREE_AND_NULL(entry); + } + + trace2_data_intmax("gh-client", the_repository, + "prefetch/packfile_count", nr_packfile); + trace2_region_leave("gh-client", "objects/prefetch", the_repository); + + if (nr_packfiles_received) + *nr_packfiles_received = nr_packfile; + + return err; +} diff --git a/gvfs-helper-client.h b/gvfs-helper-client.h new file mode 100644 index 00000000000000..7692534ecda54c --- /dev/null +++ b/gvfs-helper-client.h @@ -0,0 +1,87 @@ +#ifndef GVFS_HELPER_CLIENT_H +#define GVFS_HELPER_CLIENT_H + +struct repository; +struct commit; +struct object_id; + +enum gh_client__created { + /* + * The _get_ operation did not create anything. If doesn't + * matter if `gvfs-helper` had errors or not -- just that + * nothing was created. + */ + GHC__CREATED__NOTHING = 0, + + /* + * The _get_ operation created one or more packfiles. + */ + GHC__CREATED__PACKFILE = 1<<1, + + /* + * The _get_ operation created one or more loose objects. + * (Not necessarily the for the individual OID you requested.) + */ + GHC__CREATED__LOOSE = 1<<2, + + /* + * The _get_ operation created one or more packfilea *and* + * one or more loose objects. + */ + GHC__CREATED__PACKFILE_AND_LOOSE = (GHC__CREATED__PACKFILE | + GHC__CREATED__LOOSE), +}; + +/* + * Ask `gvfs-helper server` to immediately fetch a single object + * using "/gvfs/objects" GET semantics. + * + * A long-running background process is used to make subsequent + * requests more efficient. + * + * A loose object will be created in the shared-cache ODB and + * in-memory cache updated. 
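
As a usage sketch (the caller, its name, and its variables are hypothetical and not part of this header), a caller that already knows a set of objects is missing would typically queue them and drain the queue once, reserving the immediate GET for a single urgent object:

/*
 * Hypothetical caller-side sketch: batch-fetch a set of missing
 * objects via the queue, then fetch one urgent object immediately.
 */
static void example_fetch_missing(const struct object_id *oids, int oid_nr,
				  const struct object_id *needed_now)
{
	enum gh_client__created ghc = GHC__CREATED__NOTHING;

	gh_client__queue_oid_array(oids, oid_nr);
	if (gh_client__drain_queue(&ghc))
		warning("gvfs-helper: bulk fetch failed");

	if (gh_client__get_immediate(needed_now, &ghc))
		warning("gvfs-helper: could not fetch %s",
			oid_to_hex(needed_now));
}
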
+ */ +int gh_client__get_immediate(const struct object_id *oid, + enum gh_client__created *p_ghc); + +/* + * Queue this OID for a future fetch using `gvfs-helper service`. + * It does not wait. + * + * Callers should not rely on the queued object being on disk until + * the queue has been drained. + */ +void gh_client__queue_oid(const struct object_id *oid); +void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr); + +/* + * Ask `gvfs-helper server` to fetch the set of queued OIDs using + * "/gvfs/objects" POST semantics. + * + * A long-running background process is used to subsequent requests + * more efficient. + * + * One or more packfiles will be created in the shared-cache ODB. + */ +int gh_client__drain_queue(enum gh_client__created *p_ghc); + +/* + * Ask `gvfs-helper server` to fetch any "prefetch packs" + * available on the server more recent than the requested time. + * + * If seconds_since_epoch is zero, gvfs-helper will scan the ODB for + * the last received prefetch and ask for ones newer than that. + * + * A long-running background process is used to subsequent requests + * (either prefetch or regular immediate/queued requests) more efficient. + * + * One or more packfiles will be created in the shared-cache ODB. + * + * Returns 0 on success, -1 on error. Optionally also returns the + * number of prefetch packs received. + */ +int gh_client__prefetch(timestamp_t seconds_since_epoch, + int *nr_packfiles_received); + +#endif /* GVFS_HELPER_CLIENT_H */ diff --git a/gvfs-helper.c b/gvfs-helper.c new file mode 100644 index 00000000000000..e8e6fbbdd9f361 --- /dev/null +++ b/gvfs-helper.c @@ -0,0 +1,4217 @@ +// TODO Write a man page. Here are some notes for dogfooding. +// TODO +// +// Usage: git gvfs-helper [] [] +// +// : +// +// --remote= // defaults to "origin" +// +// --fallback // boolean. defaults to off +// +// When a fetch from the cache-server fails, automatically +// fallback to the main Git server. This option has no effect +// if no cache-server is defined. +// +// --cache-server= // defaults to "verify" +// +// verify := lookup the set of defined cache-servers using +// "gvfs/config" and confirm that the selected +// cache-server is well-known. Silently disable the +// cache-server if not. (See security notes later.) +// +// error := verify cache-server and abort if not well-known. +// +// trust := do not verify cache-server. just use it, if set. +// +// disable := disable the cache-server and always use the main +// Git server. +// +// --shared-cache= +// +// A relative or absolute pathname to the ODB directory to store +// fetched objects. +// +// If this option is not specified, we default to the value +// in the "gvfs.sharedcache" config setting and then to the +// local ".git/objects" directory. +// +// : +// +// config +// +// Fetch the "gvfs/config" string from the main Git server. +// (The cache-server setting is ignored because cache-servers +// do not support this REST API.) +// +// get +// +// Fetch 1 or more objects one at a time using a "/gvfs/objects" +// GET request. +// +// If a cache-server is configured, +// try it first. Optionally fallback to the main Git server. +// +// The set of objects is given on stdin and is assumed to be +// a list of , one per line. +// +// : +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// +// post +// +// Fetch 1 or more objects in bulk using a "/gvfs/objects" POST +// request. 
+// +// If a cache-server is configured, +// try it first. Optionally fallback to the main Git server. +// +// The set of objects is given on stdin and is assumed to be +// a list of , one per line. +// +// : +// +// --block-size= // defaults to "4000" +// +// Request objects from server in batches of at +// most n objects (not bytes). +// +// --depth= // defaults to "1" +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// +// prefetch +// +// Use "/gvfs/prefetch" REST API to fetch 1 or more commits-and-trees +// prefetch packs from the server. +// +// : +// +// --since= // defaults to "0" +// +// Time in seconds since the epoch. If omitted or +// zero, the timestamp from the newest prefetch +// packfile found in the shared-cache ODB is used. +// (This is based upon the packfile name, not the +// mtime.) +// +// The GVFS Protocol defines this value as a way to +// request cached packfiles NEWER THAN this timestamp. +// +// server +// +// Interactive/sub-process mode. Listen for a series of commands +// and data on stdin and return results on stdout. This command +// uses pkt-line format [1] and implements the long-running process +// protocol [2] to communicate with the foreground/parent process. +// +// : +// +// --block-size= // defaults to "4000" +// +// Request objects from server in batches of at +// most n objects (not bytes) when using POST +// requests. +// +// --depth= // defaults to "1" +// +// --max-retries= // defaults to "6" +// +// Number of retries after transient network errors. +// Set to zero to disable such retries. +// +// Interactive verb: objects.get +// +// Fetch 1 or more objects, one at a time, using a +// "/gvfs/objects" GET requests. +// +// Each object will be created as a loose object in the ODB. +// +// Create 1 or more loose objects in the shared-cache ODB. +// (The pathname of the selected ODB is reported at the +// beginning of the response; this should match the pathname +// given on the command line). +// +// git> objects.get +// git> +// git> +// git> ... +// git> +// git> 0000 +// +// git< odb +// git< loose +// git< loose +// git< ... +// git< loose +// git< ok | partial | error +// git< 0000 +// +// Interactive verb: objects.post +// +// Fetch 1 or more objects, in bulk, using one or more +// "/gvfs/objects" POST requests. +// +// Create 1 or more loose objects and/or packfiles in the +// shared-cache ODB. A POST is allowed to respond with +// either loose or packed objects. +// +// git> objects.post +// git> +// git> +// git> ... +// git> +// git> 0000 +// +// git< odb +// git< loose | packfile +// git< loose | packfile +// git< ... +// git< loose | packfile +// git< ok | partial | error +// git< 0000 +// +// Interactive verb: object.prefetch +// +// Fetch 1 or more prefetch packs using a "/gvfs/prefetch" +// request. +// +// git> objects.prefetch +// git> // optional +// git> 0000 +// +// git< odb +// git< packfile +// git< packfile +// git< ... +// git< packfile +// git< ok | error +// git< 0000 +// +// If a cache-server is configured, try it first. +// Optionally fallback to the main Git server. +// +// [1] Documentation/technical/protocol-common.txt +// [2] Documentation/technical/long-running-process-protocol.txt +// [3] See GIT_TRACE_PACKET +// +// endpoint +// +// Fetch the given endpoint from the main Git server (specifying +// `gvfs/config` as endpoint is idempotent to the `config` +// command mentioned above). 
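//
// Example invocations (OIDs, paths, and timestamps below are
// placeholders; these lines only make the notes above concrete):
//
//     $ git gvfs-helper config
//
//     $ echo <40-hex-oid> | git gvfs-helper --remote=origin --fallback get
//
//     $ git gvfs-helper --shared-cache=/path/to/shared/objects \
//           prefetch --since=1577836800
//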
+// +////////////////////////////////////////////////////////////////// + +#include "cache.h" +#include "config.h" +#include "remote.h" +#include "connect.h" +#include "strbuf.h" +#include "walker.h" +#include "http.h" +#include "exec-cmd.h" +#include "run-command.h" +#include "pkt-line.h" +#include "string-list.h" +#include "sideband.h" +#include "strvec.h" +#include "credential.h" +#include "oid-array.h" +#include "send-pack.h" +#include "protocol.h" +#include "quote.h" +#include "transport.h" +#include "parse-options.h" +#include "object-store.h" +#include "json-writer.h" +#include "tempfile.h" +#include "oidset.h" +#include "dir.h" +#include "progress.h" +#include "packfile.h" +#include "date.h" + +#define TR2_CAT "gvfs-helper" + +static const char * const main_usage[] = { + N_("git gvfs-helper [] config []"), + N_("git gvfs-helper [] get []"), + N_("git gvfs-helper [] post []"), + N_("git gvfs-helper [] prefetch []"), + N_("git gvfs-helper [] server []"), + NULL +}; + +static const char *const objects_get_usage[] = { + N_("git gvfs-helper [] get []"), + NULL +}; + +static const char *const objects_post_usage[] = { + N_("git gvfs-helper [] post []"), + NULL +}; + +static const char *const prefetch_usage[] = { + N_("git gvfs-helper [] prefetch []"), + NULL +}; + +static const char *const server_usage[] = { + N_("git gvfs-helper [] server []"), + NULL +}; + +/* + * "commitDepth" field in gvfs protocol + */ +#define GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH 1 + +/* + * Chunk/block size in number of objects we request in each packfile + */ +#define GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE 4000 + +/* + * Retry attempts (after the initial request) for transient errors and 429s. + */ +#define GH__DEFAULT_MAX_RETRIES 6 + +/* + * Maximum delay in seconds for transient (network) error retries. + */ +#define GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC 300 + +/* + * Our exit-codes. + */ +enum gh__error_code { + GH__ERROR_CODE__USAGE = -1, /* will be mapped to usage() */ + GH__ERROR_CODE__OK = 0, + GH__ERROR_CODE__ERROR = 1, /* unspecified */ + GH__ERROR_CODE__CURL_ERROR = 2, + GH__ERROR_CODE__HTTP_401 = 3, + GH__ERROR_CODE__HTTP_404 = 4, + GH__ERROR_CODE__HTTP_429 = 5, + GH__ERROR_CODE__HTTP_503 = 6, + GH__ERROR_CODE__HTTP_OTHER = 7, + GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE = 8, + GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE = 8, + GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE = 10, + GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE = 11, + GH__ERROR_CODE__SUBPROCESS_SYNTAX = 12, + GH__ERROR_CODE__INDEX_PACK_FAILED = 13, + GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH = 14, +}; + +enum gh__cache_server_mode { + /* verify URL. disable if unknown. */ + GH__CACHE_SERVER_MODE__VERIFY_DISABLE = 0, + /* verify URL. error if unknown. */ + GH__CACHE_SERVER_MODE__VERIFY_ERROR, + /* disable the cache-server, if defined */ + GH__CACHE_SERVER_MODE__DISABLE, + /* trust any cache-server */ + GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY, +}; + +/* + * The set of command line, config, and environment variables + * that we use as input to decide how we should operate. + */ +static struct gh__cmd_opts { + const char *remote_name; + + int try_fallback; /* to git server if cache-server fails */ + int show_progress; + + int depth; + int block_size; + int max_retries; + int max_transient_backoff_sec; + + enum gh__cache_server_mode cache_server_mode; +} gh__cmd_opts; + +/* + * The chosen global state derrived from the inputs in gh__cmd_opts. 
+ */ +static struct gh__global { + struct remote *remote; + + struct credential main_creds; + struct credential cache_creds; + + const char *main_url; + const char *cache_server_url; + + struct strbuf buf_odb_path; + + int http_is_initialized; + int cache_server_is_initialized; /* did sub-command look for one */ + int main_creds_need_approval; /* try to only approve them once */ + +} gh__global; + +enum gh__server_type { + GH__SERVER_TYPE__MAIN = 0, + GH__SERVER_TYPE__CACHE = 1, + + GH__SERVER_TYPE__NR, +}; + +static const char *gh__server_type_label[GH__SERVER_TYPE__NR] = { + "(main)", + "(cs)" +}; + +enum gh__objects_mode { + GH__OBJECTS_MODE__NONE = 0, + + /* + * Bulk fetch objects. + * + * But also, force the use of HTTP POST regardless of how many + * objects we are requesting. + * + * The GVFS Protocol treats requests for commit objects + * differently in GET and POST requests WRT whether it + * automatically also fetches the referenced trees. + */ + GH__OBJECTS_MODE__POST, + + /* + * Fetch objects one at a time using HTTP GET. + * + * Force the use of GET (primarily because of the commit + * object treatment). + */ + GH__OBJECTS_MODE__GET, + + /* + * Fetch one or more pre-computed "prefetch packs" containing + * commits and trees. + */ + GH__OBJECTS_MODE__PREFETCH, +}; + +struct gh__azure_throttle +{ + unsigned long tstu_limit; + unsigned long tstu_remaining; + + unsigned long reset_sec; + unsigned long retry_after_sec; +}; + +static void gh__azure_throttle__zero(struct gh__azure_throttle *azure) +{ + azure->tstu_limit = 0; + azure->tstu_remaining = 0; + azure->reset_sec = 0; + azure->retry_after_sec = 0; +} + +#define GH__AZURE_THROTTLE_INIT { \ + .tstu_limit = 0, \ + .tstu_remaining = 0, \ + .reset_sec = 0, \ + .retry_after_sec = 0, \ + } + +static struct gh__azure_throttle gh__global_throttle[GH__SERVER_TYPE__NR] = { + GH__AZURE_THROTTLE_INIT, + GH__AZURE_THROTTLE_INIT, +}; + +/* + * Stolen from http.c + */ +static CURLcode gh__curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf) +{ + char *ptr; + CURLcode ret; + + strbuf_reset(buf); + ret = curl_easy_getinfo(curl, info, &ptr); + if (!ret && ptr) + strbuf_addstr(buf, ptr); + return ret; +} + +enum gh__progress_state { + GH__PROGRESS_STATE__START = 0, + GH__PROGRESS_STATE__PHASE1, + GH__PROGRESS_STATE__PHASE2, + GH__PROGRESS_STATE__PHASE3, +}; + +/* + * Parameters to drive an HTTP request (with any necessary retries). + */ +struct gh__request_params { + /* + * b_is_post indicates if the current HTTP request is a POST=1 or + * a GET=0. This is a lower level field used to setup CURL and + * the tempfile used to receive the content. + * + * It is related to, but different from the GH__OBJECTS_MODE__ + * field that we present to the gvfs-helper client or in the CLI + * (which only concerns the semantics of the /gvfs/objects protocol + * on the set of requested OIDs). + * + * For example, we use an HTTP GET to get the /gvfs/config data + * into a buffer. 
+ */ + int b_is_post; + int b_write_to_file; /* write to file=1 or strbuf=0 */ + int b_permit_cache_server_if_defined; + + enum gh__objects_mode objects_mode; + enum gh__server_type server_type; + + int k_attempt; /* robust retry attempt */ + int k_transient_delay_sec; /* delay before transient error retries */ + + unsigned long object_count; /* number of objects being fetched */ + + const struct strbuf *post_payload; /* POST body to send */ + + struct curl_slist *headers; /* additional http headers to send */ + struct tempfile *tempfile; /* for response content when file */ + struct strbuf *buffer; /* for response content when strbuf */ + struct strbuf tr2_label; /* for trace2 regions */ + + struct object_id loose_oid; + + /* + * Note that I am putting all of the progress-related instance data + * inside the request-params in the hope that we can eventually + * do multi-threaded/concurrent HTTP requests when chunking + * large requests. However, the underlying "struct progress" API + * is not thread safe (that is, it doesn't allow concurrent progress + * reports (since that might require multiple lines on the screen + * or something)). + */ + enum gh__progress_state progress_state; + struct strbuf progress_base_phase2_msg; + struct strbuf progress_base_phase3_msg; + + /* + * The buffer for the formatted progress message is shared by the + * "struct progress" API and must remain valid for the duration of + * the start_progress..stop_progress lifespan. + */ + struct strbuf progress_msg; + struct progress *progress; + + struct strbuf e2eid; + + struct string_list *result_list; /* we do not own this */ +}; + +#define GH__REQUEST_PARAMS_INIT { \ + .b_is_post = 0, \ + .b_write_to_file = 0, \ + .b_permit_cache_server_if_defined = 1, \ + .server_type = GH__SERVER_TYPE__MAIN, \ + .k_attempt = 0, \ + .k_transient_delay_sec = 0, \ + .object_count = 0, \ + .post_payload = NULL, \ + .headers = NULL, \ + .tempfile = NULL, \ + .buffer = NULL, \ + .tr2_label = STRBUF_INIT, \ + .loose_oid = {{0}}, \ + .progress_state = GH__PROGRESS_STATE__START, \ + .progress_base_phase2_msg = STRBUF_INIT, \ + .progress_base_phase3_msg = STRBUF_INIT, \ + .progress_msg = STRBUF_INIT, \ + .progress = NULL, \ + .e2eid = STRBUF_INIT, \ + .result_list = NULL, \ + } + +static void gh__request_params__release(struct gh__request_params *params) +{ + if (!params) + return; + + params->post_payload = NULL; /* we do not own this */ + + curl_slist_free_all(params->headers); + params->headers = NULL; + + delete_tempfile(¶ms->tempfile); + + params->buffer = NULL; /* we do not own this */ + + strbuf_release(¶ms->tr2_label); + + strbuf_release(¶ms->progress_base_phase2_msg); + strbuf_release(¶ms->progress_base_phase3_msg); + strbuf_release(¶ms->progress_msg); + + stop_progress(¶ms->progress); + params->progress = NULL; + + strbuf_release(¶ms->e2eid); + + params->result_list = NULL; /* we do not own this */ +} + +/* + * How we handle retries for various unexpected network errors. + */ +enum gh__retry_mode { + /* + * The operation was successful, so no retry is needed. + * Use this for HTTP 200, for example. + */ + GH__RETRY_MODE__SUCCESS = 0, + + /* + * Retry using the normal 401 Auth mechanism. + */ + GH__RETRY_MODE__HTTP_401, + + /* + * Fail because at least one of the requested OIDs does not exist. + */ + GH__RETRY_MODE__FAIL_404, + + /* + * A transient network error, such as dropped connection + * or network IO error. Our belief is that a retry MAY + * succeed. (See Gremlins and Cosmic Rays....) 
+ */ + GH__RETRY_MODE__TRANSIENT, + + /* + * Request was blocked completely because of a 429. + */ + GH__RETRY_MODE__HTTP_429, + + /* + * Request failed because the server was (temporarily?) offline. + */ + GH__RETRY_MODE__HTTP_503, + + /* + * The operation had a hard failure and we have no + * expectation that a second attempt will give a different + * answer, such as a bad hostname or a mal-formed URL. + */ + GH__RETRY_MODE__HARD_FAIL, +}; + +/* + * Bucket to describe the results of an HTTP requests (may be + * overwritten during retries so that it describes the final attempt). + */ +struct gh__response_status { + struct strbuf error_message; + struct strbuf content_type; + enum gh__error_code ec; + enum gh__retry_mode retry; + intmax_t bytes_received; + struct gh__azure_throttle *azure; +}; + +#define GH__RESPONSE_STATUS_INIT { \ + .error_message = STRBUF_INIT, \ + .content_type = STRBUF_INIT, \ + .ec = GH__ERROR_CODE__OK, \ + .retry = GH__RETRY_MODE__SUCCESS, \ + .bytes_received = 0, \ + .azure = NULL, \ + } + +static void gh__response_status__zero(struct gh__response_status *s) +{ + strbuf_setlen(&s->error_message, 0); + strbuf_setlen(&s->content_type, 0); + s->ec = GH__ERROR_CODE__OK; + s->retry = GH__RETRY_MODE__SUCCESS; + s->bytes_received = 0; + s->azure = NULL; +} + +static void install_result(struct gh__request_params *params, + struct gh__response_status *status); + +/* + * Log the E2EID for the current request. + * + * Since every HTTP request to the cache-server and to the main Git server + * will send back a unique E2EID (probably a GUID), we don't want to overload + * telemetry with each ID -- rather, only the ones for which there was a + * problem and that may be helpful in a post mortem. + */ +static void log_e2eid(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (!params->e2eid.len) + return; + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__TRANSIENT: + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + break; + } + + if (trace2_is_enabled()) { + struct strbuf key = STRBUF_INIT; + + strbuf_addstr(&key, "e2eid"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string(TR2_CAT, NULL, key.buf, + params->e2eid.buf); + + strbuf_release(&key); + } +} + +/* + * Normalize a few HTTP response codes before we try to decide + * how to dispatch on them. + */ +static long gh__normalize_odd_codes(struct gh__request_params *params, + long http_response_code) +{ + if (params->server_type == GH__SERVER_TYPE__CACHE && + http_response_code == 400) { + /* + * The cache-server sends a somewhat bogus 400 instead of + * the normal 401 when AUTH is required. Fixup the status + * to hide that. + * + * TODO Technically, the cache-server could send a 400 + * TODO for many reasons, not just for their bogus + * TODO pseudo-401, but we're going to assume it is a + * TODO 401 for now. We should confirm the expected + * TODO error message in the response-body. + */ + return 401; + } + + if (http_response_code == 203) { + /* + * A proxy server transformed a 200 from the origin server + * into a 203. We don't care about the subtle distinction. + */ + return 200; + } + + return http_response_code; +} + +/* + * Map HTTP response codes into a retry strategy. 
+ * See https://en.wikipedia.org/wiki/List_of_HTTP_status_codes + * + * https://docs.microsoft.com/en-us/azure/devops/integrate/concepts/rate-limits?view=azure-devops + */ +static void compute_retry_mode_from_http_response( + struct gh__response_status *status, + long http_response_code) +{ + switch (http_response_code) { + + case 200: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + case 301: /* all the various flavors of HTTP Redirect */ + case 302: + case 303: + case 304: + case 305: + case 306: + case 307: + case 308: + /* + * TODO Consider a redirected-retry (with or without + * TODO a Retry-After header). + */ + goto hard_fail; + + case 401: + strbuf_addstr(&status->error_message, + "(http:401) Not Authorized"); + status->retry = GH__RETRY_MODE__HTTP_401; + status->ec = GH__ERROR_CODE__HTTP_401; + return; + + case 404: + /* + * TODO if params->object_count > 1, consider + * TODO splitting the request into 2 halves + * TODO and retrying each half in series. + */ + strbuf_addstr(&status->error_message, + "(http:404) Not Found"); + status->retry = GH__RETRY_MODE__FAIL_404; + status->ec = GH__ERROR_CODE__HTTP_404; + return; + + case 429: + /* + * This is a hard block because we've been bad. + */ + strbuf_addstr(&status->error_message, + "(http:429) Too Many Requests [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_429; + status->ec = GH__ERROR_CODE__HTTP_429; + + trace2_data_string(TR2_CAT, NULL, "error/http", + status->error_message.buf); + return; + + case 503: + /* + * We assume that this comes with a "Retry-After" header like 429s. + */ + strbuf_addstr(&status->error_message, + "(http:503) Server Unavailable [throttled]"); + status->retry = GH__RETRY_MODE__HTTP_503; + status->ec = GH__ERROR_CODE__HTTP_503; + + trace2_data_string(TR2_CAT, NULL, "error/http", + status->error_message.buf); + return; + + default: + goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(http:%d) Other [hard_fail]", + (int)http_response_code); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__HTTP_OTHER; + + trace2_data_string(TR2_CAT, NULL, "error/http", + status->error_message.buf); + return; +} + +/* + * Map CURLE errors code to a retry strategy. + * See and + * https://curl.haxx.se/libcurl/c/libcurl-errors.html + * + * This could be a static table rather than a switch, but + * that is harder to debug and we may want to selectively + * log errors. + * + * I've commented out all of the hard-fail cases for now + * and let the default handle them. This is to indicate + * that I considered them and found them to be not actionable. + * Also, the spelling of some of the CURLE_ symbols seem + * to change between curl releases on different platforms, + * so I'm not going to fight that. 
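
These mappings only classify a failure as transient or hard; the retry driver elsewhere in this file then uses gh__cmd_opts.max_retries and gh__cmd_opts.max_transient_backoff_sec to decide how long to keep trying. Purely as an illustration of that idea (do_one_request() is a placeholder and this is not necessarily the exact policy used later in the file):

/*
 * Illustrative sketch only: retry after transient failures with a
 * capped, doubling delay.
 */
static void sketch_retry_transient(struct gh__request_params *params,
				   struct gh__response_status *status)
{
	params->k_transient_delay_sec = 1;

	for (params->k_attempt = 0;
	     params->k_attempt <= gh__cmd_opts.max_retries;
	     params->k_attempt++) {
		do_one_request(params, status); /* placeholder */

		if (status->retry != GH__RETRY_MODE__TRANSIENT)
			return;

		sleep(params->k_transient_delay_sec);
		params->k_transient_delay_sec *= 2;
		if (params->k_transient_delay_sec >
		    gh__cmd_opts.max_transient_backoff_sec)
			params->k_transient_delay_sec =
				gh__cmd_opts.max_transient_backoff_sec;
	}
}
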
+ */ +static void compute_retry_mode_from_curl_error( + struct gh__response_status *status, + CURLcode curl_code) +{ + switch (curl_code) { + case CURLE_OK: + status->retry = GH__RETRY_MODE__SUCCESS; + status->ec = GH__ERROR_CODE__OK; + return; + + //se CURLE_UNSUPPORTED_PROTOCOL: goto hard_fail; + //se CURLE_FAILED_INIT: goto hard_fail; + //se CURLE_URL_MALFORMAT: goto hard_fail; + //se CURLE_NOT_BUILT_IN: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_PROXY: goto hard_fail; + //se CURLE_COULDNT_RESOLVE_HOST: goto hard_fail; + case CURLE_COULDNT_CONNECT: goto transient; + //se CURLE_WEIRD_SERVER_REPLY: goto hard_fail; + //se CURLE_REMOTE_ACCESS_DENIED: goto hard_fail; + //se CURLE_FTP_ACCEPT_FAILED: goto hard_fail; + //se CURLE_FTP_WEIRD_PASS_REPLY: goto hard_fail; + //se CURLE_FTP_ACCEPT_TIMEOUT: goto hard_fail; + //se CURLE_FTP_WEIRD_PASV_REPLY: goto hard_fail; + //se CURLE_FTP_WEIRD_227_FORMAT: goto hard_fail; + //se CURLE_FTP_CANT_GET_HOST: goto hard_fail; + case CURLE_HTTP2: goto transient; + //se CURLE_FTP_COULDNT_SET_TYPE: goto hard_fail; + case CURLE_PARTIAL_FILE: goto transient; + //se CURLE_FTP_COULDNT_RETR_FILE: goto hard_fail; + //se CURLE_OBSOLETE20: goto hard_fail; + //se CURLE_QUOTE_ERROR: goto hard_fail; + //se CURLE_HTTP_RETURNED_ERROR: goto hard_fail; + case CURLE_WRITE_ERROR: goto transient; + //se CURLE_OBSOLETE24: goto hard_fail; + case CURLE_UPLOAD_FAILED: goto transient; + //se CURLE_READ_ERROR: goto hard_fail; + //se CURLE_OUT_OF_MEMORY: goto hard_fail; + case CURLE_OPERATION_TIMEDOUT: goto transient; + //se CURLE_OBSOLETE29: goto hard_fail; + //se CURLE_FTP_PORT_FAILED: goto hard_fail; + //se CURLE_FTP_COULDNT_USE_REST: goto hard_fail; + //se CURLE_OBSOLETE32: goto hard_fail; + //se CURLE_RANGE_ERROR: goto hard_fail; + case CURLE_HTTP_POST_ERROR: goto transient; + //se CURLE_SSL_CONNECT_ERROR: goto hard_fail; + //se CURLE_BAD_DOWNLOAD_RESUME: goto hard_fail; + //se CURLE_FILE_COULDNT_READ_FILE: goto hard_fail; + //se CURLE_LDAP_CANNOT_BIND: goto hard_fail; + //se CURLE_LDAP_SEARCH_FAILED: goto hard_fail; + //se CURLE_OBSOLETE40: goto hard_fail; + //se CURLE_FUNCTION_NOT_FOUND: goto hard_fail; + //se CURLE_ABORTED_BY_CALLBACK: goto hard_fail; + //se CURLE_BAD_FUNCTION_ARGUMENT: goto hard_fail; + //se CURLE_OBSOLETE44: goto hard_fail; + //se CURLE_INTERFACE_FAILED: goto hard_fail; + //se CURLE_OBSOLETE46: goto hard_fail; + //se CURLE_TOO_MANY_REDIRECTS: goto hard_fail; + //se CURLE_UNKNOWN_OPTION: goto hard_fail; + //se CURLE_TELNET_OPTION_SYNTAX: goto hard_fail; + //se CURLE_OBSOLETE50: goto hard_fail; + //se CURLE_PEER_FAILED_VERIFICATION: goto hard_fail; + //se CURLE_GOT_NOTHING: goto hard_fail; + //se CURLE_SSL_ENGINE_NOTFOUND: goto hard_fail; + //se CURLE_SSL_ENGINE_SETFAILED: goto hard_fail; + case CURLE_SEND_ERROR: goto transient; + case CURLE_RECV_ERROR: goto transient; + //se CURLE_OBSOLETE57: goto hard_fail; + //se CURLE_SSL_CERTPROBLEM: goto hard_fail; + //se CURLE_SSL_CIPHER: goto hard_fail; + //se CURLE_SSL_CACERT: goto hard_fail; + //se CURLE_BAD_CONTENT_ENCODING: goto hard_fail; + //se CURLE_LDAP_INVALID_URL: goto hard_fail; + //se CURLE_FILESIZE_EXCEEDED: goto hard_fail; + //se CURLE_USE_SSL_FAILED: goto hard_fail; + //se CURLE_SEND_FAIL_REWIND: goto hard_fail; + //se CURLE_SSL_ENGINE_INITFAILED: goto hard_fail; + //se CURLE_LOGIN_DENIED: goto hard_fail; + //se CURLE_TFTP_NOTFOUND: goto hard_fail; + //se CURLE_TFTP_PERM: goto hard_fail; + //se CURLE_REMOTE_DISK_FULL: goto hard_fail; + //se CURLE_TFTP_ILLEGAL: goto hard_fail; + //se 
CURLE_TFTP_UNKNOWNID: goto hard_fail; + //se CURLE_REMOTE_FILE_EXISTS: goto hard_fail; + //se CURLE_TFTP_NOSUCHUSER: goto hard_fail; + //se CURLE_CONV_FAILED: goto hard_fail; + //se CURLE_CONV_REQD: goto hard_fail; + //se CURLE_SSL_CACERT_BADFILE: goto hard_fail; + //se CURLE_REMOTE_FILE_NOT_FOUND: goto hard_fail; + //se CURLE_SSH: goto hard_fail; + //se CURLE_SSL_SHUTDOWN_FAILED: goto hard_fail; + case CURLE_AGAIN: goto transient; + //se CURLE_SSL_CRL_BADFILE: goto hard_fail; + //se CURLE_SSL_ISSUER_ERROR: goto hard_fail; + //se CURLE_FTP_PRET_FAILED: goto hard_fail; + //se CURLE_RTSP_CSEQ_ERROR: goto hard_fail; + //se CURLE_RTSP_SESSION_ERROR: goto hard_fail; + //se CURLE_FTP_BAD_FILE_LIST: goto hard_fail; + //se CURLE_CHUNK_FAILED: goto hard_fail; + //se CURLE_NO_CONNECTION_AVAILABLE: goto hard_fail; + //se CURLE_SSL_PINNEDPUBKEYNOTMATCH: goto hard_fail; + //se CURLE_SSL_INVALIDCERTSTATUS: goto hard_fail; +#ifdef CURLE_HTTP2_STREAM + case CURLE_HTTP2_STREAM: goto transient; +#endif + default: goto hard_fail; + } + +hard_fail: + strbuf_addf(&status->error_message, "(curl:%d) %s [hard_fail]", + curl_code, curl_easy_strerror(curl_code)); + status->retry = GH__RETRY_MODE__HARD_FAIL; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string(TR2_CAT, NULL, "error/curl", + status->error_message.buf); + return; + +transient: + strbuf_addf(&status->error_message, "(curl:%d) %s [transient]", + curl_code, curl_easy_strerror(curl_code)); + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__CURL_ERROR; + + trace2_data_string(TR2_CAT, NULL, "error/curl", + status->error_message.buf); + return; +} + +/* + * Create a single normalized 'ec' error-code from the status we + * received from the HTTP request. Map a few of the expected HTTP + * status code to 'ec', but don't get too crazy here. + */ +static void gh__response_status__set_from_slot( + struct gh__request_params *params, + struct gh__response_status *status, + const struct active_request_slot *slot) +{ + long http_response_code; + CURLcode curl_code; + + curl_code = slot->results->curl_result; + gh__curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, + &status->content_type); + curl_easy_getinfo(slot->curl, CURLINFO_RESPONSE_CODE, + &http_response_code); + + strbuf_setlen(&status->error_message, 0); + + http_response_code = gh__normalize_odd_codes(params, + http_response_code); + + /* + * Use normalized response/status codes form curl/http to decide + * how to set the error-code we propagate *AND* to decide if we + * we should retry because of transient network problems. + */ + if (curl_code == CURLE_OK || + curl_code == CURLE_HTTP_RETURNED_ERROR) + compute_retry_mode_from_http_response(status, + http_response_code); + else + compute_retry_mode_from_curl_error(status, curl_code); + + if (status->ec != GH__ERROR_CODE__OK) + status->bytes_received = 0; + else if (params->b_write_to_file) + status->bytes_received = (intmax_t)ftell(params->tempfile->fp); + else + status->bytes_received = (intmax_t)params->buffer->len; +} + +static void gh__response_status__release(struct gh__response_status *status) +{ + if (!status) + return; + strbuf_release(&status->error_message); + strbuf_release(&status->content_type); +} + +static int gh__curl_progress_cb(void *clientp, + curl_off_t dltotal, curl_off_t dlnow, + curl_off_t ultotal, curl_off_t ulnow) +{ + struct gh__request_params *params = clientp; + + /* + * From what I can tell, CURL progress arrives in 3 phases. 
+ * + * [1] An initial connection setup phase where we get [0,0] [0,0]. + * [2] An upload phase where we start sending the request headers + * and body. ulnow will be > 0. ultotal may or may not be 0. + * [3] A download phase where we start receiving the response + * headers and payload body. dlnow will be > 0. dltotal may + * or may not be 0. + * + * If we pass zero for the total to the "struct progress" API, we + * get simple numbers rather than percentages. So our progress + * output format may vary depending. + * + * It is unclear if CURL will give us a final callback after + * everything is finished, so we leave the progress handle open + * and let the caller issue the final stop_progress(). + * + * There is a bit of a mismatch between the CURL API and the + * "struct progress" API. The latter requires us to set the + * progress message when we call one of the start_progress + * methods. We cannot change the progress message while we are + * showing progress state. And we cannot change the denominator + * (total) after we start. CURL may or may not give us the total + * sizes for each phase. + * + * Also be advised that the "struct progress" API eats messages + * so that the screen is only updated every second or so. And + * may not print anything if the start..stop happen in less then + * 2 seconds. Whereas CURL calls this callback very frequently. + * The net-net is that we may not actually see this progress + * message for small/fast HTTP requests. + */ + + switch (params->progress_state) { + case GH__PROGRESS_STATE__START: /* first callback */ + if (dlnow == 0 && ulnow == 0) + goto enter_phase_1; + + if (ulnow) + goto enter_phase_2; + else + goto enter_phase_3; + + case GH__PROGRESS_STATE__PHASE1: + if (dlnow == 0 && ulnow == 0) + return 0; + + if (ulnow) + goto enter_phase_2; + else + goto enter_phase_3; + + case GH__PROGRESS_STATE__PHASE2: + display_progress(params->progress, ulnow); + if (dlnow == 0) + return 0; + + stop_progress(¶ms->progress); + goto enter_phase_3; + + case GH__PROGRESS_STATE__PHASE3: + display_progress(params->progress, dlnow); + return 0; + + default: + return 0; + } + +enter_phase_1: + /* + * Don't bother to create a progress handle during phase [1]. + * Because we get [0,0,0,0], we don't have any data to report + * and would just have to synthesize some type of progress. + * From my testing, phase [1] is fairly quick (probably just + * the SSL handshake), so the "struct progress" API will most + * likely completely eat any messages that we did produce. 
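
For context, this callback uses libcurl's transfer-info signature, so it is registered with CURLOPT_XFERINFOFUNCTION. The actual setup lives in the request code later in this file; a sketch of the typical wiring looks roughly like:

curl_easy_setopt(slot->curl, CURLOPT_NOPROGRESS, 0L);
curl_easy_setopt(slot->curl, CURLOPT_XFERINFOFUNCTION, gh__curl_progress_cb);
curl_easy_setopt(slot->curl, CURLOPT_XFERINFODATA, params);
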
+ */ + params->progress_state = GH__PROGRESS_STATE__PHASE1; + return 0; + +enter_phase_2: + strbuf_setlen(¶ms->progress_msg, 0); + if (params->progress_base_phase2_msg.len) { + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes sent)", + params->progress_base_phase2_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes sent)", + params->progress_base_phase2_msg.buf); + params->progress = start_progress(params->progress_msg.buf, ultotal); + display_progress(params->progress, ulnow); + } + params->progress_state = GH__PROGRESS_STATE__PHASE2; + return 0; + +enter_phase_3: + strbuf_setlen(¶ms->progress_msg, 0); + if (params->progress_base_phase3_msg.len) { + if (params->k_attempt > 0) + strbuf_addf(¶ms->progress_msg, "%s [retry %d/%d] (bytes received)", + params->progress_base_phase3_msg.buf, + params->k_attempt, gh__cmd_opts.max_retries); + else + strbuf_addf(¶ms->progress_msg, "%s (bytes received)", + params->progress_base_phase3_msg.buf); + params->progress = start_progress(params->progress_msg.buf, dltotal); + display_progress(params->progress, dlnow); + } + params->progress_state = GH__PROGRESS_STATE__PHASE3; + return 0; +} + +/* + * Run the request without using "run_one_slot()" because we + * don't want the post-request normalization, error handling, + * and auto-reauth handling in http.c. + */ +static void gh__run_one_slot(struct active_request_slot *slot, + struct gh__request_params *params, + struct gh__response_status *status) +{ + struct strbuf key = STRBUF_INIT; + + strbuf_addbuf(&key, ¶ms->tr2_label); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + params->progress_state = GH__PROGRESS_STATE__START; + strbuf_setlen(¶ms->e2eid, 0); + + trace2_region_enter(TR2_CAT, key.buf, NULL); + + if (!start_active_slot(slot)) { + compute_retry_mode_from_curl_error(status, + CURLE_FAILED_INIT); + } else { + run_active_slot(slot); + if (params->b_write_to_file) + fflush(params->tempfile->fp); + + gh__response_status__set_from_slot(params, status, slot); + + log_e2eid(params, status); + + if (status->ec == GH__ERROR_CODE__OK) { + int old_len = key.len; + + /* + * We only log the number of bytes received. + * We do not log the number of objects requested + * because the server may give us more than that + * (such as when we request a commit). 
+ */ + strbuf_addstr(&key, "/nr_bytes"); + trace2_data_intmax(TR2_CAT, NULL, + key.buf, + status->bytes_received); + strbuf_setlen(&key, old_len); + } + } + + if (params->progress) + stop_progress(¶ms->progress); + + if (status->ec == GH__ERROR_CODE__OK && params->b_write_to_file) + install_result(params, status); + + trace2_region_leave(TR2_CAT, key.buf, NULL); + + strbuf_release(&key); +} + +static int option_parse_cache_server_mode(const struct option *opt, + const char *arg, int unset) +{ + if (unset) /* should not happen */ + return error(_("missing value for switch '%s'"), + opt->long_name); + + else if (!strcmp(arg, "verify")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__VERIFY_DISABLE; + + else if (!strcmp(arg, "error")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__VERIFY_ERROR; + + else if (!strcmp(arg, "disable")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__DISABLE; + + else if (!strcmp(arg, "trust")) + gh__cmd_opts.cache_server_mode = + GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY; + + else + return error(_("invalid value for switch '%s'"), + opt->long_name); + + return 0; +} + +/* + * Let command line args override "gvfs.sharedcache" config setting + * and override the value set by git_default_config(). + * + * The command line is parsed *AFTER* the config is loaded, so + * prepared_alt_odb() has already been called any default or inherited + * shared-cache has already been set. + * + * We have a chance to override it here. + */ +static int option_parse_shared_cache_directory(const struct option *opt, + const char *arg, int unset) +{ + struct strbuf buf_arg = STRBUF_INIT; + + if (unset) /* should not happen */ + return error(_("missing value for switch '%s'"), + opt->long_name); + + strbuf_addstr(&buf_arg, arg); + if (strbuf_normalize_path(&buf_arg) < 0) { + /* + * Pretend command line wasn't given. Use whatever + * settings we already have from the config. + */ + strbuf_release(&buf_arg); + return 0; + } + strbuf_trim_trailing_dir_sep(&buf_arg); + + if (!strbuf_cmp(&buf_arg, &gvfs_shared_cache_pathname)) { + /* + * The command line argument matches what we got from + * the config, so we're already setup correctly. (And + * we have already verified that the directory exists + * on disk.) + */ + strbuf_release(&buf_arg); + return 0; + } + + else if (!gvfs_shared_cache_pathname.len) { + /* + * A shared-cache was requested and we did not inherit one. + * Try it, but let alt_odb_usable() secretly disable it if + * it cannot create the directory on disk. + */ + strbuf_addbuf(&gvfs_shared_cache_pathname, &buf_arg); + + add_to_alternates_memory(buf_arg.buf); + + strbuf_release(&buf_arg); + return 0; + } + + else { + /* + * The requested shared-cache is different from the one + * we inherited. Replace the inherited value with this + * one, but smartly fallback if necessary. + */ + struct strbuf buf_prev = STRBUF_INIT; + + strbuf_addbuf(&buf_prev, &gvfs_shared_cache_pathname); + + strbuf_setlen(&gvfs_shared_cache_pathname, 0); + strbuf_addbuf(&gvfs_shared_cache_pathname, &buf_arg); + + add_to_alternates_memory(buf_arg.buf); + + /* + * alt_odb_usable() releases gvfs_shared_cache_pathname + * if it cannot create the directory on disk, so fallback + * to the previous choice when it fails. + */ + if (!gvfs_shared_cache_pathname.len) + strbuf_addbuf(&gvfs_shared_cache_pathname, + &buf_prev); + + strbuf_release(&buf_arg); + strbuf_release(&buf_prev); + return 0; + } +} + +/* + * Lookup the URL for this remote (defaults to 'origin'). 
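
The --shared-cache handling above is only an override; in the common case the shared cache is configured once via the gvfs.sharedcache setting it falls back to (path below is hypothetical):

    $ git config gvfs.sharedcache /path/to/shared/objects
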
+ */ +static void lookup_main_url(void) +{ + /* + * Both VFS and Scalar only work with 'origin', so we expect this. + * The command line arg is mainly for debugging. + */ + if (!gh__cmd_opts.remote_name || !*gh__cmd_opts.remote_name) + gh__cmd_opts.remote_name = "origin"; + + gh__global.remote = remote_get(gh__cmd_opts.remote_name); + if (!gh__global.remote->url[0] || !*gh__global.remote->url[0]) + die("unknown remote '%s'", gh__cmd_opts.remote_name); + + /* + * Strip out any in-line auth in the origin server URL so that + * we can control which creds we fetch. + * + * Azure DevOps has been known to suggest https URLS of the + * form "https://@dev.azure.com//". + * + * Break that so that we can force the use of a PAT. + */ + gh__global.main_url = transport_anonymize_url(gh__global.remote->url[0]); + + trace2_data_string(TR2_CAT, NULL, "remote/url", gh__global.main_url); +} + +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data); + +/* + * Find the URL of the cache-server, if we have one. + * + * This routine is called by the initialization code and is allowed + * to call die() rather than returning an 'ec'. + */ +static void select_cache_server(void) +{ + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct strbuf config_data = STRBUF_INIT; + const char *match = NULL; + + /* + * This only indicates that the sub-command actually called + * this routine. We rely on gh__global.cache_server_url to tell + * us if we actually have a cache-server configured. + */ + gh__global.cache_server_is_initialized = 1; + gh__global.cache_server_url = NULL; + + if (gh__cmd_opts.cache_server_mode == GH__CACHE_SERVER_MODE__DISABLE) { + trace2_data_string(TR2_CAT, NULL, "cache/url", "disabled"); + return; + } + + if (!gvfs_cache_server_url || !*gvfs_cache_server_url) { + switch (gh__cmd_opts.cache_server_mode) { + default: + case GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY: + case GH__CACHE_SERVER_MODE__VERIFY_DISABLE: + trace2_data_string(TR2_CAT, NULL, "cache/url", "unset"); + return; + + case GH__CACHE_SERVER_MODE__VERIFY_ERROR: + die("cache-server not set"); + } + } + + /* + * If the cache-server and main Git server have the same URL, we + * can silently disable the cache-server (by NOT setting the field + * in gh__global and explicitly disable the fallback logic.) + */ + if (!strcmp(gvfs_cache_server_url, gh__global.main_url)) { + gh__cmd_opts.try_fallback = 0; + trace2_data_string(TR2_CAT, NULL, "cache/url", "same"); + return; + } + + if (gh__cmd_opts.cache_server_mode == + GH__CACHE_SERVER_MODE__TRUST_WITHOUT_VERIFY) { + gh__global.cache_server_url = gvfs_cache_server_url; + trace2_data_string(TR2_CAT, NULL, "cache/url", + gvfs_cache_server_url); + return; + } + + /* + * GVFS cache-servers use the main Git server's creds rather + * than having their own creds. This feels like a security + * hole. For example, if the cache-server URL is pointed to a + * bad site, we'll happily send them our creds to the main Git + * server with each request to the cache-server. This would + * allow an attacker to later use our creds to impersonate us + * on the main Git server. + * + * So we optionally verify that the URL to the cache-server is + * well-known by the main Git server. + */ + + do__http_get__gvfs_config(&status, &config_data); + + if (status.ec == GH__ERROR_CODE__OK) { + /* + * The gvfs/config response is in JSON, but I don't think + * we need to parse it and all that. Lets just do a simple + * strstr() and assume it is sufficient. 
+ * + * We do add some context to the pattern to guard against + * some attacks. + */ + struct strbuf pattern = STRBUF_INIT; + + strbuf_addf(&pattern, "\"Url\":\"%s\"", gvfs_cache_server_url); + match = strstr(config_data.buf, pattern.buf); + + strbuf_release(&pattern); + } + + strbuf_release(&config_data); + + if (match) { + gh__global.cache_server_url = gvfs_cache_server_url; + trace2_data_string(TR2_CAT, NULL, "cache/url", + gvfs_cache_server_url); + } + + else if (gh__cmd_opts.cache_server_mode == + GH__CACHE_SERVER_MODE__VERIFY_ERROR) { + if (status.ec != GH__ERROR_CODE__OK) + die("could not verify cache-server '%s': %s", + gvfs_cache_server_url, + status.error_message.buf); + else + die("could not verify cache-server '%s'", + gvfs_cache_server_url); + } + + else if (gh__cmd_opts.cache_server_mode == + GH__CACHE_SERVER_MODE__VERIFY_DISABLE) { + if (status.ec != GH__ERROR_CODE__OK) + warning("could not verify cache-server '%s': %s", + gvfs_cache_server_url, + status.error_message.buf); + else + warning("could not verify cache-server '%s'", + gvfs_cache_server_url); + trace2_data_string(TR2_CAT, NULL, "cache/url", + "disabled"); + } + + gh__response_status__release(&status); +} + +/* + * Read stdin until EOF (or a blank line) and add the desired OIDs + * to the oidset. + * + * Stdin should contain a list of OIDs. Lines may have additional + * text following the OID that we ignore. + */ +static unsigned long read_stdin_for_oids(struct oidset *oids) +{ + struct object_id oid; + struct strbuf buf_stdin = STRBUF_INIT; + unsigned long count = 0; + + do { + if (strbuf_getline(&buf_stdin, stdin) == EOF || !buf_stdin.len) + break; + + if (get_oid_hex(buf_stdin.buf, &oid)) + continue; /* just silently eat it */ + + if (!oidset_insert(oids, &oid)) + count++; + } while (1); + + return count; +} + +/* + * Build a complete JSON payload for a gvfs/objects POST request + * containing the first `nr_in_block` OIDs found in the OIDSET + * indexed by the given iterator. + * + * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + * + * Return the number of OIDs we actually put into the payload. + * If only 1 OID was found, also return it. + */ +static unsigned long build_json_payload__gvfs_objects( + struct json_writer *jw_req, + struct oidset_iter *iter, + unsigned long nr_in_block, + struct object_id *oid_out) +{ + unsigned long k; + const struct object_id *oid; + const struct object_id *oid_prev = NULL; + + k = 0; + + jw_init(jw_req); + jw_object_begin(jw_req, 0); + jw_object_intmax(jw_req, "commitDepth", gh__cmd_opts.depth); + jw_object_inline_begin_array(jw_req, "objectIds"); + while (k < nr_in_block && (oid = oidset_iter_next(iter))) { + jw_array_string(jw_req, oid_to_hex(oid)); + k++; + oid_prev = oid; + } + jw_end(jw_req); + jw_end(jw_req); + + if (oid_out) { + if (k == 1) + oidcpy(oid_out, oid_prev); + else + oidclr(oid_out); + } + + return k; +} + +/* + * Lookup the creds for the main/origin Git server. + */ +static void lookup_main_creds(void) +{ + if (gh__global.main_creds.username && *gh__global.main_creds.username) + return; + + credential_from_url(&gh__global.main_creds, gh__global.main_url); + credential_fill(&gh__global.main_creds); + gh__global.main_creds_need_approval = 1; +} + +/* + * If we have a set of creds for the main Git server, tell the credential + * manager to throw them away and ask it to reacquire them. 
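For context, the request body assembled by `build_json_payload__gvfs_objects()` is a small JSON object with a `commitDepth` value and an `objectIds` array. A standalone sketch of that shape, using plain `snprintf()` instead of Git's `json_writer` (the OIDs and the depth of 1 are made up):

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *oids[] = {		/* hypothetical object IDs */
		"1111111111111111111111111111111111111111",
		"2222222222222222222222222222222222222222",
	};
	char payload[4096];
	size_t len, k;

	len = snprintf(payload, sizeof(payload),
		       "{\"commitDepth\":%d,\"objectIds\":[", 1);
	for (k = 0; k < 2; k++)
		len += snprintf(payload + len, sizeof(payload) - len,
				"%s\"%s\"", k ? "," : "", oids[k]);
	snprintf(payload + len, sizeof(payload) - len, "]}");

	printf("%s\n", payload);
	/* {"commitDepth":1,"objectIds":["1111...","2222..."]} */
	return 0;
}
```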
+ */ +static void refresh_main_creds(void) +{ + if (gh__global.main_creds.username && *gh__global.main_creds.username) + credential_reject(&gh__global.main_creds); + + lookup_main_creds(); + + // TODO should we compare before and after values of u/p and + // TODO shortcut reauth if we already know it will fail? + // TODO if so, return a bool if same/different. +} + +static void approve_main_creds(void) +{ + if (!gh__global.main_creds_need_approval) + return; + + credential_approve(&gh__global.main_creds); + gh__global.main_creds_need_approval = 0; +} + +/* + * Build a set of creds for the cache-server based upon the main Git + * server (assuming we have a cache-server configured). + * + * That is, we NEVER fill them directly for the cache-server -- we + * only synthesize them from the filled main creds. + */ +static void synthesize_cache_server_creds(void) +{ + if (!gh__global.cache_server_is_initialized) + BUG("sub-command did not initialize cache-server vars"); + + if (!gh__global.cache_server_url) + return; + + if (gh__global.cache_creds.username && *gh__global.cache_creds.username) + return; + + /* + * Get the main Git server creds so we can borrow the username + * and password when we talk to the cache-server. + */ + lookup_main_creds(); + gh__global.cache_creds.username = xstrdup(gh__global.main_creds.username); + gh__global.cache_creds.password = xstrdup(gh__global.main_creds.password); +} + +/* + * Flush and refresh the cache-server creds. Because the cache-server + * does not do 401s (or manage creds), we have to reload the main Git + * server creds first. + * + * That is, we NEVER reject them directly because we never filled them. + */ +static void refresh_cache_server_creds(void) +{ + credential_clear(&gh__global.cache_creds); + + refresh_main_creds(); + synthesize_cache_server_creds(); +} + +/* + * We NEVER approve cache-server creds directly because we never directly + * filled them. However, we should be able to infer that the main ones + * are valid and can approve them if necessary. + */ +static void approve_cache_server_creds(void) +{ + approve_main_creds(); +} + +/* + * Get the pathname to the ODB where we write objects that we download. + */ +static void select_odb(void) +{ + prepare_alt_odb(the_repository); + + strbuf_init(&gh__global.buf_odb_path, 0); + + if (gvfs_shared_cache_pathname.len) + strbuf_addbuf(&gh__global.buf_odb_path, + &gvfs_shared_cache_pathname); + else + strbuf_addstr(&gh__global.buf_odb_path, + the_repository->objects->odb->path); +} + +/* + * Create a unique tempfile or tempfile-pair inside the + * tempPacks directory. + */ +static void my_create_tempfile( + struct gh__response_status *status, + int b_fdopen, + const char *suffix1, struct tempfile **t1, + const char *suffix2, struct tempfile **t2) +{ + static unsigned int nth = 0; + static struct timeval tv = {0}; + static struct tm tm = {0}; + static time_t secs = 0; + static char date[32] = {0}; + + struct strbuf basename = STRBUF_INIT; + struct strbuf buf = STRBUF_INIT; + int len_tp; + enum scld_error scld; + int retries; + + gh__response_status__zero(status); + + if (!nth) { + /* + * Create a unique string to use in the name of all + * tempfiles created by this process. + */ + gettimeofday(&tv, NULL); + secs = tv.tv_sec; + gmtime_r(&secs, &tm); + + xsnprintf(date, sizeof(date), "%4d%02d%02d-%02d%02d%02d-%06ld", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec, + (long)tv.tv_usec); + } + + /* + * Create a for this instance/pair using a series + * number . 
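The tempfile names generated by `my_create_tempfile()` combine a once-per-process UTC timestamp with an incrementing series number. A standalone sketch of that naming scheme (the sample output in the comment is only an example):

```c
#include <stdio.h>
#include <sys/time.h>
#include <time.h>

int main(void)
{
	static unsigned int nth;	/* series number, one per tempfile */
	struct timeval tv;
	struct tm tm;
	time_t secs;
	char date[32], basename[64];

	/* computed once per process in the real code */
	gettimeofday(&tv, NULL);
	secs = tv.tv_sec;
	gmtime_r(&secs, &tm);
	snprintf(date, sizeof(date), "%4d%02d%02d-%02d%02d%02d-%06ld",
		 tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		 tm.tm_hour, tm.tm_min, tm.tm_sec, (long)tv.tv_usec);

	snprintf(basename, sizeof(basename), "t-%s-%04d", date, nth++);
	printf("%s.pack\n", basename);
	/* e.g. t-20240101-120000-000123-0000.pack */
	return 0;
}
```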
+ */ + strbuf_addf(&basename, "t-%s-%04d", date, nth++); + + if (!suffix1 || !*suffix1) + suffix1 = "temp"; + + /* + * Create full pathname as: + * + * "/pack/tempPacks/." + */ + strbuf_setlen(&buf, 0); + strbuf_addbuf(&buf, &gh__global.buf_odb_path); + strbuf_complete(&buf, '/'); + strbuf_addstr(&buf, "pack/tempPacks/"); + len_tp = buf.len; + strbuf_addf( &buf, "%s.%s", basename.buf, suffix1); + + scld = safe_create_leading_directories(buf.buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) { + strbuf_addf(&status->error_message, + "could not create directory for tempfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + + retries = 0; + *t1 = create_tempfile(buf.buf); + while (!*t1 && retries < 5) { + retries++; + strbuf_setlen(&buf, len_tp); + strbuf_addf(&buf, "%s-%d.%s", basename.buf, retries, suffix1); + *t1 = create_tempfile(buf.buf); + } + + if (!*t1) { + strbuf_addf(&status->error_message, + "could not create tempfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + if (b_fdopen) + fdopen_tempfile(*t1, "w"); + + /* + * Optionally create a peer tempfile with the same basename. + * (This is useful for prefetching .pack and .idx pairs.) + * + * "/pack/tempPacks/." + */ + if (suffix2 && *suffix2 && t2) { + strbuf_setlen(&buf, len_tp); + strbuf_addf( &buf, "%s.%s", basename.buf, suffix2); + + *t2 = create_tempfile(buf.buf); + while (!*t2 && retries < 5) { + retries++; + strbuf_setlen(&buf, len_tp); + strbuf_addf(&buf, "%s-%d.%s", basename.buf, retries, suffix2); + *t2 = create_tempfile(buf.buf); + } + + if (!*t2) { + strbuf_addf(&status->error_message, + "could not create tempfile: '%s'", + buf.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_CREATE_TEMPFILE; + goto cleanup; + } + if (b_fdopen) + fdopen_tempfile(*t2, "w"); + } + +cleanup: + strbuf_release(&buf); + strbuf_release(&basename); +} + +/* + * Create pathnames to the final location of the .pack and .idx + * files in the ODB. These are of the form: + * + * "/pack/-[-]." + * + * For example, for prefetch packs, will be the epoch + * timestamp and will be the packfile hash. + */ +static void create_final_packfile_pathnames( + const char *term_1, const char *term_2, const char *term_3, + struct strbuf *pack_path, struct strbuf *idx_path, + struct strbuf *pack_filename) +{ + struct strbuf base = STRBUF_INIT; + struct strbuf path = STRBUF_INIT; + + if (term_3 && *term_3) + strbuf_addf(&base, "%s-%s-%s", term_1, term_2, term_3); + else + strbuf_addf(&base, "%s-%s", term_1, term_2); + + strbuf_setlen(pack_filename, 0); + strbuf_addf( pack_filename, "%s.pack", base.buf); + + strbuf_addbuf(&path, &gh__global.buf_odb_path); + strbuf_complete(&path, '/'); + strbuf_addstr(&path, "pack/"); + + strbuf_setlen(pack_path, 0); + strbuf_addbuf(pack_path, &path); + strbuf_addf( pack_path, "%s.pack", base.buf); + + strbuf_setlen(idx_path, 0); + strbuf_addbuf(idx_path, &path); + strbuf_addf( idx_path, "%s.idx", base.buf); + + strbuf_release(&base); + strbuf_release(&path); +} + +/* + * Create a pathname to the loose object in the shared-cache ODB + * with the given OID. Try to "mkdir -p" to ensure the parent + * directories exist. 
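A standalone sketch of the pathname scheme implemented by `create_final_packfile_pathnames()`; the ODB path, timestamp, and checksum below are hypothetical:

```c
#include <stdio.h>

/* "<odb>/pack/<term1>-<term2>[-<term3>].pack" and the matching ".idx" */
static void final_names(const char *odb, const char *t1, const char *t2,
			const char *t3, char *pack, char *idx, size_t n)
{
	char base[256];

	if (t3 && *t3)
		snprintf(base, sizeof(base), "%s-%s-%s", t1, t2, t3);
	else
		snprintf(base, sizeof(base), "%s-%s", t1, t2);

	snprintf(pack, n, "%s/pack/%s.pack", odb, base);
	snprintf(idx,  n, "%s/pack/%s.idx",  odb, base);
}

int main(void)
{
	char pack[512], idx[512];

	/* prefetch packs use (epoch timestamp, packfile hash) */
	final_names("/repo/.git/objects", "prefetch", "1700000000",
		    "0123456789abcdef0123456789abcdef01234567",
		    pack, idx, sizeof(pack));
	printf("%s\n%s\n", pack, idx);
	return 0;
}
```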
+ */ +static int create_loose_pathname_in_odb(struct strbuf *buf_path, + const struct object_id *oid) +{ + enum scld_error scld; + const char *hex; + + hex = oid_to_hex(oid); + + strbuf_setlen(buf_path, 0); + strbuf_addbuf(buf_path, &gh__global.buf_odb_path); + strbuf_complete(buf_path, '/'); + strbuf_add(buf_path, hex, 2); + strbuf_addch(buf_path, '/'); + strbuf_addstr(buf_path, hex+2); + + scld = safe_create_leading_directories(buf_path->buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) + return -1; + + return 0; +} + +static void my_run_index_pack(struct gh__request_params *params, + struct gh__response_status *status, + const struct strbuf *temp_path_pack, + const struct strbuf *temp_path_idx, + struct strbuf *packfile_checksum) +{ + struct child_process ip = CHILD_PROCESS_INIT; + struct strbuf ip_stdout = STRBUF_INIT; + + strvec_push(&ip.args, "git"); + strvec_push(&ip.args, "index-pack"); + + if (gh__cmd_opts.show_progress) { + strvec_push(&ip.args, "-v"); + ip.err = 0; + } else { + ip.err = -1; + ip.no_stderr = 1; + } + + strvec_pushl(&ip.args, "-o", temp_path_idx->buf, NULL); + strvec_push(&ip.args, temp_path_pack->buf); + ip.no_stdin = 1; + ip.out = -1; + + if (pipe_command(&ip, NULL, 0, &ip_stdout, 0, NULL, 0)) { + unlink(temp_path_pack->buf); + unlink(temp_path_idx->buf); + strbuf_addf(&status->error_message, + "index-pack failed on '%s'", + temp_path_pack->buf); + /* + * Lets assume that index-pack failed because the + * downloaded file is corrupt (truncated). + * + * Retry it as if the network had dropped. + */ + status->retry = GH__RETRY_MODE__TRANSIENT; + status->ec = GH__ERROR_CODE__INDEX_PACK_FAILED; + goto cleanup; + } + + if (packfile_checksum) { + /* + * stdout from index-pack should have the packfile hash. + * Extract it and use it in the final packfile name. + * + * TODO What kind of validation should we do on the + * TODO string and is there ever any other output besides + * TODO just the checksum ? + */ + strbuf_trim_trailing_newline(&ip_stdout); + + strbuf_addbuf(packfile_checksum, &ip_stdout); + } + +cleanup: + strbuf_release(&ip_stdout); + child_process_clear(&ip); +} + +static void my_finalize_packfile(struct gh__request_params *params, + struct gh__response_status *status, + int b_keep, + const struct strbuf *temp_path_pack, + const struct strbuf *temp_path_idx, + struct strbuf *final_path_pack, + struct strbuf *final_path_idx, + struct strbuf *final_filename) +{ + /* + * Install the .pack and .idx into the ODB pack directory. + * + * We might be racing with other instances of gvfs-helper if + * we, in parallel, both downloaded the exact same packfile + * (with the same checksum SHA) and try to install it at the + * same time. This might happen on Windows where the loser + * can get an EBUSY or EPERM trying to move/rename the + * tempfile into the pack dir, for example. + * + * So, we always install the .pack before the .idx for + * consistency. And only if *WE* created the .pack and .idx + * files, do we create the matching .keep (when requested). + * + * If we get an error and the target files already exist, we + * silently eat the error. Note that finalize_object_file() + * has already munged errno (and it has various creation + * strategies), so we don't bother looking at it. 
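The loose-object path built by `create_loose_pathname_in_odb()` follows Git's usual fan-out layout: the first two hex digits of the OID name a subdirectory and the remaining digits name the file. A standalone sketch (the ODB path and OID are made up):

```c
#include <stdio.h>

int main(void)
{
	const char *odb = "/repo/.git/objects";	/* hypothetical ODB */
	const char *hex = "0123456789abcdef0123456789abcdef01234567";
	char path[512];

	/* "<odb>/<first two hex digits>/<remaining hex digits>" */
	snprintf(path, sizeof(path), "%s/%.2s/%s", odb, hex, hex + 2);
	printf("%s\n", path);
	/* /repo/.git/objects/01/23456789abcdef0123456789abcdef01234567 */
	return 0;
}
```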
+ */ + if (finalize_object_file(temp_path_pack->buf, final_path_pack->buf) || + finalize_object_file(temp_path_idx->buf, final_path_idx->buf)) { + unlink(temp_path_pack->buf); + unlink(temp_path_idx->buf); + + if (file_exists(final_path_pack->buf) && + file_exists(final_path_idx->buf)) { + trace2_printf("%s: assuming ok for %s", TR2_CAT, final_path_pack->buf); + goto assume_ok; + } + + strbuf_addf(&status->error_message, + "could not install packfile '%s'", + final_path_pack->buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + return; + } + + if (b_keep) { + struct strbuf keep = STRBUF_INIT; + int fd_keep; + + strbuf_addbuf(&keep, final_path_pack); + strbuf_strip_suffix(&keep, ".pack"); + strbuf_addstr(&keep, ".keep"); + + fd_keep = xopen(keep.buf, O_WRONLY | O_CREAT | O_TRUNC, 0666); + if (fd_keep >= 0) + close(fd_keep); + + strbuf_release(&keep); + } + +assume_ok: + if (params->result_list) { + struct strbuf result_msg = STRBUF_INIT; + + strbuf_addf(&result_msg, "packfile %s", final_filename->buf); + string_list_append(params->result_list, result_msg.buf); + strbuf_release(&result_msg); + } +} + +/* + * Convert the tempfile into a temporary .pack, index it into a temporary .idx + * file, and then install the pair into ODB. + */ +static void install_packfile(struct gh__request_params *params, + struct gh__response_status *status) +{ + struct strbuf temp_path_pack = STRBUF_INIT; + struct strbuf temp_path_idx = STRBUF_INIT; + struct strbuf packfile_checksum = STRBUF_INIT; + struct strbuf final_path_pack = STRBUF_INIT; + struct strbuf final_path_idx = STRBUF_INIT; + struct strbuf final_filename = STRBUF_INIT; + + gh__response_status__zero(status); + + /* + * After the download is complete, we will need to steal the file + * from the tempfile() class (so that it doesn't magically delete + * it when we close the file handle) and then index it. + */ + strbuf_addf(&temp_path_pack, "%s.pack", + get_tempfile_path(params->tempfile)); + strbuf_addf(&temp_path_idx, "%s.idx", + get_tempfile_path(params->tempfile)); + + if (rename_tempfile(¶ms->tempfile, + temp_path_pack.buf) == -1) { + strbuf_addf(&status->error_message, + "could not rename packfile to '%s'", + temp_path_pack.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PACKFILE; + goto cleanup; + } + + my_run_index_pack(params, status, &temp_path_pack, &temp_path_idx, + &packfile_checksum); + if (status->ec != GH__ERROR_CODE__OK) + goto cleanup; + + create_final_packfile_pathnames("vfs", packfile_checksum.buf, NULL, + &final_path_pack, &final_path_idx, + &final_filename); + my_finalize_packfile(params, status, 0, + &temp_path_pack, &temp_path_idx, + &final_path_pack, &final_path_idx, + &final_filename); + +cleanup: + strbuf_release(&temp_path_pack); + strbuf_release(&temp_path_idx); + strbuf_release(&packfile_checksum); + strbuf_release(&final_path_pack); + strbuf_release(&final_path_idx); + strbuf_release(&final_filename); +} + +/* + * bswap.h only defines big endian functions. + * The GVFS Protocol defines fields in little endian. + */ +static inline uint64_t my_get_le64(uint64_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap64(le_val); +#endif +} + +#define MY_MIN(x,y) (((x) < (y)) ? (x) : (y)) +#define MY_MAX(x,y) (((x) > (y)) ? (x) : (y)) + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out`. + * + * This could be used to extract a single packfile from + * a multipart file, for example. 
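Because the GVFS Protocol stores these header fields as 64-bit little-endian integers, `my_get_le64()` byte-swaps only on big-endian hosts. An equivalent host-independent decode from raw bytes, shown as a standalone sketch:

```c
#include <stdint.h>
#include <stdio.h>

/* Assemble a uint64 from 8 little-endian bytes, regardless of the
 * host's endianness. */
static uint64_t decode_le64(const unsigned char *p)
{
	uint64_t v = 0;
	int i;

	for (i = 7; i >= 0; i--)
		v = (v << 8) | p[i];
	return v;
}

int main(void)
{
	unsigned char buf[8] = { 0x01, 0x02, 0, 0, 0, 0, 0, 0 };

	printf("%llu\n", (unsigned long long)decode_le64(buf));	/* 513 */
	return 0;
}
```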
+ */ +static int my_copy_fd_len(int fd_in, int fd_out, ssize_t nr_bytes_total) +{ + char buffer[8192]; + + while (nr_bytes_total > 0) { + ssize_t len_to_read = MY_MIN(nr_bytes_total, sizeof(buffer)); + ssize_t nr_read = xread(fd_in, buffer, len_to_read); + + if (!nr_read) + break; + if (nr_read < 0) + return -1; + + if (write_in_full(fd_out, buffer, nr_read) < 0) + return -1; + + nr_bytes_total -= nr_read; + } + + return 0; +} + +/* + * Copy the `nr_bytes_total` from `fd_in` to `fd_out` AND save the + * final `tail_len` bytes in the given buffer. + * + * This could be used to extract a single packfile from + * a multipart file and read the final SHA into the buffer. + */ +static int my_copy_fd_len_tail(int fd_in, int fd_out, ssize_t nr_bytes_total, + unsigned char *buf_tail, ssize_t tail_len) +{ + memset(buf_tail, 0, tail_len); + + if (my_copy_fd_len(fd_in, fd_out, nr_bytes_total) < 0) + return -1; + + if (nr_bytes_total < tail_len) + return 0; + + /* Reset the position to read the tail */ + lseek(fd_in, -tail_len, SEEK_CUR); + + if (xread(fd_in, (char *)buf_tail, tail_len) != tail_len) + return -1; + + return 0; +} + +/* + * See the protocol document for the per-packfile header. + */ +struct ph { + uint64_t timestamp; + uint64_t pack_len; + uint64_t idx_len; +}; + +/* + * Extract the next packfile from the multipack. + * Install {.pack, .idx, .keep} set. + * + * Mark each successfully installed prefetch pack as .keep it as installed + * in case we have errors decoding/indexing later packs within the received + * multipart file. (A later pass can delete the unnecessary .keep files + * from this and any previous invocations.) + */ +static void extract_packfile_from_multipack( + struct gh__request_params *params, + struct gh__response_status *status, + int fd_multipack, + unsigned short k) +{ + struct ph ph; + struct tempfile *tempfile_pack = NULL; + struct tempfile *tempfile_idx = NULL; + int result = -1; + int b_no_idx_in_multipack; + struct object_id packfile_checksum; + char hex_checksum[GIT_MAX_HEXSZ + 1]; + struct strbuf buf_timestamp = STRBUF_INIT; + struct strbuf temp_path_pack = STRBUF_INIT; + struct strbuf temp_path_idx = STRBUF_INIT; + struct strbuf final_path_pack = STRBUF_INIT; + struct strbuf final_path_idx = STRBUF_INIT; + struct strbuf final_filename = STRBUF_INIT; + + if (xread(fd_multipack, &ph, sizeof(ph)) != sizeof(ph)) { + strbuf_addf(&status->error_message, + "could not read header for packfile[%d] in multipack", + k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + ph.timestamp = my_get_le64(ph.timestamp); + ph.pack_len = my_get_le64(ph.pack_len); + ph.idx_len = my_get_le64(ph.idx_len); + + if (!ph.pack_len) { + strbuf_addf(&status->error_message, + "packfile[%d]: zero length packfile?", k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + b_no_idx_in_multipack = (ph.idx_len == maximum_unsigned_value_of_type(uint64_t) || + ph.idx_len == 0); + + if (b_no_idx_in_multipack) { + my_create_tempfile(status, 0, "pack", &tempfile_pack, NULL, NULL); + if (!tempfile_pack) + goto done; + } else { + /* create a pair of tempfiles with the same basename */ + my_create_tempfile(status, 0, "pack", &tempfile_pack, "idx", &tempfile_idx); + if (!tempfile_pack || !tempfile_idx) + goto done; + } + + /* + * Copy the current packfile from the open stream and capture + * the checksum. + * + * TODO This assumes that the checksum is SHA1. Fix this if/when + * TODO Git converts to SHA256. 
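The `struct ph` defined above describes the 24-byte header that precedes each packfile in the multipart stream; an `idx_len` of zero or `UINT64_MAX` means no `.idx` follows and the client must index the pack itself. A standalone sketch decoding such a header (the byte values are made up, and the `memcpy()` shortcut assumes a little-endian host):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct ph {
	uint64_t timestamp;	/* server timestamp of the prefetch pack */
	uint64_t pack_len;	/* number of .pack bytes that follow */
	uint64_t idx_len;	/* number of .idx bytes after the pack, if any */
};

int main(void)
{
	unsigned char raw[sizeof(struct ph)] = { 0 };
	struct ph ph;

	raw[0] = 0x10;			/* timestamp = 0x10, little-endian */
	memcpy(&ph, raw, sizeof(ph));	/* assumes a little-endian host */

	printf("timestamp=%llu pack_len=%llu idx_len=%llu\n",
	       (unsigned long long)ph.timestamp,
	       (unsigned long long)ph.pack_len,
	       (unsigned long long)ph.idx_len);
	return 0;
}
```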
+ */ + result = my_copy_fd_len_tail(fd_multipack, + get_tempfile_fd(tempfile_pack), + ph.pack_len, + packfile_checksum.hash, + GIT_SHA1_RAWSZ); + packfile_checksum.algo = GIT_HASH_SHA1; + + if (result < 0){ + strbuf_addf(&status->error_message, + "could not extract packfile[%d] from multipack", + k); + goto done; + } + strbuf_addstr(&temp_path_pack, get_tempfile_path(tempfile_pack)); + close_tempfile_gently(tempfile_pack); + + oid_to_hex_r(hex_checksum, &packfile_checksum); + + if (b_no_idx_in_multipack) { + /* + * The server did not send the corresponding .idx, so + * we have to compute it ourselves. + */ + strbuf_addbuf(&temp_path_idx, &temp_path_pack); + strbuf_strip_suffix(&temp_path_idx, ".pack"); + strbuf_addstr(&temp_path_idx, ".idx"); + + my_run_index_pack(params, status, + &temp_path_pack, &temp_path_idx, + NULL); + if (status->ec != GH__ERROR_CODE__OK) + goto done; + + } else { + /* + * Server sent the .idx immediately after the .pack in the + * data stream. I'm tempted to verify it, but that defeats + * the purpose of having it cached... + */ + if (my_copy_fd_len(fd_multipack, get_tempfile_fd(tempfile_idx), + ph.idx_len) < 0) { + strbuf_addf(&status->error_message, + "could not extract index[%d] in multipack", + k); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto done; + } + + strbuf_addstr(&temp_path_idx, get_tempfile_path(tempfile_idx)); + close_tempfile_gently(tempfile_idx); + } + + strbuf_addf(&buf_timestamp, "%u", (unsigned int)ph.timestamp); + create_final_packfile_pathnames("prefetch", buf_timestamp.buf, hex_checksum, + &final_path_pack, &final_path_idx, + &final_filename); + + my_finalize_packfile(params, status, 1, + &temp_path_pack, &temp_path_idx, + &final_path_pack, &final_path_idx, + &final_filename); + +done: + delete_tempfile(&tempfile_pack); + delete_tempfile(&tempfile_idx); + strbuf_release(&temp_path_pack); + strbuf_release(&temp_path_idx); + strbuf_release(&final_path_pack); + strbuf_release(&final_path_idx); + strbuf_release(&final_filename); +} + +struct keep_files_data { + timestamp_t max_timestamp; + int pos_of_max; + struct string_list *keep_files; +}; + +static void cb_keep_files(const char *full_path, size_t full_path_len, + const char *file_path, void *void_data) +{ + struct keep_files_data *data = void_data; + const char *val; + timestamp_t t; + + /* + * We expect prefetch packfiles named like: + * + * prefetch--.keep + */ + if (!skip_prefix(file_path, "prefetch-", &val)) + return; + if (!ends_with(val, ".keep")) + return; + + t = strtol(val, NULL, 10); + if (t > data->max_timestamp) { + data->pos_of_max = data->keep_files->nr; + data->max_timestamp = t; + } + + string_list_append(data->keep_files, full_path); +} + +static void delete_stale_keep_files( + struct gh__request_params *params, + struct gh__response_status *status) +{ + struct string_list keep_files = STRING_LIST_INIT_DUP; + struct keep_files_data data = { 0, 0, &keep_files }; + int k; + + for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, + cb_keep_files, &data); + for (k = 0; k < keep_files.nr; k++) { + if (k != data.pos_of_max) + unlink(keep_files.items[k].string); + } + + string_list_clear(&keep_files, 0); +} + +/* + * Cut apart the received multipart response into individual packfiles + * and install each one. 
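The scanning callbacks above recover the server timestamp from names of the form `prefetch-<timestamp>-<checksum>.keep` (or `.pack`): strip the `prefetch-` prefix and let `strtol()` stop at the next `-`. A standalone sketch with a hypothetical filename:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	const char *name =
		"prefetch-1700000000-0123456789abcdef01234567.keep";
	const char *prefix = "prefetch-";
	long t = 0;

	if (!strncmp(name, prefix, strlen(prefix)))
		t = strtol(name + strlen(prefix), NULL, 10);

	printf("%ld\n", t);	/* 1700000000 */
	return 0;
}
```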
+ */ +static void install_prefetch(struct gh__request_params *params, + struct gh__response_status *status) +{ + static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 }; + + struct mh { + unsigned char h[6]; + unsigned char np[2]; + }; + + struct mh mh; + unsigned short np; + unsigned short k; + int fd = -1; + int nr_installed = 0; + + struct strbuf temp_path_mp = STRBUF_INIT; + + /* + * Steal the multi-part file from the tempfile class. + */ + strbuf_addf(&temp_path_mp, "%s.mp", get_tempfile_path(params->tempfile)); + if (rename_tempfile(¶ms->tempfile, temp_path_mp.buf) == -1) { + strbuf_addf(&status->error_message, + "could not rename prefetch tempfile to '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto cleanup; + } + + fd = git_open_cloexec(temp_path_mp.buf, O_RDONLY); + if (fd == -1) { + strbuf_addf(&status->error_message, + "could not reopen prefetch tempfile '%s'", + temp_path_mp.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_PREFETCH; + goto cleanup; + } + + if ((xread(fd, &mh, sizeof(mh)) != sizeof(mh)) || + (memcmp(mh.h, &v1_h, sizeof(mh.h)))) { + strbuf_addstr(&status->error_message, + "invalid prefetch multipart header"); + goto cleanup; + } + + np = (unsigned short)mh.np[0] + ((unsigned short)mh.np[1] << 8); + if (np) + trace2_data_intmax(TR2_CAT, NULL, + "prefetch/packfile_count", np); + + for (k = 0; k < np; k++) { + extract_packfile_from_multipack(params, status, fd, k); + if (status->ec != GH__ERROR_CODE__OK) + break; + nr_installed++; + } + + if (nr_installed) + delete_stale_keep_files(params, status); + +cleanup: + if (fd != -1) + close(fd); + + unlink(temp_path_mp.buf); + strbuf_release(&temp_path_mp); +} + +/* + * Wrapper for read_loose_object() to read and verify the hash of a + * loose object, and discard the contents buffer. + * + * Returns 0 on success, negative on error (details may be written to stderr). + */ +static int verify_loose_object(const char *path, + const struct object_id *expected_oid) +{ + enum object_type type; + void *contents = NULL; + unsigned long size; + struct strbuf type_name = STRBUF_INIT; + int ret; + struct object_info oi = OBJECT_INFO_INIT; + struct object_id real_oid = *null_oid(); + oi.typep = &type; + oi.sizep = &size; + oi.type_name = &type_name; + + ret = read_loose_object(path, expected_oid, &real_oid, &contents, &oi); + if (!ret) + free(contents); + + return ret; +} + +/* + * Convert the tempfile into a permanent loose object in the ODB. + */ +static void install_loose(struct gh__request_params *params, + struct gh__response_status *status) +{ + struct strbuf tmp_path = STRBUF_INIT; + struct strbuf loose_path = STRBUF_INIT; + + gh__response_status__zero(status); + + /* + * close tempfile to steal ownership away from tempfile class. + */ + strbuf_addstr(&tmp_path, get_tempfile_path(params->tempfile)); + close_tempfile_gently(params->tempfile); + + /* + * Compute the hash of the received content (while it is still + * in a temp file) and verify that it matches the OID that we + * requested and was not corrupted. + */ + if (verify_loose_object(tmp_path.buf, ¶ms->loose_oid)) { + strbuf_addf(&status->error_message, + "hash failed for received loose object '%s'", + oid_to_hex(¶ms->loose_oid)); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + /* + * Try to install the tempfile as the actual loose object. + * + * If the loose object already exists, finalize_object_file() + * will NOT overwrite/replace it. 
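The multipart stream parsed by `install_prefetch()` starts with a 6-byte magic (`"GPRE "` plus version `0x01`) followed by a 2-byte little-endian packfile count. A standalone sketch of that header check (the sample bytes encode a count of 3):

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	static const unsigned char magic[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 };
	unsigned char hdr[8] = { 'G', 'P', 'R', 'E', ' ', 0x01, 0x03, 0x00 };
	unsigned short np;

	if (memcmp(hdr, magic, sizeof(magic))) {
		fprintf(stderr, "invalid prefetch multipart header\n");
		return 1;
	}

	/* 2-byte little-endian packfile count */
	np = (unsigned short)hdr[6] + ((unsigned short)hdr[7] << 8);
	printf("%u packfile(s) follow\n", np);	/* 3 */
	return 0;
}
```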
It will silently eat the + * EEXIST error and unlink the tempfile as it if was + * successful. We just let it lie to us. + * + * Since our job is to back-fill missing objects needed by a + * foreground git process -- git should have called + * oid_object_info_extended() and loose_object_info() BEFORE + * asking us to download the missing object. So if we get a + * collision we have to assume something else is happening in + * parallel and we lost the race. And that's OK. + */ + if (create_loose_pathname_in_odb(&loose_path, ¶ms->loose_oid)) { + strbuf_addf(&status->error_message, + "cannot create directory for loose object '%s'", + loose_path.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + if (finalize_object_file(tmp_path.buf, loose_path.buf)) { + unlink(tmp_path.buf); + strbuf_addf(&status->error_message, + "could not install loose object '%s'", + loose_path.buf); + status->ec = GH__ERROR_CODE__COULD_NOT_INSTALL_LOOSE; + goto cleanup; + } + + if (params->result_list) { + struct strbuf result_msg = STRBUF_INIT; + + strbuf_addf(&result_msg, "loose %s", + oid_to_hex(¶ms->loose_oid)); + string_list_append(params->result_list, result_msg.buf); + strbuf_release(&result_msg); + } + +cleanup: + strbuf_release(&tmp_path); + strbuf_release(&loose_path); +} + +static void install_result(struct gh__request_params *params, + struct gh__response_status *status) +{ + if (params->objects_mode == GH__OBJECTS_MODE__PREFETCH) { + /* + * The "gvfs/prefetch" API is the only thing that sends + * these multi-part packfiles. According to the protocol + * documentation, they will have this x- content type. + * + * However, it appears that there is a BUG in the origin + * server causing it to sometimes send "text/html" instead. + * So, we silently handle both. + */ + if (!strcmp(status->content_type.buf, + "application/x-gvfs-timestamped-packfiles-indexes")) { + install_prefetch(params, status); + return; + } + + if (!strcmp(status->content_type.buf, "text/html")) { + install_prefetch(params, status); + return; + } + } else { + if (!strcmp(status->content_type.buf, "application/x-git-packfile")) { + assert(params->b_is_post); + assert(params->objects_mode == GH__OBJECTS_MODE__POST); + + install_packfile(params, status); + return; + } + + if (!strcmp(status->content_type.buf, + "application/x-git-loose-object")) { + /* + * We get these for "gvfs/objects" GET and POST requests. + * + * Note that this content type is singular, not plural. + */ + install_loose(params, status); + return; + } + } + + strbuf_addf(&status->error_message, + "install_result: received unknown content-type '%s'", + status->content_type.buf); + status->ec = GH__ERROR_CODE__UNEXPECTED_CONTENT_TYPE; +} + +/* + * Our wrapper to initialize the HTTP layer. + * + * We always use the real origin server, not the cache-server, when + * initializing the http/curl layer. 
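For reference, the dispatch in `install_result()` boils down to a content-type lookup. The sketch below mirrors that mapping; the handler names returned here are just labels for this illustration:

```c
#include <stdio.h>
#include <string.h>

static const char *classify(const char *content_type, int is_prefetch)
{
	if (is_prefetch &&
	    (!strcmp(content_type,
		     "application/x-gvfs-timestamped-packfiles-indexes") ||
	     !strcmp(content_type, "text/html")))	/* server-bug workaround */
		return "install_prefetch";
	if (!strcmp(content_type, "application/x-git-packfile"))
		return "install_packfile";
	if (!strcmp(content_type, "application/x-git-loose-object"))
		return "install_loose";
	return "unexpected content-type";
}

int main(void)
{
	printf("%s\n", classify("application/x-git-loose-object", 0));
	return 0;
}
```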
+ */ +static void gh_http_init(void) +{ + if (gh__global.http_is_initialized) + return; + + http_init(gh__global.remote, gh__global.main_url, 0); + gh__global.http_is_initialized = 1; +} + +static void gh_http_cleanup(void) +{ + if (!gh__global.http_is_initialized) + return; + + http_cleanup(); + gh__global.http_is_initialized = 0; +} + +/* + * buffer has ": [\r]\n" + */ +static void parse_resp_hdr_1(const char *buffer, size_t size, size_t nitems, + struct strbuf *key, struct strbuf *value) +{ + const char *end = buffer + (size * nitems); + const char *p; + + p = strchr(buffer, ':'); + + strbuf_setlen(key, 0); + strbuf_add(key, buffer, (p - buffer)); + + p++; /* skip ':' */ + p++; /* skip ' ' */ + + strbuf_setlen(value, 0); + strbuf_add(value, p, (end - p)); + strbuf_trim_trailing_newline(value); +} + +static size_t parse_resp_hdr(char *buffer, size_t size, size_t nitems, + void *void_params) +{ + struct gh__request_params *params = void_params; + struct gh__azure_throttle *azure = &gh__global_throttle[params->server_type]; + + if (starts_with(buffer, "X-RateLimit-")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * The following X- headers are specific to AzureDevOps. + * Other servers have similar sets of values, but I haven't + * compared them in depth. + */ + // trace2_printf("%s: Throttle: %s %s", TR2_CAT, key.buf, val.buf); + + if (!strcmp(key.buf, "X-RateLimit-Resource")) { + /* + * The name of the resource that is complaining. + * Just log it because we can't do anything with it. + */ + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/resource"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + trace2_data_string(TR2_CAT, NULL, key.buf, val.buf); + } + + else if (!strcmp(key.buf, "X-RateLimit-Delay")) { + /* + * The amount of delay added to our response. + * Just log it because we can't do anything with it. + */ + unsigned long tarpit_delay_ms; + + strbuf_setlen(&key, 0); + strbuf_addstr(&key, "ratelimit/delay_ms"); + strbuf_addstr(&key, gh__server_type_label[params->server_type]); + + git_parse_ulong(val.buf, &tarpit_delay_ms); + + trace2_data_intmax(TR2_CAT, NULL, key.buf, tarpit_delay_ms); + } + + else if (!strcmp(key.buf, "X-RateLimit-Limit")) { + /* + * The resource limit/quota before we get a 429. + */ + git_parse_ulong(val.buf, &azure->tstu_limit); + } + + else if (!strcmp(key.buf, "X-RateLimit-Remaining")) { + /* + * The amount of our quota remaining. When zero, we + * should get 429s on futher requests until the reset + * time. + */ + git_parse_ulong(val.buf, &azure->tstu_remaining); + } + + else if (!strcmp(key.buf, "X-RateLimit-Reset")) { + /* + * The server gave us a time-in-seconds-since-the-epoch + * for when our quota will be reset (if we stop all + * activity right now). + * + * Checkpoint the local system clock so we can do some + * sanity checks on any clock skew. Also, since we get + * the headers before we get the content, we can adjust + * our delay to compensate for the full download time. 
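The header callback above leans on `parse_resp_hdr_1()`, which splits a raw `Key: value\r\n` line at the first colon and trims the line ending. A standalone sketch with a hypothetical header line (like the helper, it assumes a single space after the colon):

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *line = "X-RateLimit-Remaining: 145\r\n";
	const char *colon = strchr(line, ':');
	char key[64], val[64];
	size_t vlen;

	snprintf(key, sizeof(key), "%.*s", (int)(colon - line), line);

	colon += 2;			/* skip ": " */
	vlen = strcspn(colon, "\r\n");	/* trim the line ending */
	snprintf(val, sizeof(val), "%.*s", (int)vlen, colon);

	printf("key='%s' value='%s'\n", key, val);
	return 0;
}
```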
+ */ + unsigned long now = time(NULL); + unsigned long reset_time; + + git_parse_ulong(val.buf, &reset_time); + if (reset_time > now) + azure->reset_sec = reset_time - now; + } + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "Retry-After")) { + struct strbuf key = STRBUF_INIT; + struct strbuf val = STRBUF_INIT; + + parse_resp_hdr_1(buffer, size, nitems, &key, &val); + + /* + * We get this header with a 429 and 503 and possibly a 30x. + * + * Curl does have CURLINFO_RETRY_AFTER that nicely parses and + * normalizes the value (and supports HTTP/1.1 usage), but it + * is not present yet in the version shipped with the Mac, so + * we do it directly here. + */ + git_parse_ulong(val.buf, &azure->retry_after_sec); + + strbuf_release(&key); + strbuf_release(&val); + } + + else if (starts_with(buffer, "X-VSS-E2EID")) { + struct strbuf key = STRBUF_INIT; + + /* + * Capture the E2EID as it goes by, but don't log it until we + * know the request result. + */ + parse_resp_hdr_1(buffer, size, nitems, &key, ¶ms->e2eid); + + strbuf_release(&key); + } + + return nitems * size; +} + +/* + * Wait "duration" seconds and drive the progress mechanism. + * + * We spin slightly faster than we need to to keep the progress bar + * drawn (especially if the user presses return while waiting) and to + * compensate for delay factors built into the progress class (which + * might wait for 2 seconds before drawing the first message). + */ +static void do_throttle_spin(struct gh__request_params *params, + const char *tr2_label, + const char *progress_msg, + int duration) +{ + struct strbuf region = STRBUF_INIT; + struct progress *progress = NULL; + unsigned long begin = time(NULL); + unsigned long now = begin; + unsigned long end = begin + duration; + + strbuf_addstr(®ion, tr2_label); + strbuf_addstr(®ion, gh__server_type_label[params->server_type]); + trace2_region_enter(TR2_CAT, region.buf, NULL); + + if (gh__cmd_opts.show_progress) + progress = start_progress(progress_msg, duration); + + while (now < end) { + display_progress(progress, (now - begin)); + + sleep_millisec(100); + + now = time(NULL); + } + + display_progress(progress, duration); + stop_progress(&progress); + + trace2_region_leave(TR2_CAT, region.buf, NULL); + strbuf_release(®ion); +} + +/* + * Delay the outbound request if necessary in response to previous throttle + * blockages or hints. Throttle data is somewhat orthogonal to the status + * results from any previous request and/or the request params of the next + * request. + * + * Note that the throttle info also is cross-process information, such as + * 2 concurrent fetches in 2 different terminal windows to the same server + * will be sharing the same server quota. These could be coordinated too, + * so that a blockage received in one process would prevent the other + * process from starting another request (and also blocked or extending + * the delay interval). We're NOT going to do that level of integration. + * We will let both processes independently attempt the next request. + * This may cause us to miss the end-of-quota boundary if the server + * extends it because of the second request. + * + * TODO Should we have a max-wait option and then return a hard-error + * TODO of some type? 
+ */ +static void do_throttle_wait(struct gh__request_params *params, + struct gh__response_status *status) +{ + struct gh__azure_throttle *azure = + &gh__global_throttle[params->server_type]; + + if (azure->retry_after_sec) { + /* + * We were given a hard delay (such as after a 429). + * Spin until the requested time. + */ + do_throttle_spin(params, "throttle/hard", + "Waiting on hard throttle (sec)", + azure->retry_after_sec); + return; + } + + if (azure->reset_sec > 0) { + /* + * We were given a hint that we are overloading + * the server. Voluntarily backoff (before we + * get tarpitted or blocked). + */ + do_throttle_spin(params, "throttle/soft", + "Waiting on soft throttle (sec)", + azure->reset_sec); + return; + } + + if (params->k_transient_delay_sec) { + /* + * Insert an arbitrary delay before retrying after a + * transient (network) failure. + */ + do_throttle_spin(params, "throttle/transient", + "Waiting to retry after network error (sec)", + params->k_transient_delay_sec); + return; + } +} + +static void set_main_creds_on_slot(struct active_request_slot *slot, + const struct credential *creds) +{ + assert(creds == &gh__global.main_creds); + + /* + * When talking to the main/origin server, we have 3 modes + * of operation: + * + * [1] The initial request is sent without loading creds + * and with ANY-AUTH set. (And the `":"` is a magic + * value.) + * + * This allows libcurl to negotiate for us if it can. + * For example, this allows NTLM to work by magic and + * we get 200s without ever seeing a 401. If libcurl + * cannot negotiate for us, it gives us a 401 (and all + * of the 401 code in this file responds to that). + * + * [2] A 401 retry will load the main creds and try again. + * This causes `creds->username`to be non-NULL (even + * if refers to a zero-length string). And we assume + * BASIC Authentication. (And a zero-length username + * is a convention for PATs, but then sometimes users + * put the PAT in their `username` field and leave the + * `password` field blank. And that works too.) + * + * [3] Subsequent requests on the same connection use + * whatever worked before. + */ + if (creds && creds->username) { + curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); + curl_easy_setopt(slot->curl, CURLOPT_USERNAME, creds->username); + curl_easy_setopt(slot->curl, CURLOPT_PASSWORD, creds->password); + } else { + curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY); + curl_easy_setopt(slot->curl, CURLOPT_USERPWD, ":"); + } +} + +static void set_cache_server_creds_on_slot(struct active_request_slot *slot, + const struct credential *creds) +{ + assert(creds == &gh__global.cache_creds); + assert(creds->username); + + /* + * Things are weird when talking to a cache-server: + * + * [1] They don't send 401s on an auth error, rather they send + * a 400 (with a nice human-readable string in the html body). + * This prevents libcurl from doing any negotiation for us. + * + * [2] Cache-servers don't manage their own passwords, but + * rather require us to send the Basic Authentication + * username & password that we would send to the main + * server. (So yes, we have to get creds validated + * against the main server creds and substitute them when + * talking to the cache-server.) + * + * This means that: + * + * [a] We cannot support cache-servers that want to use NTLM. + * + * [b] If we want to talk to a cache-server, we have get the + * Basic Auth creds for the main server. 
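The two credential modes chosen by `set_main_creds_on_slot()` map directly onto libcurl options: with no filled credential, let libcurl negotiate (ANY auth plus the magic `":"` user/password); once a 401 has forced the credential to be filled, pin Basic auth with the explicit username and password. A standalone libcurl sketch (the URL and the credential flow are hypothetical):

```c
#include <curl/curl.h>

static void set_auth(CURL *curl, const char *username, const char *password)
{
	if (username) {
		/* filled credential (possibly a zero-length username + PAT) */
		curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC);
		curl_easy_setopt(curl, CURLOPT_USERNAME, username);
		curl_easy_setopt(curl, CURLOPT_PASSWORD, password);
	} else {
		/* first attempt: let libcurl negotiate for us */
		curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_ANY);
		curl_easy_setopt(curl, CURLOPT_USERPWD, ":");
	}
}

int main(void)
{
	CURL *curl;

	curl_global_init(CURL_GLOBAL_DEFAULT);
	curl = curl_easy_init();

	curl_easy_setopt(curl, CURLOPT_URL,
			 "https://dev.azure.com/org/_git/repo");	/* hypothetical */
	set_auth(curl, NULL, NULL);
	/* ... on a 401, fill the credential and call set_auth(curl, user, pat) ... */

	curl_easy_cleanup(curl);
	curl_global_cleanup();
	return 0;
}
```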
And this may be + * problematic if the libcurl and/or the credential manager + * insists on using NTLM and prevents us from getting them. + * + * So we never try AUTH-ANY and force Basic Auth (if possible). + */ + if (creds && creds->username) { + curl_easy_setopt(slot->curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); + curl_easy_setopt(slot->curl, CURLOPT_USERNAME, creds->username); + curl_easy_setopt(slot->curl, CURLOPT_PASSWORD, creds->password); + } +} + +/* + * Do a single HTTP request WITHOUT robust-retry, auth-retry or fallback. + */ +static void do_req(const char *url_base, + const char *url_component, + const struct credential *creds, + struct gh__request_params *params, + struct gh__response_status *status) +{ + struct active_request_slot *slot; + struct slot_results results; + struct strbuf rest_url = STRBUF_INIT; + + gh__response_status__zero(status); + + if (params->b_write_to_file) { + /* Delete dirty tempfile from a previous attempt. */ + if (params->tempfile) + delete_tempfile(¶ms->tempfile); + + my_create_tempfile(status, 1, NULL, ¶ms->tempfile, NULL, NULL); + if (!params->tempfile || status->ec != GH__ERROR_CODE__OK) + return; + } else { + /* Guard against caller using dirty buffer */ + strbuf_setlen(params->buffer, 0); + } + + end_url_with_slash(&rest_url, url_base); + strbuf_addstr(&rest_url, url_component); + + do_throttle_wait(params, status); + gh__azure_throttle__zero(&gh__global_throttle[params->server_type]); + + slot = get_active_slot(); + slot->results = &results; + + curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0); /* not a HEAD request */ + curl_easy_setopt(slot->curl, CURLOPT_URL, rest_url.buf); + curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, params->headers); + + if (params->b_is_post) { + curl_easy_setopt(slot->curl, CURLOPT_POST, 1); + curl_easy_setopt(slot->curl, CURLOPT_ENCODING, NULL); + curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDS, + params->post_payload->buf); + curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, + (long)params->post_payload->len); + } else { + curl_easy_setopt(slot->curl, CURLOPT_POST, 0); + } + + if (params->b_write_to_file) { + curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite); + curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, + (void*)params->tempfile->fp); + } else { + curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, + fwrite_buffer); + curl_easy_setopt(slot->curl, CURLOPT_FILE, params->buffer); + } + + curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, parse_resp_hdr); + curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, params); + + if (params->server_type == GH__SERVER_TYPE__MAIN) + set_main_creds_on_slot(slot, creds); + else + set_cache_server_creds_on_slot(slot, creds); + + if (params->progress_base_phase2_msg.len || + params->progress_base_phase3_msg.len) { + curl_easy_setopt(slot->curl, CURLOPT_XFERINFOFUNCTION, + gh__curl_progress_cb); + curl_easy_setopt(slot->curl, CURLOPT_XFERINFODATA, params); + curl_easy_setopt(slot->curl, CURLOPT_NOPROGRESS, 0); + } else { + curl_easy_setopt(slot->curl, CURLOPT_NOPROGRESS, 1); + } + + gh__run_one_slot(slot, params, status); +} + +/* + * Compute the delay for the nth attempt. + * + * No delay for the first attempt. Then use a normal exponential backoff + * starting from 8. + */ +static int compute_transient_delay(int attempt) +{ + int v; + + if (attempt < 1) + return 0; + + /* + * Let 8K be our hard limit (for integer overflow protection). + * That's over 2 hours. This is 8<<10. 
+ */ + if (attempt > 10) + attempt = 10; + + v = 8 << (attempt - 1); + + if (v > gh__cmd_opts.max_transient_backoff_sec) + v = gh__cmd_opts.max_transient_backoff_sec; + + return v; +} + +/* + * Robustly make an HTTP request. Retry if necessary to hide common + * transient network errors and/or 429 blockages. + * + * For a transient (network) failure (where we do not have a throttle + * delay factor), we should insert a small delay to let the network + * recover. The outage might be because the VPN dropped, or the + * machine went to sleep or something and we want to give the network + * time to come back up. Insert AI here :-) + */ +static void do_req__with_robust_retry(const char *url_base, + const char *url_component, + const struct credential *creds, + struct gh__request_params *params, + struct gh__response_status *status) +{ + for (params->k_attempt = 0; + params->k_attempt < gh__cmd_opts.max_retries + 1; + params->k_attempt++) { + + do_req(url_base, url_component, creds, params, status); + + switch (status->retry) { + default: + case GH__RETRY_MODE__SUCCESS: + case GH__RETRY_MODE__HTTP_401: /* caller does auth-retry */ + case GH__RETRY_MODE__HARD_FAIL: + case GH__RETRY_MODE__FAIL_404: + return; + + case GH__RETRY_MODE__HTTP_429: + case GH__RETRY_MODE__HTTP_503: + /* + * We should have gotten a "Retry-After" header with + * these and that gives us the wait time. If not, + * fallthru and use the backoff delay. + */ + if (gh__global_throttle[params->server_type].retry_after_sec) + continue; + /*fallthru*/ + + case GH__RETRY_MODE__TRANSIENT: + params->k_transient_delay_sec = + compute_transient_delay(params->k_attempt); + continue; + } + } +} + +static void do_req__to_main(const char *url_component, + struct gh__request_params *params, + struct gh__response_status *status) +{ + params->server_type = GH__SERVER_TYPE__MAIN; + + /* + * When talking to the main Git server, we DO NOT preload the + * creds before the first request. + */ + + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); + + if (status->retry == GH__RETRY_MODE__HTTP_401) { + refresh_main_creds(); + + do_req__with_robust_retry(gh__global.main_url, url_component, + &gh__global.main_creds, + params, status); + } + + if (status->retry == GH__RETRY_MODE__SUCCESS) + approve_main_creds(); +} + +static void do_req__to_cache_server(const char *url_component, + struct gh__request_params *params, + struct gh__response_status *status) +{ + params->server_type = GH__SERVER_TYPE__CACHE; + + /* + * When talking to a cache-server, DO force load the creds. + * This implicitly preloads the creds to the main server. + */ + synthesize_cache_server_creds(); + + do_req__with_robust_retry(gh__global.cache_server_url, url_component, + &gh__global.cache_creds, + params, status); + + if (status->retry == GH__RETRY_MODE__HTTP_401) { + refresh_cache_server_creds(); + + do_req__with_robust_retry(gh__global.cache_server_url, + url_component, + &gh__global.cache_creds, + params, status); + } + + if (status->retry == GH__RETRY_MODE__SUCCESS) + approve_cache_server_creds(); +} + +/* + * Try the cache-server (if configured) then fall-back to the main Git server. 
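The retry loop above waits according to `compute_transient_delay()`: no delay on the first attempt, then an exponential series starting at 8 seconds and clamped by `gh__cmd_opts.max_transient_backoff_sec`. A standalone sketch of that schedule (the 300-second cap is only an example value):

```c
#include <stdio.h>

static int transient_delay(int attempt, int max_backoff_sec)
{
	int v;

	if (attempt < 1)
		return 0;
	if (attempt > 10)	/* keep 8 << (attempt - 1) from overflowing */
		attempt = 10;

	v = 8 << (attempt - 1);
	return v > max_backoff_sec ? max_backoff_sec : v;
}

int main(void)
{
	int k;

	for (k = 0; k <= 6; k++)
		printf("attempt %d -> wait %d sec\n",
		       k, transient_delay(k, 300));
	/* 0, 8, 16, 32, 64, 128, 256 */
	return 0;
}
```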
+ */ +static void do_req__with_fallback(const char *url_component, + struct gh__request_params *params, + struct gh__response_status *status) +{ + if (gh__global.cache_server_url && + params->b_permit_cache_server_if_defined) { + do_req__to_cache_server(url_component, params, status); + + if (status->retry == GH__RETRY_MODE__SUCCESS) + return; + + if (!gh__cmd_opts.try_fallback) + return; + + /* + * The cache-server shares creds with the main Git server, + * so if our creds failed against the cache-server, they + * will also fail against the main Git server. We just let + * this fail. + * + * Falling-back would likely just cause the 3rd (or maybe + * 4th) cred prompt. + */ + if (status->retry == GH__RETRY_MODE__HTTP_401) + return; + } + + do_req__to_main(url_component, params, status); +} + +/* + * Call "gvfs/config" REST API. + * + * Return server's response buffer. This is probably a raw JSON string. + */ +static void do__http_get__simple_endpoint(struct gh__response_status *status, + struct strbuf *response, + const char *endpoint, + const char *tr2_label) +{ + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + + strbuf_addstr(¶ms.tr2_label, tr2_label); + + params.b_is_post = 0; + params.b_write_to_file = 0; + /* cache-servers do not handle gvfs/config REST calls */ + params.b_permit_cache_server_if_defined = 0; + params.buffer = response; + params.objects_mode = GH__OBJECTS_MODE__NONE; + + params.object_count = 1; /* a bit of a lie */ + + /* + * "X-TFS-FedAuthRedirect: Suppress" disables the 302 + 203 redirect + * sequence to a login page and forces the main Git server to send a + * normal 401. + */ + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + + if (gh__cmd_opts.show_progress) { + /* + * gvfs/config has a very small reqest payload, so I don't + * see any need to report progress on the upload side of + * the GET. So just report progress on the download side. + */ + strbuf_addf(¶ms.progress_base_phase3_msg, + "Receiving %s", endpoint); + } + + do_req__with_fallback(endpoint, ¶ms, status); + + gh__request_params__release(¶ms); +} + +static void do__http_get__gvfs_config(struct gh__response_status *status, + struct strbuf *config_data) +{ + do__http_get__simple_endpoint(status, config_data, "gvfs/config", + "GET/config"); +} + +static void setup_gvfs_objects_progress(struct gh__request_params *params, + unsigned long num, unsigned long den) +{ + if (!gh__cmd_opts.show_progress) + return; + + if (params->b_is_post) { + strbuf_addf(¶ms->progress_base_phase3_msg, + "Receiving packfile %ld/%ld with %ld objects", + num, den, params->object_count); + } + /* If requesting only one object, then do not show progress */ +} + +/* + * Call "gvfs/objects/" REST API to fetch a loose object + * and write it to the ODB. 
+ */ +static void do__http_get__gvfs_object(struct gh__response_status *status, + const struct object_id *oid, + unsigned long l_num, unsigned long l_den, + struct string_list *result_list) +{ + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + struct strbuf component_url = STRBUF_INIT; + + gh__response_status__zero(status); + + strbuf_addf(&component_url, "gvfs/objects/%s", oid_to_hex(oid)); + + strbuf_addstr(¶ms.tr2_label, "GET/objects"); + + params.b_is_post = 0; + params.b_write_to_file = 1; + params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__GET; + + params.object_count = 1; + + params.result_list = result_list; + + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + + oidcpy(¶ms.loose_oid, oid); + + setup_gvfs_objects_progress(¶ms, l_num, l_den); + + do_req__with_fallback(component_url.buf, ¶ms, status); + + gh__request_params__release(¶ms); + strbuf_release(&component_url); +} + +/* + * Call "gvfs/objects" POST REST API to fetch a batch of objects + * from the OIDSET. Normal, this is results in a packfile containing + * `nr_wanted_in_block` objects. And we return the number actually + * consumed (along with the filename of the resulting packfile). + * + * However, if we only have 1 oid (remaining) in the OIDSET, the + * server *MAY* respond to our POST with a loose object rather than + * a packfile with 1 object. + * + * Append a message to the result_list describing the result. + * + * Return the number of OIDs consumed from the OIDSET. + */ +static void do__http_post__gvfs_objects(struct gh__response_status *status, + struct oidset_iter *iter, + unsigned long nr_wanted_in_block, + int j_pack_num, int j_pack_den, + struct string_list *result_list, + unsigned long *nr_oid_taken) +{ + struct json_writer jw_req = JSON_WRITER_INIT; + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + + gh__response_status__zero(status); + + params.object_count = build_json_payload__gvfs_objects( + &jw_req, iter, nr_wanted_in_block, ¶ms.loose_oid); + *nr_oid_taken = params.object_count; + + strbuf_addstr(¶ms.tr2_label, "POST/objects"); + + params.b_is_post = 1; + params.b_write_to_file = 1; + params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__POST; + + params.post_payload = &jw_req.json; + + params.result_list = result_list; + + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + params.headers = curl_slist_append(params.headers, + "Content-Type: application/json"); + /* + * If our POST contains more than one object, we want the + * server to send us a packfile. 
We DO NOT want the non-standard + * concatenated loose object format, so we DO NOT send: + * "Accept: application/x-git-loose-objects" (plural) + * + * However, if the payload only requests 1 OID, the server + * will send us a single loose object instead of a packfile, + * so we ACK that and send: + * "Accept: application/x-git-loose-object" (singular) + */ + params.headers = curl_slist_append(params.headers, + "Accept: application/x-git-packfile"); + params.headers = curl_slist_append(params.headers, + "Accept: application/x-git-loose-object"); + + setup_gvfs_objects_progress(¶ms, j_pack_num, j_pack_den); + + do_req__with_fallback("gvfs/objects", ¶ms, status); + + gh__request_params__release(¶ms); + jw_release(&jw_req); +} + +struct find_last_data { + timestamp_t timestamp; + int nr_files; +}; + +static void cb_find_last(const char *full_path, size_t full_path_len, + const char *file_path, void *void_data) +{ + struct find_last_data *data = void_data; + const char *val; + timestamp_t t; + + if (!skip_prefix(file_path, "prefetch-", &val)) + return; + if (!ends_with(val, ".pack")) + return; + + data->nr_files++; + + /* + * We expect prefetch packfiles named like: + * + * prefetch--.pack + */ + t = strtol(val, NULL, 10); + + data->timestamp = MY_MAX(t, data->timestamp); +} + +/* + * Find the server timestamp on the last prefetch packfile that + * we have in the ODB. + * + * TODO I'm going to assume that all prefetch packs are created + * TODO equal and take the one with the largest t value. + * TODO + * TODO Or should we look for one marked with .keep ? + * + * TODO Alternatively, should we maybe get the 2nd largest? + * TODO (Or maybe subtract an hour delta from the largest?) + * TODO + * TODO Since each cache-server maintains its own set of prefetch + * TODO packs (such that 2 requests may hit 2 different + * TODO load-balanced servers and get different answers (with or + * TODO without clock-skew issues)), is it possible for us to miss + * TODO the absolute fringe of new commits and trees? + * TODO + * TODO That is, since the cache-server generates hourly prefetch + * TODO packs, we could do a prefetch and be up-to-date, but then + * TODO do the main fetch and hit a different cache/main server + * TODO and be behind by as much as an hour and have to demand- + * TODO load the commits/trees. + * + * TODO Alternatively, should we compare the last timestamp found + * TODO with "now" and silently do nothing if within an epsilon? + */ +static void find_last_prefetch_timestamp(timestamp_t *last) +{ + struct find_last_data data; + + memset(&data, 0, sizeof(data)); + + for_each_file_in_pack_dir(gh__global.buf_odb_path.buf, cb_find_last, &data); + + *last = data.timestamp; +} + +/* + * Call "gvfs/prefetch[?lastPackTimestamp=]" REST API to + * fetch a series of packfiles and write them to the ODB. + * + * Return a list of packfile names. 
+ */ +static void do__http_get__gvfs_prefetch(struct gh__response_status *status, + timestamp_t seconds_since_epoch, + struct string_list *result_list) +{ + struct gh__request_params params = GH__REQUEST_PARAMS_INIT; + struct strbuf component_url = STRBUF_INIT; + + gh__response_status__zero(status); + + strbuf_addstr(&component_url, "gvfs/prefetch"); + + if (!seconds_since_epoch) + find_last_prefetch_timestamp(&seconds_since_epoch); + if (seconds_since_epoch) + strbuf_addf(&component_url, "?lastPackTimestamp=%"PRItime, + seconds_since_epoch); + + params.b_is_post = 0; + params.b_write_to_file = 1; + params.b_permit_cache_server_if_defined = 1; + params.objects_mode = GH__OBJECTS_MODE__PREFETCH; + + params.object_count = -1; + + params.result_list = result_list; + + params.headers = http_copy_default_headers(); + params.headers = curl_slist_append(params.headers, + "X-TFS-FedAuthRedirect: Suppress"); + params.headers = curl_slist_append(params.headers, + "Pragma: no-cache"); + params.headers = curl_slist_append(params.headers, + "Accept: application/x-gvfs-timestamped-packfiles-indexes"); + + if (gh__cmd_opts.show_progress) + strbuf_addf(¶ms.progress_base_phase3_msg, + "Prefetch %"PRItime" (%s)", + seconds_since_epoch, + show_date(seconds_since_epoch, 0, + DATE_MODE(ISO8601))); + + do_req__with_fallback(component_url.buf, ¶ms, status); + + gh__request_params__release(¶ms); + strbuf_release(&component_url); +} + +/* + * Drive one or more HTTP GET requests to fetch the objects + * in the given OIDSET. These are received into loose objects. + * + * Accumulate results for each request in `result_list` until we get a + * hard error and have to stop. + */ +static void do__http_get__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) +{ + struct oidset_iter iter; + struct strbuf err404 = STRBUF_INIT; + const struct object_id *oid; + unsigned long k; + int had_404 = 0; + + gh__response_status__zero(status); + if (!nr_oid_total) + return; + + oidset_iter_init(oids, &iter); + + for (k = 0; k < nr_oid_total; k++) { + oid = oidset_iter_next(&iter); + + do__http_get__gvfs_object(status, oid, k+1, nr_oid_total, + result_list); + + /* + * If we get a 404 for an individual object, ignore + * it and get the rest. We'll fixup the 'ec' later. + */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, "%s: from GET %s", + status->error_message.buf, + oid_to_hex(oid)); + /* + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. + */ + had_404 = 1; + continue; + } + + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addf(&status->error_message, ": from GET %s", + oid_to_hex(oid)); + goto cleanup; + } + } + +cleanup: + if (had_404 && status->ec == GH__ERROR_CODE__OK) { + strbuf_setlen(&status->error_message, 0); + strbuf_addbuf(&status->error_message, &err404); + status->ec = GH__ERROR_CODE__HTTP_404; + } + + strbuf_release(&err404); +} + +/* + * Drive one or more HTTP POST requests to bulk fetch the objects in + * the given OIDSET. Create one or more packfiles and/or loose objects. + * + * Accumulate results for each request in `result_list` until we get a + * hard error and have to stop. 
+ */ +static void do__http_post__fetch_oidset(struct gh__response_status *status, + struct oidset *oids, + unsigned long nr_oid_total, + struct string_list *result_list) +{ + struct oidset_iter iter; + struct strbuf err404 = STRBUF_INIT; + unsigned long k; + unsigned long nr_oid_taken; + int j_pack_den = 0; + int j_pack_num = 0; + int had_404 = 0; + + gh__response_status__zero(status); + if (!nr_oid_total) + return; + + oidset_iter_init(oids, &iter); + + j_pack_den = ((nr_oid_total + gh__cmd_opts.block_size - 1) + / gh__cmd_opts.block_size); + + for (k = 0; k < nr_oid_total; k += nr_oid_taken) { + j_pack_num++; + + do__http_post__gvfs_objects(status, &iter, + gh__cmd_opts.block_size, + j_pack_num, j_pack_den, + result_list, + &nr_oid_taken); + + /* + * Because the oidset iterator has random + * order, it does no good to say the k-th or + * n-th chunk was incomplete; the client + * cannot use that index for anything. + * + * We get a 404 when at least one object in + * the chunk was not found. + * + * For now, ignore the 404 and go on to the + * next chunk and then fixup the 'ec' later. + */ + if (status->ec == GH__ERROR_CODE__HTTP_404) { + if (!err404.len) + strbuf_addf(&err404, + "%s: from POST", + status->error_message.buf); + /* + * Mark the fetch as "incomplete", but don't + * stop trying to get other chunks. + */ + had_404 = 1; + continue; + } + + if (status->ec != GH__ERROR_CODE__OK) { + /* Stop at the first hard error. */ + strbuf_addstr(&status->error_message, + ": from POST"); + goto cleanup; + } + } + +cleanup: + if (had_404 && status->ec == GH__ERROR_CODE__OK) { + strbuf_setlen(&status->error_message, 0); + strbuf_addbuf(&status->error_message, &err404); + status->ec = GH__ERROR_CODE__HTTP_404; + } + + strbuf_release(&err404); +} + +/* + * Finish with initialization. This happens after the main option + * parsing, dispatch to sub-command, and sub-command option parsing + * and before actually doing anything. + * + * Optionally configure the cache-server if the sub-command will + * use it. + */ +static void finish_init(int setup_cache_server) +{ + select_odb(); + + lookup_main_url(); + gh_http_init(); + + if (setup_cache_server) + select_cache_server(); +} + +/* + * Request gvfs/config from main Git server. (Config data is not + * available from a GVFS cache-server.) + * + * Print the received server configuration (as the raw JSON string). 
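do__http_post__fetch_oidset() above derives the number of POST batches (`j_pack_den`) from the requested OID count with a ceiling division over the block size. For reference, with an arbitrary example block size:

```
#include <stdio.h>

/* Number of POST batches needed for nr OIDs at a given block size. */
static unsigned long batches(unsigned long nr, unsigned long block_size)
{
	return (nr + block_size - 1) / block_size;   /* ceiling division */
}

int main(void)
{
	printf("%lu\n", batches(1, 4000));      /* 1 */
	printf("%lu\n", batches(4000, 4000));   /* 1 */
	printf("%lu\n", batches(4001, 4000));   /* 2 */
	return 0;
}
```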
+ */ +static enum gh__error_code do_sub_cmd__config(int argc, const char **argv) +{ + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct strbuf config_data = STRBUF_INIT; + enum gh__error_code ec = GH__ERROR_CODE__OK; + + trace2_cmd_mode("config"); + + finish_init(0); + + do__http_get__gvfs_config(&status, &config_data); + ec = status.ec; + + if (ec == GH__ERROR_CODE__OK) + printf("%s\n", config_data.buf); + else + error("config: %s", status.error_message.buf); + + gh__response_status__release(&status); + strbuf_release(&config_data); + + return ec; +} + +static enum gh__error_code do_sub_cmd__endpoint(int argc, const char **argv) +{ + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct strbuf data = STRBUF_INIT; + enum gh__error_code ec = GH__ERROR_CODE__OK; + const char *endpoint; + + if (argc != 2) + return GH__ERROR_CODE__ERROR; + endpoint = argv[1]; + + trace2_cmd_mode(endpoint); + + finish_init(0); + + do__http_get__simple_endpoint(&status, &data, endpoint, endpoint); + ec = status.ec; + + if (ec == GH__ERROR_CODE__OK) + printf("%s\n", data.buf); + else + error("config: %s", status.error_message.buf); + + gh__response_status__release(&status); + strbuf_release(&data); + + return ec; +} + +/* + * Read a list of objects from stdin and fetch them as a series of + * single object HTTP GET requests. + */ +static enum gh__error_code do_sub_cmd__get(int argc, const char **argv) +{ + static struct option get_options[] = { + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + unsigned long nr_oid_total; + int k; + + trace2_cmd_mode("get"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(objects_get_usage, get_options); + + argc = parse_options(argc, argv, NULL, get_options, objects_get_usage, 0); + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + nr_oid_total = read_stdin_for_oids(&oids); + + do__http_get__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("get: %s", status.error_message.buf); + + gh__response_status__release(&status); + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Read a list of objects from stdin and fetch them in a single request (or + * multiple block-size requests) using one or more HTTP POST requests. 
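The `get` subcommand above reads candidate object ids from stdin via read_stdin_for_oids(). A rough standalone stand-in that only checks the line shape; 40 hex characters assumes SHA-1, and the real code uses get_oid_hex() and de-duplicates into an oidset rather than merely counting:

```
#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Accept a line that is exactly 40 hex digits (a SHA-1 object id). */
static int looks_like_oid(const char *line)
{
	size_t len = strlen(line);

	if (len && line[len - 1] == '\n')
		len--;
	if (len != 40)
		return 0;
	for (size_t i = 0; i < len; i++)
		if (!isxdigit((unsigned char)line[i]))
			return 0;
	return 1;
}

int main(void)
{
	char line[128];
	unsigned long nr = 0;

	while (fgets(line, sizeof(line), stdin))
		if (looks_like_oid(line))
			nr++;

	printf("%lu candidate object id(s)\n", nr);
	return 0;
}
```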
+ */ +static enum gh__error_code do_sub_cmd__post(int argc, const char **argv) +{ + static struct option post_options[] = { + OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size, + N_("number of objects to request at a time")), + OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, + N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + unsigned long nr_oid_total; + int k; + + trace2_cmd_mode("post"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(objects_post_usage, post_options); + + argc = parse_options(argc, argv, NULL, post_options, objects_post_usage, 0); + if (gh__cmd_opts.depth < 1) + gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + nr_oid_total = read_stdin_for_oids(&oids); + + do__http_post__fetch_oidset(&status, &oids, nr_oid_total, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("post: %s", status.error_message.buf); + + gh__response_status__release(&status); + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Interpret the given string as a timestamp and compute an absolute + * UTC-seconds-since-epoch value (and without TZ). + * + * Note that the gvfs/prefetch API only accepts seconds since epoch, + * so that is all we really need here. But there is a tradition of + * various Git commands allowing a variety of formats for args like + * this. For example, see the `--date` arg in `git commit`. We allow + * these other forms mainly for testing purposes. + */ +static int my_parse_since(const char *since, timestamp_t *p_timestamp) +{ + int offset = 0; + int errors = 0; + unsigned long t; + + if (!parse_date_basic(since, p_timestamp, &offset)) + return 0; + + t = approxidate_careful(since, &errors); + if (!errors) { + *p_timestamp = t; + return 0; + } + + return -1; +} + +/* + * Ask the server for all available packfiles -or- all available since + * the given timestamp. 
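my_parse_since() above tries a strict date parse first and only then falls back to the permissive approxidate form. A standalone sketch of that two-stage shape for a POSIX environment, with strptime() and strtoll() standing in for parse_date_basic() and approxidate_careful():

```
#define _GNU_SOURCE   /* for strptime() and timegm() */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Strict ISO-8601-ish parse first, raw seconds-since-epoch as fallback. */
static int parse_since(const char *s, long long *out)
{
	struct tm tm = { 0 };
	char *end;
	long long t;

	if (strptime(s, "%Y-%m-%dT%H:%M:%S", &tm)) {
		*out = (long long)timegm(&tm);
		return 0;
	}

	t = strtoll(s, &end, 10);
	if (end != s && !*end) {
		*out = t;
		return 0;
	}
	return -1;
}

int main(void)
{
	long long t;

	if (!parse_since("2023-11-14T22:13:20", &t))
		printf("%lld\n", t);   /* 1700000000 (UTC) */
	if (!parse_since("1700000000", &t))
		printf("%lld\n", t);   /* 1700000000 */
	return 0;
}
```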
+ */ +static enum gh__error_code do_sub_cmd__prefetch(int argc, const char **argv) +{ + static const char *since_str; + static struct option prefetch_options[] = { + OPT_STRING(0, "since", &since_str, N_("since"), N_("seconds since epoch")), + OPT_END(), + }; + + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + timestamp_t seconds_since_epoch = 0; + int k; + + trace2_cmd_mode("prefetch"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(prefetch_usage, prefetch_options); + + argc = parse_options(argc, argv, NULL, prefetch_options, prefetch_usage, 0); + if (since_str && *since_str) { + if (my_parse_since(since_str, &seconds_since_epoch)) + die("could not parse 'since' field"); + } + + finish_init(1); + + do__http_get__gvfs_prefetch(&status, seconds_since_epoch, &result_list); + + ec = status.ec; + + for (k = 0; k < result_list.nr; k++) + printf("%s\n", result_list.items[k].string); + + if (ec != GH__ERROR_CODE__OK) + error("prefetch: %s", status.error_message.buf); + + gh__response_status__release(&status); + string_list_clear(&result_list, 0); + + return ec; +} + +/* + * Handle the 'objects.get' and 'objects.post' and 'objects.prefetch' + * verbs in "server mode". + * + * Only call error() and set ec for hard errors where we cannot + * communicate correctly with the foreground client process. Pass any + * actual data errors (such as 404's or 401's from the fetch) back to + * the client process. + */ +static enum gh__error_code do_server_subprocess__objects(const char *verb_line) +{ + struct gh__response_status status = GH__RESPONSE_STATUS_INIT; + struct oidset oids = OIDSET_INIT; + struct object_id oid; + struct string_list result_list = STRING_LIST_INIT_DUP; + enum gh__error_code ec = GH__ERROR_CODE__OK; + char *line; + int len; + int err; + int k; + enum gh__objects_mode objects_mode; + unsigned long nr_oid_total = 0; + timestamp_t seconds_since_epoch = 0; + + if (!strcmp(verb_line, "objects.get")) + objects_mode = GH__OBJECTS_MODE__GET; + else if (!strcmp(verb_line, "objects.post")) + objects_mode = GH__OBJECTS_MODE__POST; + else if (!strcmp(verb_line, "objects.prefetch")) + objects_mode = GH__OBJECTS_MODE__PREFETCH; + else { + error("server: unexpected objects-mode verb '%s'", verb_line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + switch (objects_mode) { + case GH__OBJECTS_MODE__GET: + case GH__OBJECTS_MODE__POST: + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; + + if (get_oid_hex(line, &oid)) { + error("server: invalid oid syntax '%s'", line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + if (!oidset_insert(&oids, &oid)) + nr_oid_total++; + } + + if (!nr_oid_total) { + /* if zero objects requested, trivial OK. 
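The objects.* verbs above are exchanged in pkt-line framing: a 4-hex-digit length that includes the header itself, then the payload, with "0000" as the terminating flush. A standalone sketch that prints one such request; the object id is an arbitrary placeholder, and whether the verb line carries a trailing newline is a detail of the client side not shown in this patch:

```
#include <stdio.h>
#include <string.h>

/* Emit one pkt-line: 4 hex digits of total length, then the payload. */
static void emit_pkt(const char *payload)
{
	printf("%04x%s", (unsigned int)(strlen(payload) + 4), payload);
}

int main(void)
{
	emit_pkt("objects.get\n");
	emit_pkt("4ae018e06e33482dd7602f95ae2c0922e4b9b904\n");
	printf("0000\n");   /* flush packet: end of the request */
	return 0;
}
```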
*/ + if (packet_write_fmt_gently(1, "ok\n")) { + error("server: cannot write 'get' result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + } else + ec = GH__ERROR_CODE__OK; + goto cleanup; + } + + if (objects_mode == GH__OBJECTS_MODE__GET) + do__http_get__fetch_oidset(&status, &oids, + nr_oid_total, &result_list); + else + do__http_post__fetch_oidset(&status, &oids, + nr_oid_total, &result_list); + break; + + case GH__OBJECTS_MODE__PREFETCH: + /* get optional timestamp line */ + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; + + seconds_since_epoch = strtoul(line, NULL, 10); + } + + do__http_get__gvfs_prefetch(&status, seconds_since_epoch, + &result_list); + break; + + default: + BUG("unexpected object_mode in switch '%d'", objects_mode); + } + + /* + * Write pathname of the ODB where we wrote all of the objects + * we fetched. + */ + if (packet_write_fmt_gently(1, "odb %s\n", + gh__global.buf_odb_path.buf)) { + error("server: cannot write 'odb' to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + for (k = 0; k < result_list.nr; k++) + if (packet_write_fmt_gently(1, "%s\n", + result_list.items[k].string)) + { + error("server: cannot write result to client: '%s'", + result_list.items[k].string); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + /* + * We only use status.ec to tell the client whether the request + * was complete, incomplete, or had IO errors. We DO NOT return + * this value to our caller. + */ + err = 0; + if (status.ec == GH__ERROR_CODE__OK) + err = packet_write_fmt_gently(1, "ok\n"); + else if (status.ec == GH__ERROR_CODE__HTTP_404) + err = packet_write_fmt_gently(1, "partial\n"); + else + err = packet_write_fmt_gently(1, "error %s\n", + status.error_message.buf); + if (err) { + error("server: cannot write result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + + if (packet_flush_gently(1)) { + error("server: cannot flush result to client"); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + +cleanup: + oidset_clear(&oids); + string_list_clear(&result_list, 0); + + return ec; +} + +typedef enum gh__error_code (fn_subprocess_cmd)(const char *verb_line); + +struct subprocess_capability { + const char *name; + int client_has; + fn_subprocess_cmd *pfn; +}; + +static struct subprocess_capability caps[] = { + { "objects", 0, do_server_subprocess__objects }, + { NULL, 0, NULL }, +}; + +/* + * Handle the subprocess protocol handshake as described in: + * [] Documentation/technical/protocol-common.txt + * [] Documentation/technical/long-running-process-protocol.txt + */ +static int do_protocol_handshake(void) +{ +#define OUR_SUBPROCESS_VERSION "1" + + char *line; + int len; + int k; + int b_support_our_version = 0; + + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line || strcmp(line, "gvfs-helper-client")) { + error("server: subprocess welcome handshake failed: %s", line); + return -1; + } + + while (1) { + const char *v; + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; + if (!skip_prefix(line, "version=", &v)) { + error("server: subprocess version handshake failed: %s", + line); + return -1; + } + b_support_our_version |= (!strcmp(v, OUR_SUBPROCESS_VERSION)); + } + if (!b_support_our_version) { + error("server: client does not support our version: %s", + OUR_SUBPROCESS_VERSION); + return -1; + } + + if (packet_write_fmt_gently(1, "gvfs-helper-server\n") || + packet_write_fmt_gently(1, 
"version=%s\n", + OUR_SUBPROCESS_VERSION) || + packet_flush_gently(1)) { + error("server: cannot write version handshake"); + return -1; + } + + while (1) { + const char *v; + int k; + + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) + break; + if (!skip_prefix(line, "capability=", &v)) { + error("server: subprocess capability handshake failed: %s", + line); + return -1; + } + for (k = 0; caps[k].name; k++) + if (!strcmp(v, caps[k].name)) + caps[k].client_has = 1; + } + + for (k = 0; caps[k].name; k++) + if (caps[k].client_has) + if (packet_write_fmt_gently(1, "capability=%s\n", + caps[k].name)) { + error("server: cannot write capabilities handshake: %s", + caps[k].name); + return -1; + } + if (packet_flush_gently(1)) { + error("server: cannot write capabilities handshake"); + return -1; + } + + return 0; +} + +/* + * Interactively listen to stdin for a series of commands and execute them. + */ +static enum gh__error_code do_sub_cmd__server(int argc, const char **argv) +{ + static struct option server_options[] = { + OPT_MAGNITUDE('b', "block-size", &gh__cmd_opts.block_size, + N_("number of objects to request at a time")), + OPT_INTEGER('d', "depth", &gh__cmd_opts.depth, + N_("Commit depth")), + OPT_INTEGER('r', "max-retries", &gh__cmd_opts.max_retries, + N_("retries for transient network errors")), + OPT_END(), + }; + + enum gh__error_code ec = GH__ERROR_CODE__OK; + char *line; + int len; + int k; + + trace2_cmd_mode("server"); + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(server_usage, server_options); + + argc = parse_options(argc, argv, NULL, server_options, server_usage, 0); + if (gh__cmd_opts.depth < 1) + gh__cmd_opts.depth = 1; + if (gh__cmd_opts.max_retries < 0) + gh__cmd_opts.max_retries = 0; + + finish_init(1); + + if (do_protocol_handshake()) { + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + +top_of_loop: + while (1) { + len = packet_read_line_gently(0, NULL, &line); + if (len < 0 || !line) { + /* use extra FLUSH as a QUIT */ + ec = GH__ERROR_CODE__OK; + goto cleanup; + } + + for (k = 0; caps[k].name; k++) { + if (caps[k].client_has && + starts_with(line, caps[k].name)) { + ec = (caps[k].pfn)(line); + if (ec != GH__ERROR_CODE__OK) + goto cleanup; + goto top_of_loop; + } + } + + error("server: unknown command '%s'", line); + ec = GH__ERROR_CODE__SUBPROCESS_SYNTAX; + goto cleanup; + } + +cleanup: + return ec; +} + +static enum gh__error_code do_sub_cmd(int argc, const char **argv) +{ + if (!strcmp(argv[0], "get")) + return do_sub_cmd__get(argc, argv); + + if (!strcmp(argv[0], "post")) + return do_sub_cmd__post(argc, argv); + + if (!strcmp(argv[0], "config")) + return do_sub_cmd__config(argc, argv); + + if (!strcmp(argv[0], "endpoint")) + return do_sub_cmd__endpoint(argc, argv); + + if (!strcmp(argv[0], "prefetch")) + return do_sub_cmd__prefetch(argc, argv); + + /* + * server mode is for talking with git.exe via the "gh_client_" API + * using packet-line format. + */ + if (!strcmp(argv[0], "server")) + return do_sub_cmd__server(argc, argv); + + return GH__ERROR_CODE__USAGE; +} + +/* + * Communicate with the primary Git server or a GVFS cache-server using the + * GVFS Protocol. 
+ * + * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + */ +int cmd_main(int argc, const char **argv) +{ + static struct option main_options[] = { + OPT_STRING('r', "remote", &gh__cmd_opts.remote_name, + N_("remote"), + N_("Remote name")), + OPT_BOOL('f', "fallback", &gh__cmd_opts.try_fallback, + N_("Fallback to Git server if cache-server fails")), + OPT_CALLBACK(0, "cache-server", NULL, + N_("cache-server"), + N_("cache-server=disable|trust|verify|error"), + option_parse_cache_server_mode), + OPT_CALLBACK(0, "shared-cache", NULL, + N_("pathname"), + N_("Pathname to shared objects directory"), + option_parse_shared_cache_directory), + OPT_BOOL('p', "progress", &gh__cmd_opts.show_progress, + N_("Show progress")), + OPT_END(), + }; + + enum gh__error_code ec = GH__ERROR_CODE__OK; + + if (argc > 1 && !strcmp(argv[1], "-h")) + usage_with_options(main_usage, main_options); + + trace2_cmd_name("gvfs-helper"); + packet_trace_identity("gvfs-helper"); + + setup_git_directory_gently(NULL); + + /* Set any non-zero initial values in gh__cmd_opts. */ + gh__cmd_opts.depth = GH__DEFAULT__OBJECTS_POST__COMMIT_DEPTH; + gh__cmd_opts.block_size = GH__DEFAULT__OBJECTS_POST__BLOCK_SIZE; + gh__cmd_opts.max_retries = GH__DEFAULT_MAX_RETRIES; + gh__cmd_opts.max_transient_backoff_sec = + GH__DEFAULT_MAX_TRANSIENT_BACKOFF_SEC; + + gh__cmd_opts.show_progress = !!isatty(2); + + // TODO use existing gvfs config settings to override our GH__DEFAULT_ + // TODO values in gh__cmd_opts. (And maybe add/remove our command line + // TODO options for them.) + // TODO + // TODO See "scalar.max-retries" (and maybe "gvfs.max-retries") + + git_config(git_default_config, NULL); + + argc = parse_options(argc, argv, NULL, main_options, main_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc == 0) + usage_with_options(main_usage, main_options); + + ec = do_sub_cmd(argc, argv); + + gh_http_cleanup(); + + if (ec == GH__ERROR_CODE__USAGE) + usage_with_options(main_usage, main_options); + + return ec; +} diff --git a/gvfs.c b/gvfs.c new file mode 100644 index 00000000000000..9ffb6fcafb01af --- /dev/null +++ b/gvfs.c @@ -0,0 +1,36 @@ +#include "cache.h" +#include "gvfs.h" +#include "config.h" + +static int gvfs_config_loaded; +static int core_gvfs_is_bool; + +static int early_core_gvfs_config(const char *var, const char *value, void *data) +{ + if (!strcmp(var, "core.gvfs")) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &core_gvfs_is_bool); + return 0; +} + +void gvfs_load_config_value(const char *value) +{ + if (value) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &core_gvfs_is_bool); + else if (startup_info->have_repository == 0) + read_early_config(early_core_gvfs_config, NULL); + else + git_config_get_bool_or_int("core.gvfs", &core_gvfs_is_bool, &core_gvfs); + + /* Turn on all bits if a bool was set in the settings */ + if (core_gvfs_is_bool && core_gvfs) + core_gvfs = -1; +} + +int gvfs_config_is_set(int mask) +{ + if (!gvfs_config_loaded) + gvfs_load_config_value(NULL); + + gvfs_config_loaded = 1; + return (core_gvfs & mask) == mask; +} diff --git a/gvfs.h b/gvfs.h new file mode 100644 index 00000000000000..7d999f3e8d234f --- /dev/null +++ b/gvfs.h @@ -0,0 +1,25 @@ +#ifndef GVFS_H +#define GVFS_H + + +/* + * This file is for the specific settings and methods + * used for GVFS functionality + */ + + +/* + * The list of bits in the core_gvfs setting + */ +#define GVFS_SKIP_SHA_ON_INDEX (1 << 0) +#define GVFS_BLOCK_COMMANDS (1 << 1) +#define GVFS_MISSING_OK (1 << 2) +#define 
GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) +#define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) +#define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6) +#define GVFS_PREFETCH_DURING_FETCH (1 << 7) + +void gvfs_load_config_value(const char *value); +int gvfs_config_is_set(int mask); + +#endif /* GVFS_H */ diff --git a/hook.c b/hook.c index a493939a4fc590..e4f7c6405acedf 100644 --- a/hook.c +++ b/hook.c @@ -3,12 +3,72 @@ #include "run-command.h" #include "config.h" +static int early_hooks_path_config(const char *var, const char *value, void *data) +{ + if (!strcmp(var, "core.hookspath")) + return git_config_pathname((const char **)data, var, value); + + return 0; +} + +/* Discover the hook before setup_git_directory() was called */ +static const char *hook_path_early(const char *name, struct strbuf *result) +{ + static struct strbuf hooks_dir = STRBUF_INIT; + static int initialized; + + if (initialized < 0) + return NULL; + + if (!initialized) { + struct strbuf gitdir = STRBUF_INIT, commondir = STRBUF_INIT; + const char *early_hooks_dir = NULL; + + if (discover_git_directory(&commondir, &gitdir) < 0) { + initialized = -1; + return NULL; + } + + read_early_config(early_hooks_path_config, &early_hooks_dir); + if (!early_hooks_dir) + strbuf_addf(&hooks_dir, "%s/hooks/", commondir.buf); + else { + strbuf_add_absolute_path(&hooks_dir, early_hooks_dir); + free((void *)early_hooks_dir); + strbuf_addch(&hooks_dir, '/'); + } + + strbuf_release(&gitdir); + strbuf_release(&commondir); + + initialized = 1; + } + + strbuf_addf(result, "%s%s", hooks_dir.buf, name); + return result->buf; +} + const char *find_hook(const char *name) { static struct strbuf path = STRBUF_INIT; strbuf_reset(&path); - strbuf_git_path(&path, "hooks/%s", name); + if (have_git_dir()) { + static int forced_config; + + if (!forced_config) { + if (!git_hooks_path) { + git_config_get_pathname("core.hookspath", + &git_hooks_path); + UNLEAK(git_hooks_path); + } + forced_config = 1; + } + + strbuf_git_path(&path, "hooks/%s", name); + } else if (!hook_path_early(name, &path)) + return NULL; + if (access(path.buf, X_OK) < 0) { int err = errno; @@ -113,10 +173,22 @@ int run_hooks_opt(const char *hook_name, struct run_hooks_opt *options) .hook_name = hook_name, .options = options, }; - const char *const hook_path = find_hook(hook_name); + const char *hook_path = find_hook(hook_name); int jobs = 1; int ret = 0; + /* + * Backwards compatibility hack in VFS for Git: when originally + * introduced (and used!), it was called `post-indexchanged`, but this + * name was changed during the review on the Git mailing list. + * + * Therefore, when the `post-index-change` hook is not found, let's + * look for a hook with the old name (which would be found in case of + * already-existing checkouts). 
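gvfs_config_is_set() treats core.gvfs as a bitmask, and a boolean `true` setting turns on every bit (core_gvfs becomes -1). A standalone illustration using the bit values defined in gvfs.h above:

```
#include <stdio.h>

#define GVFS_SKIP_SHA_ON_INDEX (1 << 0)
#define GVFS_BLOCK_COMMANDS    (1 << 1)
#define GVFS_MISSING_OK        (1 << 2)

static int core_gvfs;

/* A bit is "set" only if every bit of the mask is present. */
static int gvfs_is_set(int mask)
{
	return (core_gvfs & mask) == mask;
}

int main(void)
{
	core_gvfs = GVFS_MISSING_OK;                        /* core.gvfs = 4 */
	printf("%d\n", gvfs_is_set(GVFS_MISSING_OK));       /* 1 */
	printf("%d\n", gvfs_is_set(GVFS_BLOCK_COMMANDS));   /* 0 */

	core_gvfs = -1;                     /* core.gvfs = true: all bits on */
	printf("%d\n", gvfs_is_set(GVFS_BLOCK_COMMANDS));   /* 1 */
	return 0;
}
```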
+ */ + if (!hook_path && !strcmp(hook_name, "post-index-change")) + hook_path = find_hook("post-indexchanged"); + if (!options) BUG("a struct run_hooks_opt must be provided to run_hooks"); diff --git a/json-parser.c b/json-parser.c new file mode 100644 index 00000000000000..30799e17dc0a04 --- /dev/null +++ b/json-parser.c @@ -0,0 +1,182 @@ +#include "cache.h" +#include "json-parser.h" + +static int reset_iterator(struct json_iterator *it) +{ + it->p = it->begin = it->json; + strbuf_release(&it->key); + strbuf_release(&it->string_value); + it->type = JSON_NULL; + return -1; +} + +static int parse_json_string(struct json_iterator *it, struct strbuf *out) +{ + const char *begin = it->p; + + if (*(it->p)++ != '"') + return error("expected double quote: '%.*s'", 5, begin), + reset_iterator(it); + + strbuf_reset(&it->string_value); +#define APPEND(c) strbuf_addch(out, c) + while (*it->p != '"') { + switch (*it->p) { + case '\0': + return error("incomplete string: '%s'", begin), + reset_iterator(it); + case '\\': + it->p++; + if (*it->p == '\\' || *it->p == '"') + APPEND(*it->p); + else if (*it->p == 'b') + APPEND(8); + else if (*it->p == 't') + APPEND(9); + else if (*it->p == 'n') + APPEND(10); + else if (*it->p == 'f') + APPEND(12); + else if (*it->p == 'r') + APPEND(13); + else if (*it->p == 'u') { + unsigned char binary[2]; + int i; + + if (hex_to_bytes(binary, it->p + 1, 2) < 0) + return error("invalid: '%.*s'", + 6, it->p - 1), + reset_iterator(it); + it->p += 4; + + i = (binary[0] << 8) | binary[1]; + if (i < 0x80) + APPEND(i); + else if (i < 0x0800) { + APPEND(0xc0 | ((i >> 6) & 0x1f)); + APPEND(0x80 | (i & 0x3f)); + } else if (i < 0x10000) { + APPEND(0xe0 | ((i >> 12) & 0x0f)); + APPEND(0x80 | ((i >> 6) & 0x3f)); + APPEND(0x80 | (i & 0x3f)); + } else { + APPEND(0xf0 | ((i >> 18) & 0x07)); + APPEND(0x80 | ((i >> 12) & 0x3f)); + APPEND(0x80 | ((i >> 6) & 0x3f)); + APPEND(0x80 | (i & 0x3f)); + } + } + break; + default: + APPEND(*it->p); + } + it->p++; + } + + it->end = it->p++; + return 0; +} + +static void skip_whitespace(struct json_iterator *it) +{ + while (isspace(*it->p)) + it->p++; +} + +int iterate_json(struct json_iterator *it) +{ + skip_whitespace(it); + it->begin = it->p; + + switch (*it->p) { + case '\0': + return reset_iterator(it), 0; + case 'n': + if (!starts_with(it->p, "null")) + return error("unexpected value: %.*s", 4, it->p), + reset_iterator(it); + it->type = JSON_NULL; + it->end = it->p = it->begin + 4; + break; + case 't': + if (!starts_with(it->p, "true")) + return error("unexpected value: %.*s", 4, it->p), + reset_iterator(it); + it->type = JSON_TRUE; + it->end = it->p = it->begin + 4; + break; + case 'f': + if (!starts_with(it->p, "false")) + return error("unexpected value: %.*s", 5, it->p), + reset_iterator(it); + it->type = JSON_FALSE; + it->end = it->p = it->begin + 5; + break; + case '-': case '.': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + it->type = JSON_NUMBER; + it->end = it->p = it->begin + strspn(it->p, "-.0123456789"); + break; + case '"': + it->type = JSON_STRING; + if (parse_json_string(it, &it->string_value) < 0) + return -1; + break; + case '[': { + const char *save = it->begin; + size_t key_offset = it->key.len; + int i = 0, res; + + for (it->p++, skip_whitespace(it); *it->p != ']'; i++) { + strbuf_addf(&it->key, "[%d]", i); + + if ((res = iterate_json(it))) + return reset_iterator(it), res; + strbuf_setlen(&it->key, key_offset); + + skip_whitespace(it); + if (*it->p == ',') + it->p++; + 
} + + it->type = JSON_ARRAY; + it->begin = save; + it->end = it->p; + it->p++; + break; + } + case '{': { + const char *save = it->begin; + size_t key_offset = it->key.len; + int res; + + strbuf_addch(&it->key, '.'); + for (it->p++, skip_whitespace(it); *it->p != '}'; ) { + strbuf_setlen(&it->key, key_offset + 1); + if (parse_json_string(it, &it->key) < 0) + return -1; + skip_whitespace(it); + if (*(it->p)++ != ':') + return error("expected colon: %.*s", 5, it->p), + reset_iterator(it); + + if ((res = iterate_json(it))) + return res; + + skip_whitespace(it); + if (*it->p == ',') + it->p++; + } + strbuf_setlen(&it->key, key_offset); + + it->type = JSON_OBJECT; + it->begin = save; + it->end = it->p; + it->p++; + break; + } + } + + return it->fn(it); +} diff --git a/json-parser.h b/json-parser.h new file mode 100644 index 00000000000000..ce1fdc5ee23928 --- /dev/null +++ b/json-parser.h @@ -0,0 +1,29 @@ +#ifndef JSON_PARSER_H +#define JSON_PARSER_H + +#include "strbuf.h" + +struct json_iterator { + const char *json, *p, *begin, *end; + struct strbuf key, string_value; + enum { + JSON_NULL = 0, + JSON_FALSE, + JSON_TRUE, + JSON_NUMBER, + JSON_STRING, + JSON_ARRAY, + JSON_OBJECT + } type; + int (*fn)(struct json_iterator *it); + void *fn_data; +}; +#define JSON_ITERATOR_INIT(json_, fn_, fn_data_) { \ + .json = json_, .p = json_, \ + .key = STRBUF_INIT, .string_value = STRBUF_INIT, \ + .fn = fn_, .fn_data = fn_data_ \ +} + +int iterate_json(struct json_iterator *it); + +#endif diff --git a/merge-recursive.c b/merge-recursive.c index 005b0cfd54b3b1..84fe3f827b9a29 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -5,6 +5,7 @@ */ #include "cache.h" #include "merge-recursive.h" +#include "virtualfilesystem.h" #include "advice.h" #include "alloc.h" @@ -864,15 +865,14 @@ static int would_lose_untracked(struct merge_options *opt, const char *path) static int was_dirty(struct merge_options *opt, const char *path) { struct cache_entry *ce; - int dirty = 1; - if (opt->priv->call_depth || !was_tracked(opt, path)) - return !dirty; + if (opt->priv->call_depth || !was_tracked(opt, path) || + is_excluded_from_virtualfilesystem(path, strlen(path), DT_REG) == 1) + return 0; ce = index_file_exists(opt->priv->unpack_opts.src_index, path, strlen(path), ignore_case); - dirty = verify_uptodate(ce, &opt->priv->unpack_opts) != 0; - return dirty; + return !ce || verify_uptodate(ce, &opt->priv->unpack_opts) != 0; } static int make_room_for_path(struct merge_options *opt, const char *path) @@ -1536,7 +1536,7 @@ static int handle_change_delete(struct merge_options *opt, * path. We could call update_file_flags() with update_cache=0 * and update_wd=0, but that's a no-op. 
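A sketch of how the new JSON iterator is meant to be driven, in the style of the scalar.c callbacks added later in this patch. Keys are reported as dotted/bracketed paths, so parsing `{"CacheServers":[{"Url":"..."}]}` visits a string value under the key ".CacheServers[0].Url". This fragment assumes the in-tree helpers ends_with() and warning() and is not standalone:

```
#include "cache.h"
#include "json-parser.h"

/* Print every string value whose key path ends in ".Url". */
static int print_urls(struct json_iterator *it)
{
	if (it->type == JSON_STRING && ends_with(it->key.buf, ".Url"))
		printf("%s\n", it->string_value.buf);
	return 0;   /* 0 = keep iterating; nonzero would stop early */
}

static void list_urls(const char *json)
{
	struct json_iterator it = JSON_ITERATOR_INIT(json, print_urls, NULL);

	if (iterate_json(&it) < 0)
		warning("JSON parse error");
	strbuf_release(&it.key);
	strbuf_release(&it.string_value);
}
```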
*/ - if (change_branch != opt->branch1 || alt_path) + if (change_branch != opt->branch1 || alt_path || !file_exists(update_path)) ret = update_file(opt, 0, changed, update_path); } free(alt_path); diff --git a/name-hash.c b/name-hash.c index cd009c7c8ae455..4ab939b8b84b6a 100644 --- a/name-hash.c +++ b/name-hash.c @@ -730,6 +730,26 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na return NULL; } +struct cache_entry *index_file_next_match(struct index_state *istate, struct cache_entry *ce, int igncase) +{ + struct cache_entry *next; + + if (!igncase || !ce) { + return NULL; + } + + next = hashmap_get_next_entry(&istate->name_hash, ce, ent); + if (!next) + return NULL; + + hashmap_for_each_entry_from(&istate->name_hash, next, ent) { + if (same_name(next, ce->name, ce_namelen(ce), igncase)) + return next; + } + + return NULL; +} + void free_name_hash(struct index_state *istate) { if (!istate->name_hash_initialized) diff --git a/object-file.c b/object-file.c index 0071655b698063..94ae5cb74cb627 100644 --- a/object-file.c +++ b/object-file.c @@ -33,6 +33,11 @@ #include "object-store.h" #include "promisor-remote.h" #include "submodule.h" +#include "hook.h" +#include "sigchain.h" +#include "sub-process.h" +#include "pkt-line.h" +#include "gvfs-helper-client.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -443,6 +448,8 @@ const char *loose_object_path(struct repository *r, struct strbuf *buf, return odb_loose_path(r->objects->odb, buf, oid); } +static int gvfs_matched_shared_cache_to_alternate; + /* * Return non-zero iff the path is usable as an alternate object database. */ @@ -452,6 +459,52 @@ static int alt_odb_usable(struct raw_object_store *o, { int r; + if (!strbuf_cmp(path, &gvfs_shared_cache_pathname)) { + /* + * `gvfs.sharedCache` is the preferred alternate that we + * will use with `gvfs-helper.exe` to dynamically fetch + * missing objects. It is set during git_default_config(). + * + * Make sure the directory exists on disk before we let the + * stock code discredit it. + */ + struct strbuf buf_pack_foo = STRBUF_INIT; + enum scld_error scld; + + /* + * Force create the "" and "/pack" directories, if + * not present on disk. Append an extra bogus directory to + * get safe_create_leading_directories() to see "/pack" + * as a leading directory of something deeper (which it + * won't create). + */ + strbuf_addf(&buf_pack_foo, "%s/pack/foo", path->buf); + + scld = safe_create_leading_directories(buf_pack_foo.buf); + if (scld != SCLD_OK && scld != SCLD_EXISTS) { + error_errno(_("could not create shared-cache ODB '%s'"), + gvfs_shared_cache_pathname.buf); + + strbuf_release(&buf_pack_foo); + + /* + * Pretend no shared-cache was requested and + * effectively fallback to ".git/objects" for + * fetching missing objects. + */ + strbuf_release(&gvfs_shared_cache_pathname); + return 0; + } + + /* + * We know that there is an alternate (either from + * .git/objects/info/alternates or from a memory-only + * entry) associated with the shared-cache directory. 
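The "append a bogus trailing component" trick above makes safe_create_leading_directories() create `<odb>` and `<odb>/pack` without creating the bogus leaf itself. A POSIX-only standalone sketch of the same idea, with made-up path names:

```
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>

/*
 * Create every leading directory of 'path' but not its final component:
 * passing "shared-cache/pack/foo" ensures "shared-cache" and
 * "shared-cache/pack" exist while the bogus "foo" leaf is never created.
 */
static int create_leading_dirs(char *path)
{
	for (char *p = strchr(path + 1, '/'); p; p = strchr(p + 1, '/')) {
		*p = '\0';
		if (mkdir(path, 0777) && errno != EEXIST) {
			*p = '/';
			return -1;
		}
		*p = '/';
	}
	return 0;
}

int main(void)
{
	char path[] = "shared-cache/pack/foo";

	if (create_leading_dirs(path))
		perror("mkdir");
	else
		puts("created shared-cache/ and shared-cache/pack/");
	return 0;
}
```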
+ */ + gvfs_matched_shared_cache_to_alternate++; + strbuf_release(&buf_pack_foo); + } + /* Detect cases where alternate disappeared */ if (!is_directory(path->buf)) { error(_("object directory %s does not exist; " @@ -935,9 +988,154 @@ void prepare_alt_odb(struct repository *r) link_alt_odb_entries(r, r->objects->alternate_db, PATH_SEP, NULL, 0); read_info_alternates(r, r->objects->odb->path, 0); + + if (gvfs_shared_cache_pathname.len && + !gvfs_matched_shared_cache_to_alternate) { + /* + * There is no entry in .git/objects/info/alternates for + * the requested shared-cache directory. Therefore, the + * odb-list does not contain this directory. + * + * Force this directory into the odb-list as an in-memory + * alternate. Implicitly create the directory on disk, if + * necessary. + * + * See GIT_ALTERNATE_OBJECT_DIRECTORIES for another example + * of this kind of usage. + * + * Note: This has the net-effect of allowing Git to treat + * `gvfs.sharedCache` as an unofficial alternate. This + * usage should be discouraged for compatbility reasons + * with other tools in the overall Git ecosystem (that + * won't know about this trick). It would be much better + * for us to update .git/objects/info/alternates instead. + * The code here is considered a backstop. + */ + link_alt_odb_entries(r, gvfs_shared_cache_pathname.buf, + '\n', NULL, 0); + } + r->objects->loaded_alternates = 1; } +#define CAP_GET (1u<<0) + +static int subprocess_map_initialized; +static struct hashmap subprocess_map; + +struct read_object_process { + struct subprocess_entry subprocess; + unsigned int supported_capabilities; +}; + +static int start_read_object_fn(struct subprocess_entry *subprocess) +{ + struct read_object_process *entry = (struct read_object_process *)subprocess; + static int versions[] = {1, 0}; + static struct subprocess_capability capabilities[] = { + { "get", CAP_GET }, + { NULL, 0 } + }; + + return subprocess_handshake(subprocess, "git-read-object", versions, + NULL, capabilities, + &entry->supported_capabilities); +} + +static int read_object_process(const struct object_id *oid) +{ + int err; + struct read_object_process *entry; + struct child_process *process; + struct strbuf status = STRBUF_INIT; + const char *cmd = find_hook("read-object"); + uint64_t start; + + start = getnanotime(); + + trace2_region_enter("subprocess", "read_object", the_repository); + + if (!subprocess_map_initialized) { + subprocess_map_initialized = 1; + hashmap_init(&subprocess_map, (hashmap_cmp_fn)cmd2process_cmp, + NULL, 0); + entry = NULL; + } else { + entry = (struct read_object_process *) subprocess_find_entry(&subprocess_map, cmd); + } + + if (!entry) { + entry = xmalloc(sizeof(*entry)); + entry->supported_capabilities = 0; + + if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, + start_read_object_fn)) { + free(entry); + err = -1; + goto leave_region; + } + } + process = &entry->subprocess.process; + + if (!(CAP_GET & entry->supported_capabilities)) { + err = -1; + goto leave_region; + } + + sigchain_push(SIGPIPE, SIG_IGN); + + err = packet_write_fmt_gently(process->in, "command=get\n"); + if (err) + goto done; + + err = packet_write_fmt_gently(process->in, "sha1=%s\n", oid_to_hex(oid)); + if (err) + goto done; + + err = packet_flush_gently(process->in); + if (err) + goto done; + + err = subprocess_read_status(process->out, &status); + err = err ? err : strcmp(status.buf, "success"); + +done: + sigchain_pop(SIGPIPE); + + if (err || errno == EPIPE) { + err = err ? 
err : errno; + if (!strcmp(status.buf, "error")) { + /* The process signaled a problem with the file. */ + } + else if (!strcmp(status.buf, "abort")) { + /* + * The process signaled a permanent problem. Don't try to read + * objects with the same command for the lifetime of the current + * Git process. + */ + entry->supported_capabilities &= ~CAP_GET; + } + else { + /* + * Something went wrong with the read-object process. + * Force shutdown and restart if needed. + */ + error("external process '%s' failed", cmd); + subprocess_stop(&subprocess_map, + (struct subprocess_entry *)entry); + free(entry); + } + } + + trace_performance_since(start, "read_object_process"); + +leave_region: + trace2_region_leave_printf("subprocess", "read_object", the_repository, + "result %d", err); + + return err; +} + /* Returns 1 if we have successfully freshened the file, 0 otherwise. */ static int freshen_file(const char *fn) { @@ -986,10 +1184,23 @@ static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) return 0; } -static int check_and_freshen(const struct object_id *oid, int freshen) +static int check_and_freshen(const struct object_id *oid, int freshen, + int skip_virtualized_objects) { - return check_and_freshen_local(oid, freshen) || + int ret; + int tried_hook = 0; + +retry: + ret = check_and_freshen_local(oid, freshen) || check_and_freshen_nonlocal(oid, freshen); + if (!ret && core_virtualize_objects && !skip_virtualized_objects && + !tried_hook) { + tried_hook = 1; + if (!read_object_process(oid)) + goto retry; + } + + return ret; } int has_loose_object_nonlocal(const struct object_id *oid) @@ -999,7 +1210,7 @@ int has_loose_object_nonlocal(const struct object_id *oid) int has_loose_object(const struct object_id *oid) { - return check_and_freshen(oid, 0); + return check_and_freshen(oid, 0, 0); } static void mmap_limit_check(size_t length) @@ -1537,7 +1748,8 @@ static int do_oid_object_info_extended(struct repository *r, int rtype; const struct object_id *real = oid; int already_retried = 0; - + int tried_hook = 0; + int tried_gvfs_helper = 0; if (flags & OBJECT_INFO_LOOKUP_REPLACE) real = lookup_replace_object(r, oid); @@ -1548,6 +1760,7 @@ static int do_oid_object_info_extended(struct repository *r, if (!oi) oi = &blank_oi; +retry: co = find_cached_object(real); if (co) { if (oi->typep) @@ -1577,11 +1790,44 @@ static int do_oid_object_info_extended(struct repository *r, if (!loose_object_info(r, real, oi, flags)) return 0; + if (core_use_gvfs_helper && !tried_gvfs_helper) { + enum gh_client__created ghc; + + if (flags & OBJECT_INFO_SKIP_FETCH_OBJECT) + return -1; + + gh_client__get_immediate(real, &ghc); + tried_gvfs_helper = 1; + + /* + * Retry the lookup IIF `gvfs-helper` created one + * or more new packfiles or loose objects. + */ + if (ghc != GHC__CREATED__NOTHING) + continue; + + /* + * If `gvfs-helper` fails, we just want to return -1. + * But allow the other providers to have a shot at it. + * (At least until we have a chance to consolidate + * them.) + */ + } + /* Not a loose object; someone else may have just packed it. */ if (!(flags & OBJECT_INFO_QUICK)) { reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; + if (core_virtualize_objects && !tried_hook) { + // TODO Assert or at least trace2 if gvfs-helper + // TODO was tried and failed and then read-object-hook + // TODO is successful at getting this object. + tried_hook = 1; + // TODO BUG? Should 'oid' be 'real' ? 
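check_and_freshen() and the object-info lookup above share one shape: consult the local stores, invoke the external provider (read-object hook or gvfs-helper) at most once, then retry the local lookup. A toy standalone rendering of that control flow; the stand-in functions merely simulate a miss followed by a successful fetch:

```
#include <stdio.h>

/* Pretend the object is missing until the external provider has run. */
static int have_locally(int provider_ran) { return provider_ran; }
static int fetch_via_provider(void)       { return 0; /* 0 = success */ }

/* Consult local stores, call the provider at most once, then retry. */
static int lookup_object(void)
{
	int tried_provider = 0;

retry:
	if (have_locally(tried_provider))
		return 0;

	if (!tried_provider) {
		tried_provider = 1;
		if (!fetch_via_provider())
			goto retry;
	}
	return -1;   /* still missing */
}

int main(void)
{
	printf("lookup result: %d\n", lookup_object());   /* 0 */
	return 0;
}
```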
+ if (!read_object_process(oid)) + goto retry; + } } /* @@ -2099,9 +2345,10 @@ static int write_loose_object(const struct object_id *oid, char *hdr, return finalize_object_file(tmp_file.buf, filename.buf); } -static int freshen_loose_object(const struct object_id *oid) +static int freshen_loose_object(const struct object_id *oid, + int skip_virtualized_objects) { - return check_and_freshen(oid, 1); + return check_and_freshen(oid, 1, skip_virtualized_objects); } static int freshen_packed_object(const struct object_id *oid) @@ -2195,7 +2442,7 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, die(_("deflateEnd on stream object failed (%d)"), ret); close_loose_object(fd, tmp_file.buf); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) { + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) { unlink_or_warn(tmp_file.buf); goto cleanup; } @@ -2235,7 +2482,7 @@ int write_object_file_flags(const void *buf, size_t len, */ write_object_file_prepare(the_hash_algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) return 0; return write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags); } @@ -2256,7 +2503,7 @@ int write_object_file_literally(const void *buf, size_t len, if (!(flags & HASH_WRITE_OBJECT)) goto cleanup; - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) goto cleanup; status = write_loose_object(oid, header, hdrlen, buf, len, 0, 0); @@ -2719,6 +2966,13 @@ struct oidtree *odb_loose_cache(struct object_directory *odb, return odb->loose_objects_cache; } +void odb_loose_cache_add_new_oid(struct object_directory *odb, + const struct object_id *oid) +{ + struct oidtree *cache = odb_loose_cache(odb, oid); + append_loose_object(oid, NULL, cache); +} + void odb_clear_loose_cache(struct object_directory *odb) { oidtree_clear(odb->loose_objects_cache); diff --git a/object-store.h b/object-store.h index de48c9a78d76b1..7e57c73fbe4e32 100644 --- a/object-store.h +++ b/object-store.h @@ -94,6 +94,14 @@ void restore_primary_odb(struct object_directory *restore_odb, const char *old_p struct oidtree *odb_loose_cache(struct object_directory *odb, const struct object_id *oid); +/* + * Add a new object to the loose object cache (possibly after the + * cache was populated). This might be used after dynamically + * fetching a missing object. + */ +void odb_loose_cache_add_new_oid(struct object_directory *odb, + const struct object_id *oid); + /* Empty the loose object cache for the specified object directory. 
*/ void odb_clear_loose_cache(struct object_directory *odb); diff --git a/packfile.c b/packfile.c index c0d7dd93f46dfb..22c5aaed638b0d 100644 --- a/packfile.c +++ b/packfile.c @@ -762,6 +762,12 @@ void install_packed_git(struct repository *r, struct packed_git *pack) hashmap_add(&r->objects->pack_map, &pack->packmap_ent); } +void install_packed_git_and_mru(struct repository *r, struct packed_git *pack) +{ + install_packed_git(r, pack); + list_add(&pack->mru, &r->objects->packed_git_mru); +} + void (*report_garbage)(unsigned seen_bits, const char *path); static void report_helper(const struct string_list *list, @@ -1666,6 +1672,13 @@ static void *read_object(struct repository *r, return content; } +static unsigned long g_nr_unpack_entry; + +unsigned long get_nr_unpack_entry(void) +{ + return g_nr_unpack_entry; +} + void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, enum object_type *final_type, unsigned long *final_size) { @@ -1679,6 +1692,8 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, int delta_stack_nr = 0, delta_stack_alloc = UNPACK_ENTRY_STACK_PREALLOC; int base_from_cache = 0; + g_nr_unpack_entry++; + write_pack_access_log(p, obj_offset); /* PHASE 1: drill down to the innermost base object */ diff --git a/packfile.h b/packfile.h index a3f6723857bf12..5a915a92774aaa 100644 --- a/packfile.h +++ b/packfile.h @@ -53,6 +53,7 @@ extern void (*report_garbage)(unsigned seen_bits, const char *path); void reprepare_packed_git(struct repository *r); void install_packed_git(struct repository *r, struct packed_git *pack); +void install_packed_git_and_mru(struct repository *r, struct packed_git *pack); struct packed_git *get_packed_git(struct repository *r); struct list_head *get_packed_git_mru(struct repository *r); @@ -195,4 +196,9 @@ int is_promisor_object(const struct object_id *oid); int load_idx(const char *path, const unsigned int hashsz, void *idx_map, size_t idx_size, struct packed_git *p); +/* + * Return the number of objects fetched from a packfile. + */ +unsigned long get_nr_unpack_entry(void); + #endif diff --git a/pkt-line.c b/pkt-line.c index ce4e73b6833a48..2b660c4332a7dc 100644 --- a/pkt-line.c +++ b/pkt-line.c @@ -225,7 +225,7 @@ static int do_packet_write(const int fd_out, const char *buf, size_t size, return 0; } -static int packet_write_gently(const int fd_out, const char *buf, size_t size) +int packet_write_gently(const int fd_out, const char *buf, size_t size) { struct strbuf err = STRBUF_INIT; if (do_packet_write(fd_out, buf, size, &err)) { diff --git a/pkt-line.h b/pkt-line.h index 79c538b99e4776..2c83c8270add8b 100644 --- a/pkt-line.h +++ b/pkt-line.h @@ -31,6 +31,7 @@ void packet_write(int fd_out, const char *buf, size_t size); void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3))); int packet_flush_gently(int fd); int packet_write_fmt_gently(int fd, const char *fmt, ...) 
__attribute__((format (printf, 2, 3))); +int packet_write_gently(const int fd_out, const char *buf, size_t size); int write_packetized_from_fd_no_flush(int fd_in, int fd_out); int write_packetized_from_buf_no_flush_count(const char *src_in, size_t len, int fd_out, int *packet_counter); diff --git a/promisor-remote.c b/promisor-remote.c index 68f46f5ec70b93..cb04cb6b169ecf 100644 --- a/promisor-remote.c +++ b/promisor-remote.c @@ -1,5 +1,6 @@ #include "cache.h" #include "object-store.h" +#include "gvfs-helper-client.h" #include "promisor-remote.h" #include "config.h" #include "transport.h" @@ -195,7 +196,7 @@ struct promisor_remote *repo_promisor_remote_find(struct repository *r, int repo_has_promisor_remote(struct repository *r) { - return !!repo_promisor_remote_find(r, NULL); + return core_use_gvfs_helper || !!repo_promisor_remote_find(r, NULL); } static int remove_fetched_oids(struct repository *repo, @@ -242,6 +243,13 @@ int promisor_remote_get_direct(struct repository *repo, if (oid_nr == 0) return 0; + if (core_use_gvfs_helper) { + enum gh_client__created ghc = GHC__CREATED__NOTHING; + + trace2_data_intmax("bug", the_repository, "fetch_objects/gvfs-helper", oid_nr); + gh_client__queue_oid_array(oids, oid_nr); + return gh_client__drain_queue(&ghc); + } promisor_remote_init(repo); diff --git a/read-cache.c b/read-cache.c index 7f94bc68d060aa..5666ef8ad89294 100644 --- a/read-cache.c +++ b/read-cache.c @@ -4,6 +4,8 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "cache.h" +#include "gvfs.h" +#include "virtualfilesystem.h" #include "config.h" #include "diff.h" #include "diffcore.h" @@ -1853,7 +1855,10 @@ static int read_index_extension(struct index_state *istate, { switch (CACHE_EXT(ext)) { case CACHE_EXT_TREE: + trace2_region_enter("index", "read/extension/cache_tree", NULL); istate->cache_tree = cache_tree_read(data, sz); + trace2_data_intmax("index", NULL, "read/extension/cache_tree/bytes", (intmax_t)sz); + trace2_region_leave("index", "read/extension/cache_tree", NULL); break; case CACHE_EXT_RESOLVE_UNDO: istate->resolve_undo = resolve_undo_read(data, sz); @@ -2046,6 +2051,7 @@ static void post_read_index_from(struct index_state *istate) tweak_untracked_cache(istate); tweak_split_index(istate); tweak_fsmonitor(istate); + apply_virtualfilesystem(istate); } static size_t estimate_cache_size_from_compressed(unsigned int entries) @@ -2118,6 +2124,17 @@ static void *load_index_extensions(void *_data) return NULL; } +static void *load_index_extensions_threadproc(void *_data) +{ + void *result; + + trace2_thread_start("load_index_extensions"); + result = load_index_extensions(_data); + trace2_thread_exit(); + + return result; +} + /* * A helper function that will load the specified range of cache entries * from the memory mapped file and add them to the given index. 
@@ -2194,12 +2211,17 @@ static void *load_cache_entries_thread(void *_data) struct load_cache_entries_thread_data *p = _data; int i; + trace2_thread_start("load_cache_entries"); + /* iterate across all ieot blocks assigned to this thread */ for (i = p->ieot_start; i < p->ieot_start + p->ieot_blocks; i++) { p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, p->offset, p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL); p->offset += p->ieot->entries[i].nr; } + + trace2_thread_exit(); + return NULL; } @@ -2370,7 +2392,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) int err; p.src_offset = extension_offset; - err = pthread_create(&p.pthread, NULL, load_index_extensions, &p); + err = pthread_create(&p.pthread, NULL, load_index_extensions_threadproc, &p); if (err) die(_("unable to create load_index_extensions thread: %s"), strerror(err)); @@ -2910,6 +2932,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, f = hashfd(tempfile->fd, tempfile->filename.buf); + if (gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + f->skip_hash = 1; + for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) removed++; @@ -3076,9 +3101,13 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (!strip_extensions && !drop_cache_tree && istate->cache_tree) { struct strbuf sb = STRBUF_INIT; + trace2_region_enter("index", "write/extension/cache_tree", NULL); cache_tree_write(&sb, istate->cache_tree); err = write_index_ext_header(f, eoie_c, CACHE_EXT_TREE, sb.len) < 0; hashwrite(f, sb.buf, sb.len); + trace2_data_intmax("index", NULL, "write/extension/cache_tree/bytes", (intmax_t)sb.len); + trace2_region_leave("index", "write/extension/cache_tree", NULL); + strbuf_release(&sb); if (err) return -1; diff --git a/remote-curl.c b/remote-curl.c index 72dfb8fb86aab0..7fa06693c6862f 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -1169,6 +1169,9 @@ static int fetch_git(struct discovery *heads, struct strvec args = STRVEC_INIT; struct strbuf rpc_result = STRBUF_INIT; + if (core_use_gvfs_helper) + return 0; + strvec_pushl(&args, "fetch-pack", "--stateless-rpc", "--stdin", "--lock-pack", NULL); if (options.followtags) diff --git a/remote.c b/remote.c index 60869beebe7364..5e72e7d2de192a 100644 --- a/remote.c +++ b/remote.c @@ -2255,7 +2255,16 @@ int format_tracking_info(struct branch *branch, struct strbuf *sb, char *base; int upstream_is_gone = 0; + trace2_region_enter("tracking", "stat_tracking_info", NULL); sti = stat_tracking_info(branch, &ours, &theirs, &full_base, 0, abf); + trace2_data_intmax("tracking", NULL, "stat_tracking_info/ab_flags", abf); + trace2_data_intmax("tracking", NULL, "stat_tracking_info/ab_result", sti); + if (sti >= 0 && abf == AHEAD_BEHIND_FULL) { + trace2_data_intmax("tracking", NULL, "stat_tracking_info/ab_ahead", ours); + trace2_data_intmax("tracking", NULL, "stat_tracking_info/ab_behind", theirs); + } + trace2_region_leave("tracking", "stat_tracking_info", NULL); + if (sti < 0) { if (!full_base) return 0; diff --git a/repo-settings.c b/repo-settings.c index 80157597c93f17..aee45e7c6902df 100644 --- a/repo-settings.c +++ b/repo-settings.c @@ -84,7 +84,7 @@ void prepare_repo_settings(struct repository *r) /* Boolean config or default, does not cascade (simple) */ repo_cfg_bool(r, "pack.usesparse", &r->settings.pack_use_sparse, 1); repo_cfg_bool(r, "core.multipackindex", &r->settings.core_multi_pack_index, 1); - repo_cfg_bool(r, "index.sparse", 
&r->settings.sparse_index, 0); + repo_cfg_bool(r, "index.sparse", &r->settings.sparse_index, 1); /* * The GIT_TEST_MULTI_PACK_INDEX variable is special in that diff --git a/scalar.c b/scalar.c index c5c1ce68919908..b2e8c39107437e 100644 --- a/scalar.c +++ b/scalar.c @@ -14,6 +14,12 @@ #include "dir.h" #include "packfile.h" #include "help.h" +#include "json-parser.h" +#include "remote.h" + +static int is_unattended(void) { + return git_env_bool("Scalar_UNATTENDED", 0); +} static void setup_enlistment_directory(int argc, const char **argv, const char * const *usagestr, @@ -41,6 +47,9 @@ static void setup_enlistment_directory(int argc, const char **argv, die(_("need a working directory")); strbuf_trim_trailing_dir_sep(&path); +#ifdef GIT_WINDOWS_NATIVE + convert_slashes(path.buf); +#endif /* check if currently in enlistment root with src/ workdir */ len = path.len; @@ -67,12 +76,14 @@ static void setup_enlistment_directory(int argc, const char **argv, strbuf_release(&path); } +static int git_retries = 3; + static int run_git(const char *arg, ...) { struct strvec argv = STRVEC_INIT; va_list args; const char *p; - int res; + int res, attempts; va_start(args, arg); strvec_push(&argv, arg); @@ -80,12 +91,28 @@ static int run_git(const char *arg, ...) strvec_push(&argv, p); va_end(args); - res = run_command_v_opt(argv.v, RUN_GIT_CMD); + for (attempts = 0, res = 1; + res && attempts < git_retries; + attempts++) + res = run_command_v_opt(argv.v, RUN_GIT_CMD); strvec_clear(&argv); return res; } +static const char *ensure_absolute_path(const char *path, char **absolute) +{ + struct strbuf buf = STRBUF_INIT; + + if (is_absolute_path(path)) + return path; + + strbuf_realpath_forgiving(&buf, path, 1); + free(*absolute); + *absolute = strbuf_detach(&buf, NULL); + return *absolute; +} + struct scalar_config { const char *key; const char *value; @@ -124,23 +151,7 @@ static int set_recommended_config(int reconfigure) { "core.FSCache", "true", 1 }, { "core.multiPackIndex", "true", 1 }, { "core.preloadIndex", "true", 1 }, -#ifndef WIN32 { "core.untrackedCache", "true", 1 }, -#else - /* - * Unfortunately, Scalar's Functional Tests demonstrated - * that the untracked cache feature is unreliable on Windows - * (which is a bummer because that platform would benefit the - * most from it). For some reason, freshly created files seem - * not to update the directory's `lastModified` time - * immediately, but the untracked cache would need to rely on - * that. - * - * Therefore, with a sad heart, we disable this very useful - * feature on Windows. - */ - { "core.untrackedCache", "false", 1 }, -#endif { "core.logAllRefUpdates", "true", 1 }, { "credential.https://dev.azure.com.useHttpPath", "true", 1 }, { "credential.validate", "false", 1 }, /* GCM4W-only */ @@ -166,11 +177,29 @@ static int set_recommended_config(int reconfigure) { "core.autoCRLF", "false" }, { "core.safeCRLF", "false" }, { "fetch.showForcedUpdates", "false" }, + { "core.configWriteLockTimeoutMS", "150" }, { NULL, NULL }, }; int i; char *value; + /* + * If a user has "core.usebuiltinfsmonitor" enabled, try to switch to + * the new (non-deprecated) setting (core.fsmonitor). 
+ */ + if (!git_config_get_string("core.usebuiltinfsmonitor", &value)) { + char *dummy = NULL; + if (git_config_get_string("core.fsmonitor", &dummy) && + git_config_set_gently("core.fsmonitor", value) < 0) + return error(_("could not configure %s=%s"), + "core.fsmonitor", value); + if (git_config_set_gently("core.usebuiltinfsmonitor", NULL) < 0) + return error(_("could not configure %s=%s"), + "core.useBuiltinFSMonitor", "NULL"); + free(value); + free(dummy); + } + for (i = 0; config[i].key; i++) { if (set_scalar_config(config + i, reconfigure)) return error(_("could not configure %s=%s"), @@ -207,16 +236,25 @@ static int set_recommended_config(int reconfigure) static int toggle_maintenance(int enable) { + unsigned long ul; + + if (git_config_get_ulong("core.configWriteLockTimeoutMS", &ul)) + git_config_push_parameter("core.configWriteLockTimeoutMS=150"); + return run_git("maintenance", enable ? "start" : "unregister", NULL); } static int add_or_remove_enlistment(int add) { int res; + unsigned long ul; if (!the_repository->worktree) die(_("Scalar enlistments require a worktree")); + if (git_config_get_ulong("core.configWriteLockTimeoutMS", &ul)) + git_config_push_parameter("core.configWriteLockTimeoutMS=150"); + res = run_git("config", "--global", "--get", "--fixed-value", "scalar.repo", the_repository->worktree, NULL); @@ -304,6 +342,210 @@ static int set_config(const char *fmt, ...) return res; } +static int list_cache_server_urls(struct json_iterator *it) +{ + const char *p; + char *q; + long l; + + if (it->type == JSON_STRING && + skip_iprefix(it->key.buf, ".CacheServers[", &p) && + (l = strtol(p, &q, 10)) >= 0 && p != q && + !strcasecmp(q, "].Url")) + printf("#%ld: %s\n", l, it->string_value.buf); + + return 0; +} + +/* Find N for which .CacheServers[N].GlobalDefault == true */ +static int get_cache_server_index(struct json_iterator *it) +{ + const char *p; + char *q; + long l; + + if (it->type == JSON_TRUE && + skip_iprefix(it->key.buf, ".CacheServers[", &p) && + (l = strtol(p, &q, 10)) >= 0 && p != q && + !strcasecmp(q, "].GlobalDefault")) { + *(long *)it->fn_data = l; + return 1; + } + + return 0; +} + +struct cache_server_url_data { + char *key, *url; +}; + +/* Get .CacheServers[N].Url */ +static int get_cache_server_url(struct json_iterator *it) +{ + struct cache_server_url_data *data = it->fn_data; + + if (it->type == JSON_STRING && + !strcasecmp(data->key, it->key.buf)) { + data->url = strbuf_detach(&it->string_value, NULL); + return 1; + } + + return 0; +} + +static int can_url_support_gvfs(const char *url) +{ + return starts_with(url, "https://") || + (git_env_bool("GIT_TEST_ALLOW_GVFS_VIA_HTTP", 0) && + starts_with(url, "http://")); +} + +/* + * If `cache_server_url` is `NULL`, print the list to `stdout`. + * + * Since `gvfs-helper` requires a Git directory, this _must_ be run in + * a worktree. + */ +static int supports_gvfs_protocol(const char *url, char **cache_server_url) +{ + struct child_process cp = CHILD_PROCESS_INIT; + struct strbuf out = STRBUF_INIT; + + /* + * The GVFS protocol is only supported via https://; For testing, we + * also allow http://. 
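list_cache_server_urls() and the other callbacks above match iterator keys of the form ".CacheServers[N].Url" case-insensitively and extract N. A standalone version of just that key parsing, with invented sample keys:

```
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/* Extract N from a key path of the form ".CacheServers[N].Url". */
static long cache_server_index(const char *key)
{
	const char *prefix = ".CacheServers[";
	size_t plen = strlen(prefix);
	char *end;
	long n;

	if (strncasecmp(key, prefix, plen))
		return -1;
	n = strtol(key + plen, &end, 10);
	if (end == key + plen || strcasecmp(end, "].Url"))
		return -1;
	return n;
}

int main(void)
{
	printf("%ld\n", cache_server_index(".CacheServers[3].Url"));           /* 3 */
	printf("%ld\n", cache_server_index(".CacheServers[0].GlobalDefault")); /* -1 */
	return 0;
}
```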
+ */ + if (!can_url_support_gvfs(url)) + return 0; + + cp.git_cmd = 1; + strvec_pushl(&cp.args, "gvfs-helper", "--remote", url, "config", NULL); + if (!pipe_command(&cp, NULL, 0, &out, 512, NULL, 0)) { + long l = 0; + struct json_iterator it = + JSON_ITERATOR_INIT(out.buf, get_cache_server_index, &l); + struct cache_server_url_data data = { .url = NULL }; + + if (!cache_server_url) { + it.fn = list_cache_server_urls; + if (iterate_json(&it) < 0) { + strbuf_release(&out); + return error("JSON parse error"); + } + strbuf_release(&out); + return 0; + } + + if (iterate_json(&it) < 0) { + strbuf_release(&out); + return error("JSON parse error"); + } + data.key = xstrfmt(".CacheServers[%ld].Url", l); + it.fn = get_cache_server_url; + it.fn_data = &data; + if (iterate_json(&it) < 0) { + strbuf_release(&out); + return error("JSON parse error"); + } + *cache_server_url = data.url; + free(data.key); + return 1; + } + strbuf_release(&out); + /* error out quietly, unless we wanted to list URLs */ + return cache_server_url ? + 0 : error(_("Could not access gvfs/config endpoint")); +} + +static char *default_cache_root(const char *root) +{ + const char *env; + + if (is_unattended()) + return xstrfmt("%s/.scalarCache", root); + +#ifdef WIN32 + (void)env; + return xstrfmt("%.*s.scalarCache", offset_1st_component(root), root); +#elif defined(__APPLE__) + if ((env = getenv("HOME")) && *env) + return xstrfmt("%s/.scalarCache", env); + return NULL; +#else + if ((env = getenv("XDG_CACHE_HOME")) && *env) + return xstrfmt("%s/scalar", env); + if ((env = getenv("HOME")) && *env) + return xstrfmt("%s/.cache/scalar", env); + return NULL; +#endif +} + +static int get_repository_id(struct json_iterator *it) +{ + if (it->type == JSON_STRING && + !strcasecmp(".repository.id", it->key.buf)) { + *(char **)it->fn_data = strbuf_detach(&it->string_value, NULL); + return 1; + } + + return 0; +} + +/* Needs to run this in a worktree; gvfs-helper requires a Git repository */ +static char *get_cache_key(const char *url) +{ + struct child_process cp = CHILD_PROCESS_INIT; + struct strbuf out = STRBUF_INIT; + char *cache_key = NULL; + + /* + * The GVFS protocol is only supported via https://; For testing, we + * also allow http://. + */ + if (!git_env_bool("SCALAR_TEST_SKIP_VSTS_INFO", 0) && + can_url_support_gvfs(url)) { + cp.git_cmd = 1; + strvec_pushl(&cp.args, "gvfs-helper", "--remote", url, + "endpoint", "vsts/info", NULL); + if (!pipe_command(&cp, NULL, 0, &out, 512, NULL, 0)) { + char *id = NULL; + struct json_iterator it = + JSON_ITERATOR_INIT(out.buf, get_repository_id, + &id); + + if (iterate_json(&it) < 0) + warning("JSON parse error (%s)", out.buf); + else if (id) + cache_key = xstrfmt("id_%s", id); + free(id); + } + } + + if (!cache_key) { + struct strbuf downcased = STRBUF_INIT; + int hash_algo_index = hash_algo_by_name("sha1"); + const struct git_hash_algo *hash_algo = hash_algo_index < 0 ? 
+ the_hash_algo : &hash_algos[hash_algo_index]; + git_hash_ctx ctx; + unsigned char hash[GIT_MAX_RAWSZ]; + + strbuf_addstr(&downcased, url); + strbuf_tolower(&downcased); + + hash_algo->init_fn(&ctx); + hash_algo->update_fn(&ctx, downcased.buf, downcased.len); + hash_algo->final_fn(hash, &ctx); + + strbuf_release(&downcased); + + cache_key = xstrfmt("url_%s", + hash_to_hex_algop(hash, hash_algo)); + } + + strbuf_release(&out); + return cache_key; +} + static char *remote_default_branch(const char *url) { struct child_process cp = CHILD_PROCESS_INIT; @@ -401,10 +643,48 @@ void load_builtin_commands(const char *prefix, struct cmdnames *cmds) die("not implemented"); } +static int init_shared_object_cache(const char *url, + const char *local_cache_root) +{ + struct strbuf buf = STRBUF_INIT; + int res = 0; + char *cache_key = NULL, *shared_cache_path = NULL; + + if (!(cache_key = get_cache_key(url))) { + res = error(_("could not determine cache key for '%s'"), url); + goto cleanup; + } + + shared_cache_path = xstrfmt("%s/%s", local_cache_root, cache_key); + if (set_config("gvfs.sharedCache=%s", shared_cache_path)) { + res = error(_("could not configure shared cache")); + goto cleanup; + } + + strbuf_addf(&buf, "%s/pack", shared_cache_path); + switch (safe_create_leading_directories(buf.buf)) { + case SCLD_OK: case SCLD_EXISTS: + break; /* okay */ + default: + res = error_errno(_("could not initialize '%s'"), buf.buf); + goto cleanup; + } + + write_file(git_path("objects/info/alternates"),"%s\n", shared_cache_path); + + cleanup: + strbuf_release(&buf); + free(shared_cache_path); + free(cache_key); + return res; +} + static int cmd_clone(int argc, const char **argv) { const char *branch = NULL; - int full_clone = 0, single_branch = 0; + int full_clone = 0, single_branch = 0, dummy = 0; + const char *cache_server_url = NULL, *local_cache_root = NULL; + char *default_cache_server_url = NULL, *local_cache_root_abs = NULL; struct option clone_options[] = { OPT_STRING('b', "branch", &branch, N_(""), N_("branch to checkout after clone")), @@ -413,6 +693,14 @@ static int cmd_clone(int argc, const char **argv) OPT_BOOL(0, "single-branch", &single_branch, N_("only download metadata for the branch that will " "be checked out")), + OPT_STRING(0, "cache-server-url", &cache_server_url, + N_(""), + N_("the url or friendly name of the cache server")), + OPT_STRING(0, "local-cache-path", &local_cache_root, + N_(""), + N_("override the path for the local Scalar cache")), + OPT_HIDDEN_BOOL(0, "no-fetch-commits-and-trees", + &dummy, N_("no longer used")), OPT_END(), }; const char * const clone_usage[] = { @@ -423,6 +711,7 @@ static int cmd_clone(int argc, const char **argv) char *enlistment = NULL, *dir = NULL; struct strbuf buf = STRBUF_INIT; int res; + int gvfs_protocol; argc = parse_options(argc, argv, NULL, clone_options, clone_usage, 0); @@ -452,8 +741,20 @@ static int cmd_clone(int argc, const char **argv) if (is_directory(enlistment)) die(_("directory '%s' exists already"), enlistment); + ensure_absolute_path(enlistment, &enlistment); + dir = xstrfmt("%s/src", enlistment); + if (!local_cache_root) + local_cache_root = local_cache_root_abs = + default_cache_root(enlistment); + else + local_cache_root = ensure_absolute_path(local_cache_root, + &local_cache_root_abs); + + if (!local_cache_root) + die(_("could not determine local cache root")); + strbuf_reset(&buf); if (branch) strbuf_addf(&buf, "init.defaultBranch=%s", branch); @@ -473,8 +774,28 @@ static int cmd_clone(int argc, const char **argv) 
setup_git_directory(); + git_config(git_default_config, NULL); + + /* + * This `dir_inside_of()` call relies on git_config() having parsed the + * newly-initialized repository config's `core.ignoreCase` value. + */ + if (dir_inside_of(local_cache_root, dir) >= 0) { + struct strbuf path = STRBUF_INIT; + + strbuf_addstr(&path, enlistment); + if (chdir("../..") < 0 || + remove_dir_recursively(&path, 0) < 0) + die(_("'--local-cache-path' cannot be inside the src " + "folder;\nCould not remove '%s'"), enlistment); + + die(_("'--local-cache-path' cannot be inside the src folder")); + } + /* common-main already logs `argv` */ trace2_def_repo(the_repository); + trace2_data_intmax("scalar", the_repository, "unattended", + is_unattended()); if (!branch && !(branch = remote_default_branch(url))) { res = error(_("failed to get default branch for '%s'"), url); @@ -485,13 +806,48 @@ static int cmd_clone(int argc, const char **argv) set_config("remote.origin.fetch=" "+refs/heads/%s:refs/remotes/origin/%s", single_branch ? branch : "*", - single_branch ? branch : "*") || - set_config("remote.origin.promisor=true") || - set_config("remote.origin.partialCloneFilter=blob:none")) { + single_branch ? branch : "*")) { res = error(_("could not configure remote in '%s'"), dir); goto cleanup; } + if (set_config("credential.https://dev.azure.com.useHttpPath=true")) { + res = error(_("could not configure credential.useHttpPath")); + goto cleanup; + } + + gvfs_protocol = cache_server_url || + supports_gvfs_protocol(url, &default_cache_server_url); + + if (gvfs_protocol) { + if ((res = init_shared_object_cache(url, local_cache_root))) + goto cleanup; + if (!cache_server_url) + cache_server_url = default_cache_server_url; + if (set_config("core.useGVFSHelper=true") || + set_config("core.gvfs=150") || + set_config("http.version=HTTP/1.1")) { + res = error(_("could not turn on GVFS helper")); + goto cleanup; + } + if (cache_server_url && + set_config("gvfs.cache-server=%s", cache_server_url)) { + res = error(_("could not configure cache server")); + goto cleanup; + } + if (cache_server_url) + fprintf(stderr, "Cache server URL: %s\n", + cache_server_url); + } else { + if (set_config("core.useGVFSHelper=false") || + set_config("remote.origin.promisor=true") || + set_config("remote.origin.partialCloneFilter=blob:none")) { + res = error(_("could not configure partial clone in " + "'%s'"), dir); + goto cleanup; + } + } + if (!full_clone && (res = run_git("sparse-checkout", "init", "--cone", NULL))) goto cleanup; @@ -500,6 +856,11 @@ static int cmd_clone(int argc, const char **argv) return error(_("could not configure '%s'"), dir); if ((res = run_git("fetch", "--quiet", "origin", NULL))) { + if (gvfs_protocol) { + res = error(_("failed to prefetch commits and trees")); + goto cleanup; + } + warning(_("partial clone failed; attempting full clone")); if (set_config("remote.origin.promisor") || @@ -530,6 +891,8 @@ static int cmd_clone(int argc, const char **argv) free(enlistment); free(dir); strbuf_release(&buf); + free(default_cache_server_url); + free(local_cache_root_abs); return res; } @@ -629,6 +992,7 @@ static int cmd_reconfigure(int argc, const char **argv) git_config(get_scalar_repos, &scalar_repos); for (i = 0; i < scalar_repos.nr; i++) { + int failed = 0; const char *dir = scalar_repos.items[i].string; strbuf_reset(&commondir); @@ -636,19 +1000,40 @@ static int cmd_reconfigure(int argc, const char **argv) if (chdir(dir) < 0) { warning_errno(_("could not switch to '%s'"), dir); - res = -1; - } else if 
(discover_git_directory(&commondir, &gitdir) < 0) { - warning_errno(_("git repository gone in '%s'"), dir); - res = -1; - } else { - git_config_clear(); - - the_repository = &r; - r.commondir = commondir.buf; - r.gitdir = gitdir.buf; - - if (set_recommended_config(1) < 0) - res = -1; + failed = -1; + goto loop_end; + } + + switch (discover_git_directory_reason(&commondir, &gitdir)) { + case GIT_DIR_INVALID_OWNERSHIP: + warning(_("repository at '%s' has different owner"), dir); + failed = -1; + goto loop_end; + + case GIT_DIR_DISCOVERED: + break; + + default: + warning(_("repository not found in '%s'"), dir); + failed = -1; + break; + } + + git_config_clear(); + + the_repository = &r; + r.commondir = commondir.buf; + r.gitdir = gitdir.buf; + + if (set_recommended_config(1) < 0) + failed = -1; + +loop_end: + if (failed) { + res = failed; + warning(_("to unregister this repository from Scalar, run\n" + "\tgit config --global --unset --fixed-value scalar.repo \"%s\""), + dir); } } @@ -866,6 +1251,77 @@ static int cmd_version(int argc, const char **argv) return 0; } +static int cmd_cache_server(int argc, const char **argv) +{ + int get = 0; + char *set = NULL, *list = NULL; + const char *default_remote = "(default)"; + struct option options[] = { + OPT_BOOL(0, "get", &get, + N_("get the configured cache-server URL")), + OPT_STRING(0, "set", &set, N_("URL"), + N_("configure the cache-server to use")), + { OPTION_STRING, 0, "list", &list, N_("remote"), + N_("list the possible cache-server URLs"), + PARSE_OPT_OPTARG, NULL, (intptr_t) default_remote }, + OPT_END(), + }; + const char * const usage[] = { + N_("scalar cache_server " + "[--get | --set | --list []] []"), + NULL + }; + int res = 0; + + argc = parse_options(argc, argv, NULL, options, + usage, 0); + + if (get + !!set + !!list > 1) + usage_msg_opt(_("--get/--set/--list are mutually exclusive"), + usage, options); + + setup_enlistment_directory(argc, argv, usage, options, NULL); + + if (list) { + const char *name = list, *url = list; + + if (list == default_remote) + list = NULL; + + if (!list || !strchr(list, '/')) { + struct remote *remote; + + /* Look up remote */ + remote = remote_get(list); + if (!remote) { + error("no such remote: '%s'", name); + free(list); + return 1; + } + if (!remote->url) { + free(list); + return error(_("remote '%s' has no URLs"), + name); + } + url = remote->url[0]; + } + res = supports_gvfs_protocol(url, NULL); + free(list); + } else if (set) { + res = set_config("gvfs.cache-server=%s", set); + free(set); + } else { + char *url = NULL; + + printf("Using cache server: %s\n", + git_config_get_string("gvfs.cache-server", &url) ? 
+ "(undefined)" : url); + free(url); + } + + return !!res; +} + static struct { const char *name; int (*fn)(int, const char **); @@ -880,6 +1336,7 @@ static struct { { "help", cmd_help }, { "version", cmd_version }, { "diagnose", cmd_diagnose }, + { "cache-server", cmd_cache_server }, { NULL, NULL}, }; @@ -888,6 +1345,12 @@ int cmd_main(int argc, const char **argv) struct strbuf scalar_usage = STRBUF_INIT; int i; + if (is_unattended()) { + setenv("GIT_ASKPASS", "", 0); + setenv("GIT_TERMINAL_PROMPT", "false", 0); + git_config_push_parameter("credential.interactive=never"); + } + while (argc > 1 && *argv[1] == '-') { if (!strcmp(argv[1], "-C")) { if (argc < 3) @@ -911,6 +1374,9 @@ int cmd_main(int argc, const char **argv) argv++; argc--; + if (!strcmp(argv[0], "config")) + argv[0] = "reconfigure"; + for (i = 0; builtins[i].name; i++) if (!strcmp(builtins[i].name, argv[0])) return !!builtins[i].fn(argc, argv); diff --git a/send-pack.c b/send-pack.c index 03356faae66710..e856d2c0d55fd3 100644 --- a/send-pack.c +++ b/send-pack.c @@ -2,6 +2,7 @@ #include "config.h" #include "commit.h" #include "refs.h" +#include "gvfs.h" #include "object-store.h" #include "pkt-line.h" #include "sideband.h" @@ -51,7 +52,7 @@ static int send_pack_config(const char *var, const char *value, void *unused) static void feed_object(const struct object_id *oid, FILE *fh, int negative) { - if (negative && + if (negative && !gvfs_config_is_set(GVFS_MISSING_OK) && !has_object_file_with_flags(oid, OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK)) diff --git a/sequencer.c b/sequencer.c index d26ede83c4b2be..43ddad7692a53c 100644 --- a/sequencer.c +++ b/sequencer.c @@ -683,7 +683,7 @@ static int do_recursive_merge(struct repository *r, o.branch2 = next ? next_label : "(empty tree)"; if (is_rebase_i(opts)) o.buffer_output = 2; - o.show_rename_progress = 1; + o.show_rename_progress = isatty(2); head_tree = parse_tree_indirect(head); next_tree = next ? get_commit_tree(next) : empty_tree(r); diff --git a/setup.c b/setup.c index 852b3d9141c6ed..ca3f83cc6d3538 100644 --- a/setup.c +++ b/setup.c @@ -1205,19 +1205,6 @@ static const char *allowed_bare_repo_to_string( return NULL; } -enum discovery_result { - GIT_DIR_NONE = 0, - GIT_DIR_EXPLICIT, - GIT_DIR_DISCOVERED, - GIT_DIR_BARE, - /* these are errors */ - GIT_DIR_HIT_CEILING = -1, - GIT_DIR_HIT_MOUNT_POINT = -2, - GIT_DIR_INVALID_GITFILE = -3, - GIT_DIR_INVALID_OWNERSHIP = -4, - GIT_DIR_DISALLOWED_BARE = -5, -}; - /* * We cannot decide in this function whether we are in the work tree or * not, since the config can only be read _after_ this function was called. 
@@ -1368,21 +1355,22 @@ static enum discovery_result setup_git_directory_gently_1(struct strbuf *dir, } } -int discover_git_directory(struct strbuf *commondir, - struct strbuf *gitdir) +enum discovery_result discover_git_directory_reason(struct strbuf *commondir, + struct strbuf *gitdir) { struct strbuf dir = STRBUF_INIT, err = STRBUF_INIT; size_t gitdir_offset = gitdir->len, cwd_len; size_t commondir_offset = commondir->len; struct repository_format candidate = REPOSITORY_FORMAT_INIT; + enum discovery_result result; if (strbuf_getcwd(&dir)) - return -1; + return GIT_DIR_CWD_FAILURE; cwd_len = dir.len; - if (setup_git_directory_gently_1(&dir, gitdir, NULL, 0) <= 0) { + if ((result = setup_git_directory_gently_1(&dir, gitdir, NULL, 0)) <= 0) { strbuf_release(&dir); - return -1; + return result; } /* @@ -1412,7 +1400,7 @@ int discover_git_directory(struct strbuf *commondir, strbuf_setlen(commondir, commondir_offset); strbuf_setlen(gitdir, gitdir_offset); clear_repository_format(&candidate); - return -1; + return GIT_DIR_INVALID_FORMAT; } /* take ownership of candidate.partial_clone */ @@ -1421,7 +1409,7 @@ int discover_git_directory(struct strbuf *commondir, candidate.partial_clone = NULL; clear_repository_format(&candidate); - return 0; + return result; } const char *setup_git_directory_gently(int *nongit_ok) @@ -1513,9 +1501,11 @@ const char *setup_git_directory_gently(int *nongit_ok) *nongit_ok = 1; break; case GIT_DIR_NONE: + case GIT_DIR_CWD_FAILURE: + case GIT_DIR_INVALID_FORMAT: /* * As a safeguard against setup_git_directory_gently_1 returning - * this value, fallthrough to BUG. Otherwise it is possible to + * these values, fallthrough to BUG. Otherwise it is possible to * set startup_info->have_repository to 1 when we did nothing to * find a repository. */ diff --git a/sparse-index.c b/sparse-index.c index e4a54ce19433dd..39d35e233f2ad2 100644 --- a/sparse-index.c +++ b/sparse-index.c @@ -495,6 +495,7 @@ void clear_skip_worktree_from_present_files(struct index_state *istate) int i; if (!core_apply_sparse_checkout || + core_virtualfilesystem || sparse_expect_files_outside_of_patterns) return; diff --git a/sub-process.c b/sub-process.c index 6d4232294dbee7..4635f9c39102db 100644 --- a/sub-process.c +++ b/sub-process.c @@ -4,6 +4,7 @@ #include "sub-process.h" #include "sigchain.h" #include "pkt-line.h" +#include "quote.h" int cmd2process_cmp(const void *cmp_data UNUSED, const struct hashmap_entry *eptr, @@ -80,7 +81,12 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co int err; struct child_process *process; - entry->cmd = cmd; + // BUGBUG most callers to subprocess_start() pass in "cmd" the value + // BUGBUG of find_hook() which returns a static buffer (that's only + // BUGBUG good until the next call to find_hook()). + // BUGFIX Defer assignment until we copy the string in our argv. 
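+	// BUGFIX (The assignment now happens after child_process_init() below,
+	// BUGFIX pointing entry->cmd at process->args.v[0], a copy whose
+	// BUGFIX lifetime matches this subprocess entry.)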
+ // entry->cmd = cmd; + process = &entry->process; child_process_init(process); @@ -92,6 +98,8 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co process->clean_on_exit_handler = subprocess_exit_handler; process->trace2_child_class = "subprocess"; + entry->cmd = process->args.v[0]; + err = start_command(process); if (err) { error("cannot fork to run subprocess '%s'", cmd); @@ -111,6 +119,52 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co return 0; } +int subprocess_start_strvec(struct hashmap *hashmap, + struct subprocess_entry *entry, + int is_git_cmd, + const struct strvec *argv, + subprocess_start_fn startfn) +{ + int err; + int k; + struct child_process *process; + struct strbuf quoted = STRBUF_INIT; + + process = &entry->process; + + child_process_init(process); + for (k = 0; k < argv->nr; k++) + strvec_push(&process->args, argv->v[k]); + process->use_shell = 1; + process->in = -1; + process->out = -1; + process->git_cmd = is_git_cmd; + process->clean_on_exit = 1; + process->clean_on_exit_handler = subprocess_exit_handler; + process->trace2_child_class = "subprocess"; + + sq_quote_argv_pretty("ed, argv->v); + entry->cmd = strbuf_detach("ed, NULL); + + err = start_command(process); + if (err) { + error("cannot fork to run subprocess '%s'", entry->cmd); + return err; + } + + hashmap_entry_init(&entry->ent, strhash(entry->cmd)); + + err = startfn(entry); + if (err) { + error("initialization for subprocess '%s' failed", entry->cmd); + subprocess_stop(hashmap, entry); + return err; + } + + hashmap_add(hashmap, &entry->ent); + return 0; +} + static int handshake_version(struct child_process *process, const char *welcome_prefix, int *versions, int *chosen_version) diff --git a/sub-process.h b/sub-process.h index e85f21fa1a7c2b..3d16701a0c7e9c 100644 --- a/sub-process.h +++ b/sub-process.h @@ -57,6 +57,12 @@ typedef int(*subprocess_start_fn)(struct subprocess_entry *entry); int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, const char *cmd, subprocess_start_fn startfn); +int subprocess_start_strvec(struct hashmap *hashmap, + struct subprocess_entry *entry, + int is_git_cmd, + const struct strvec *argv, + subprocess_start_fn startfn); + /* Kill a subprocess and remove it from the subprocess hashmap. 
*/ void subprocess_stop(struct hashmap *hashmap, struct subprocess_entry *entry); diff --git a/t/helper/.gitignore b/t/helper/.gitignore index 8c2ddcce95f7aa..4687ed470c5978 100644 --- a/t/helper/.gitignore +++ b/t/helper/.gitignore @@ -1,2 +1,3 @@ +/test-gvfs-protocol /test-tool /test-fake-ssh diff --git a/t/helper/test-gvfs-protocol.c b/t/helper/test-gvfs-protocol.c new file mode 100644 index 00000000000000..a437248a10feff --- /dev/null +++ b/t/helper/test-gvfs-protocol.c @@ -0,0 +1,2192 @@ +#include "cache.h" +#include "config.h" +#include "pkt-line.h" +#include "run-command.h" +#include "strbuf.h" +#include "string-list.h" +#include "trace2.h" +#include "object.h" +#include "object-store.h" +#include "replace-object.h" +#include "repository.h" +#include "version.h" +#include "dir.h" +#include "json-writer.h" +#include "oidset.h" +#include "date.h" +#include "packfile.h" + +#define TR2_CAT "test-gvfs-protocol" + +static const char *pid_file; +static int verbose; +static int reuseaddr; +static struct string_list mayhem_list = STRING_LIST_INIT_DUP; +static int mayhem_child = 0; +static struct json_writer jw_config = JSON_WRITER_INIT; + +/* + * We look for one of these "servertypes" in the uri-base + * so we can behave differently when we need to. + */ +#define MY_SERVER_TYPE__ORIGIN "servertype/origin" +#define MY_SERVER_TYPE__CACHE "servertype/cache" + +static const char test_gvfs_protocol_usage[] = +"gvfs-protocol [--verbose]\n" +" [--timeout=] [--init-timeout=] [--max-connections=]\n" +" [--reuseaddr] [--pid-file=]\n" +" [--listen=]* [--port=]\n" +" [--mayhem=]*\n" +; + +/* Timeout, and initial timeout */ +static unsigned int timeout; +static unsigned int init_timeout; + +static void logreport(const char *label, const char *err, va_list params) +{ + struct strbuf msg = STRBUF_INIT; + + strbuf_addf(&msg, "[%"PRIuMAX"] %s: ", (uintmax_t)getpid(), label); + strbuf_vaddf(&msg, err, params); + strbuf_addch(&msg, '\n'); + + fwrite(msg.buf, sizeof(char), msg.len, stderr); + fflush(stderr); + + strbuf_release(&msg); +} + +__attribute__((format (printf, 1, 2))) +static void logerror(const char *err, ...) +{ + va_list params; + va_start(params, err); + logreport("error", err, params); + va_end(params); +} + +__attribute__((format (printf, 1, 2))) +static void loginfo(const char *err, ...) +{ + va_list params; + if (!verbose) + return; + va_start(params, err); + logreport("info", err, params); + va_end(params); +} + +__attribute__((format (printf, 1, 2))) +static void logmayhem(const char *err, ...) +{ + va_list params; + if (!verbose) + return; + va_start(params, err); + logreport("mayhem", err, params); + va_end(params); +} + +static void set_keep_alive(int sockfd) +{ + int ka = 1; + + if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &ka, sizeof(ka)) < 0) { + if (errno != ENOTSOCK) + logerror("unable to set SO_KEEPALIVE on socket: %s", + strerror(errno)); + } +} + +////////////////////////////////////////////////////////////////// +// The code in this section is used by "worker" instances to service +// a single connection from a client. The worker talks to the client +// on 0 and 1. +////////////////////////////////////////////////////////////////// + +enum worker_result { + /* + * Operation successful. + * Caller *might* keep the socket open and allow keep-alive. + */ + WR_OK = 0, + /* + * Various errors while processing the request and/or the response. + * Close the socket and clean up. + * Exit child-process with non-zero status. 
+ */ + WR_IO_ERROR = 1<<0, + /* + * Close the socket and clean up. Does not imply an error. + */ + WR_HANGUP = 1<<1, + /* + * The result of a function was influenced by the mayhem settings. + * Does not imply that we need to exit or close the socket. + * Just advice to callers in the worker stack. + */ + WR_MAYHEM = 1<<2, + + WR_STOP_THE_MUSIC = (WR_IO_ERROR | WR_HANGUP), +}; + +/* + * Fields from a parsed HTTP request. + */ +struct req { + struct strbuf start_line; + struct string_list start_line_fields; + + struct strbuf uri_base; + struct strbuf gvfs_api; + struct strbuf slash_args; + struct strbuf quest_args; + + struct string_list header_list; +}; + +#define REQ__INIT { \ + .start_line = STRBUF_INIT, \ + .start_line_fields = STRING_LIST_INIT_DUP, \ + .uri_base = STRBUF_INIT, \ + .gvfs_api = STRBUF_INIT, \ + .slash_args = STRBUF_INIT, \ + .quest_args = STRBUF_INIT, \ + .header_list = STRING_LIST_INIT_NODUP, \ + } + +static void req__release(struct req *req) +{ + strbuf_release(&req->start_line); + string_list_clear(&req->start_line_fields, 0); + + strbuf_release(&req->uri_base); + strbuf_release(&req->gvfs_api); + strbuf_release(&req->slash_args); + strbuf_release(&req->quest_args); + + string_list_clear(&req->header_list, 0); +} + +/* + * Generate a somewhat bogus UUID/GUID that is good enough for + * a test suite, but without requiring platform-specific UUID + * or GUID libraries. + */ +static void gen_fake_uuid(struct strbuf *uuid) +{ + static unsigned int seq = 0; + static struct timeval tv; + static struct tm tm; + static time_t secs; + + strbuf_setlen(uuid, 0); + + if (!seq) { + gettimeofday(&tv, NULL); + secs = tv.tv_sec; + gmtime_r(&secs, &tm); + } + + /* + * Build a string that looks like: + * + * "ffffffff-eeee-dddd-cccc-bbbbbbbbbbbb" + * + * Note that the first digit in the "dddd" section gives the + * UUID type. We set it to zero so that we won't collide with + * any "real" UUIDs. + */ + strbuf_addf(uuid, "%04d%02d%02d-%02d%02d-00%02d-%04x-%08x%04x", + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, + tm.tm_sec, + (unsigned)(getpid() & 0xffff), + (unsigned)(tv.tv_usec & 0xffffffff), + (seq++ & 0xffff)); +} + +/* + * Send a chunk of data to the client using HTTP chunked + * transfer coding rules. 
+ * + * https://tools.ietf.org/html/rfc7230#section-4.1 + */ +static enum worker_result send_chunk(int fd, const unsigned char *buf, + size_t len_buf) +{ + char chunk_size[100]; + int chunk_size_len = xsnprintf(chunk_size, sizeof(chunk_size), + "%x\r\n", (unsigned int)len_buf); + + if ((write_in_full(fd, chunk_size, chunk_size_len) < 0) || + (write_in_full(fd, buf, len_buf) < 0) || + (write_in_full(fd, "\r\n", 2) < 0)) { + logerror("unable to send chunk"); + return WR_IO_ERROR; + } + + return WR_OK; +} + +static enum worker_result send_final_chunk(int fd) +{ + if (write_in_full(fd, "0\r\n\r\n", 5) < 0) { + logerror("unable to send final chunk"); + return WR_IO_ERROR; + } + + return WR_OK; +} + +static enum worker_result send_http_error( + int fd, + int http_code, const char *http_code_name, + int retry_after_seconds, enum worker_result wr_in) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf response_content = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + + strbuf_addf(&response_content, "Error: %d %s\r\n", + http_code, http_code_name); + if (retry_after_seconds > 0) + strbuf_addf(&response_content, "Retry-After: %d\r\n", + retry_after_seconds); + + strbuf_addf (&response_header, "HTTP/1.1 %d %s\r\n", http_code, http_code_name); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: text/plain\r\n"); + strbuf_addf (&response_header, "Content-Length: %d\r\n", (int)response_content.len); + if (retry_after_seconds > 0) + strbuf_addf (&response_header, "Retry-After: %d\r\n", retry_after_seconds); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(fd, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + wr = WR_IO_ERROR; + goto done; + } + + if (write_in_full(fd, response_content.buf, response_content.len) < 0) { + logerror("unable to write response content body"); + wr = WR_IO_ERROR; + goto done; + } + + wr = wr_in; + +done: + strbuf_release(&uuid); + strbuf_release(&response_header); + strbuf_release(&response_content); + + return wr; +} + +/* + * Return 1 if we send an AUTH error to the client. + */ +static int mayhem_try_auth(struct req *req, enum worker_result *wr_out) +{ + *wr_out = WR_OK; + + if (string_list_has_string(&mayhem_list, "http_401")) { + struct string_list_item *item; + int has_auth = 0; + for_each_string_list_item(item, &req->header_list) { + if (starts_with(item->string, "Authorization: Basic")) { + has_auth = 1; + break; + } + } + if (!has_auth) { + if (strstr(req->uri_base.buf, MY_SERVER_TYPE__ORIGIN)) { + logmayhem("http_401 (origin)"); + *wr_out = send_http_error(1, 401, "Unauthorized", -1, + WR_MAYHEM); + return 1; + } + + else if (strstr(req->uri_base.buf, MY_SERVER_TYPE__CACHE)) { + /* + * Cache servers use a non-standard 400 rather than a 401. + */ + logmayhem("http_400 (cacheserver)"); + *wr_out = send_http_error(1, 400, "Bad Request", -1, + WR_MAYHEM); + return 1; + } + + else { + /* + * Non-qualified server type. + */ + logmayhem("http_401"); + *wr_out = send_http_error(1, 401, "Unauthorized", -1, + WR_MAYHEM); + return 1; + } + } + } + + return 0; +} + +/* + * Build fake gvfs/config data using our IP address and port. 
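+ * Roughly, the generated payload looks like:
+ *
+ *     { "AllowedGvfsClientVersions": [ ... ],
+ *       "CacheServers": [
+ *         { "Url": "http://<addr>:<port>/servertype/cache",
+ *           "Name": "cs00", "GlobalDefault": true },
+ *         ... ] }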
+ * + * The Min/Max data is just random noise copied from the example + * in the documentation. + */ +static void build_gvfs_config_json(struct json_writer *jw, + struct string_list *listen_addr, + int listen_port) +{ + jw_object_begin(jw, 0); + { + jw_object_inline_begin_array(jw, "AllowedGvfsClientVersions"); + { + jw_array_inline_begin_object(jw); + { + jw_object_inline_begin_object(jw, "Max"); + { + jw_object_intmax(jw, "Major", 0); + jw_object_intmax(jw, "Minor", 4); + jw_object_intmax(jw, "Build", 0); + jw_object_intmax(jw, "Revision", 0); + } + jw_end(jw); + + jw_object_inline_begin_object(jw, "Min"); + { + jw_object_intmax(jw, "Major", 0); + jw_object_intmax(jw, "Minor", 2); + jw_object_intmax(jw, "Build", 0); + jw_object_intmax(jw, "Revision", 0); + } + jw_end(jw); + } + jw_end(jw); + + jw_array_inline_begin_object(jw); + { + jw_object_null(jw, "Max"); + jw_object_inline_begin_object(jw, "Min"); + { + jw_object_intmax(jw, "Major", 0); + jw_object_intmax(jw, "Minor", 5); + jw_object_intmax(jw, "Build", 16326); + jw_object_intmax(jw, "Revision", 1); + } + jw_end(jw); + } + jw_end(jw); + } + jw_end(jw); + + jw_object_inline_begin_array(jw, "CacheServers"); + { + struct string_list_item *item; + int k = 0; + + for_each_string_list_item(item, listen_addr) { + jw_array_inline_begin_object(jw); + { + struct strbuf buf = STRBUF_INIT; + + strbuf_addf(&buf, "http://%s:%d/%s", + item->string, + listen_port, + MY_SERVER_TYPE__CACHE); + jw_object_string(jw, "Url", buf.buf); + strbuf_release(&buf); + + strbuf_addf(&buf, "cs%02d", k); + jw_object_string(jw, "Name", buf.buf); + strbuf_release(&buf); + + jw_object_bool(jw, "GlobalDefault", + k++ == 0); + } + jw_end(jw); + } + } + jw_end(jw); + } + jw_end(jw); +} +/* + * Per the GVFS Protocol, this should only be recognized on the origin + * server (not the cache-server). It returns a JSON payload of config + * data. + */ +static enum worker_result do__gvfs_config__get(struct req *req) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + + if (strstr(req->uri_base.buf, MY_SERVER_TYPE__CACHE)) + return send_http_error(1, 404, "Not Found", -1, WR_OK); + + strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n"); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: text/plain\r\n"); + strbuf_addf( &response_header, "Content-Length: %d\r\n", (int)jw_config.json.len); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(1, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + wr = WR_IO_ERROR; + goto done; + } + + if (write_in_full(1, jw_config.json.buf, jw_config.json.len) < 0) { + logerror("unable to write response content body"); + wr = WR_IO_ERROR; + goto done; + } + + wr = WR_OK; + +done: + strbuf_release(&uuid); + strbuf_release(&response_header); + + return wr; +} + +/* + * Send the contents of the in-memory inflated object in "compressed + * loose object" format over the socket. + * + * Because we are using keep-alive and are streaming the compressed + * chunks as we produce them, we set the transport-encoding and not + * the content-length. 
+ * + * Our usage here is different from `git-http-backend` because it will + * only send a loose object if it exists as a loose object in the ODB + * (see the "/objects/[0-9a-f]{2}/[0-9a-f]{38}$" regex_t declarations) + * by doing a file-copy. + * + * We want to send an arbitrary object without regard for how it is + * currently stored in the local ODB. + * + * Also, we don't want any of the type-specific branching found in the + * sha1-file.c functions (such as special casing BLOBs). Specifically, + * we DO NOT want any of the content conversion filters. We just want + * to send the raw content as is. + * + * So, we steal freely from sha1-file.c routines: + * write_object_file_prepare() + * write_loose_object() + */ +static enum worker_result send_loose_object(const struct object_id *oid, + int fd) +{ +#define MAX_HEADER_LEN 32 + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + char object_header[MAX_HEADER_LEN]; + unsigned char compressed[4096]; + git_zstream stream; + struct object_id oid_check; + git_hash_ctx c; + int object_header_len; + int ret; + unsigned flags = 0; + void *content; + unsigned long size; + enum object_type type; + struct object_info oi = OBJECT_INFO_INIT; + int mayhem__corrupt_loose = string_list_has_string(&mayhem_list, + "corrupt_loose"); + + /* + * Since `test-gvfs-protocol` is mocking a real GVFS server (cache or + * main), we don't want a request for a missing object to cause the + * implicit dynamic fetch mechanism to try to fault-it-in (and cause + * our call to oid_object_info_extended() to launch another instance + * of `gvfs-helper` to magically fetch it (which would connect to a + * new instance of `test-gvfs-protocol`)). + * + * Rather, we want a missing object to fail, so we can respond with + * a 404, for example. + */ + flags |= OBJECT_INFO_FOR_PREFETCH; + flags |= OBJECT_INFO_LOOKUP_REPLACE; + + oi.typep = &type; + oi.sizep = &size; + oi.contentp = &content; + + if (oid_object_info_extended(the_repository, oid, &oi, flags)) { + logerror("Could not find OID: '%s'", oid_to_hex(oid)); + return send_http_error(1, 404, "Not Found", -1, WR_OK); + } + + if (string_list_has_string(&mayhem_list, "http_404")) { + logmayhem("http_404"); + return send_http_error(1, 404, "Not Found", -1, WR_MAYHEM); + } + + /* + * We are blending several somewhat independent concepts here: + * + * [1] reconstructing the object format in parts: + * + * ::= + * + * [1a] ::= SP NUL + * [1b] ::= + * + * [2] verify that we constructed [1] correctly by computing + * the hash of [1] and verify it matches the passed OID. + * + * [3] compress [1] because that is how loose objects are + * stored on disk. We compress it as we stream it to + * the client. + * + * [4] send HTTP response headers to the client. + * + * [5] stream each chunk from [3] to the client using the HTTP + * chunked transfer coding. + * + * [6] for extra credit, we repeat the hash construction in [2] + * as we stream it. 
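+ *
+ * For a 13-byte blob, for example, the bytes we deflate and send are
+ *     "blob 13" NUL <13 bytes of content>
+ * and hashing that same byte sequence must reproduce the requested OID.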
+ */ + + /* [4] */ + strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n"); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: application/x-git-loose-object\r\n"); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addstr(&response_header, "Transfer-Encoding: chunked\r\n"); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(fd, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + return WR_IO_ERROR; + } + + strbuf_release(&uuid); + strbuf_release(&response_header); + + if (string_list_has_string(&mayhem_list, "close_write")) { + logmayhem("close_write"); + return WR_MAYHEM | WR_HANGUP; + } + + /* [1a] */ + object_header_len = 1 + xsnprintf(object_header, MAX_HEADER_LEN, + "%s %"PRIuMAX, + type_name(*oi.typep), + (uintmax_t)*oi.sizep); + + /* [2] */ + the_hash_algo->init_fn(&c); + the_hash_algo->update_fn(&c, object_header, object_header_len); + the_hash_algo->update_fn(&c, *oi.contentp, *oi.sizep); + the_hash_algo->final_fn(oid_check.hash, &c); + if (!oideq(oid, &oid_check)) + BUG("send_loose_object[2]: invalid construction '%s' '%s'", + oid_to_hex(oid), oid_to_hex(&oid_check)); + + /* [3, 6] */ + git_deflate_init(&stream, zlib_compression_level); + stream.next_out = compressed; + stream.avail_out = sizeof(compressed); + the_hash_algo->init_fn(&c); + + /* [3, 1a, 6] */ + stream.next_in = (unsigned char *)object_header; + stream.avail_in = object_header_len; + while (git_deflate(&stream, 0) == Z_OK) + ; /* nothing */ + the_hash_algo->update_fn(&c, object_header, object_header_len); + + /* [3, 1b, 5, 6] */ + stream.next_in = *oi.contentp; + stream.avail_in = *oi.sizep; + do { + enum worker_result wr; + unsigned char *in0 = stream.next_in; + + /* + * Corrupt a byte in the buffer we compress, but undo it + * before we compute the SHA on the portion of the raw + * buffer included in the chunk we compressed. + */ + if (mayhem__corrupt_loose) { + logmayhem("corrupt_loose"); + *in0 = *in0 ^ 0xff; + } + + ret = git_deflate(&stream, Z_FINISH); + + if (mayhem__corrupt_loose) + *in0 = *in0 ^ 0xff; + + the_hash_algo->update_fn(&c, in0, stream.next_in - in0); + + /* [5] */ + wr = send_chunk(fd, compressed, stream.next_out - compressed); + if (wr & WR_STOP_THE_MUSIC) + return wr; + + stream.next_out = compressed; + stream.avail_out = sizeof(compressed); + + } while (ret == Z_OK); + + /* [3] */ + if (ret != Z_STREAM_END) + BUG("unable to deflate object '%s' (%d)", oid_to_hex(oid), ret); + ret = git_deflate_end_gently(&stream); + if (ret != Z_OK) + BUG("deflateEnd on object '%s' failed (%d)", oid_to_hex(oid), ret); + + /* [6] */ + the_hash_algo->final_fn(oid_check.hash, &c); + if (!oideq(oid, &oid_check)) + BUG("send_loose_object[6]: invalid construction '%s' '%s'", + oid_to_hex(oid), oid_to_hex(&oid_check)); + + /* [5] */ + return send_final_chunk(fd); +} + +/* + * Per the GVFS Protocol, a single OID should be in the slash-arg: + * + * GET /gvfs/objects/fc3fff3a25559d2d30d1719c4f4a6d9fe7e05170 HTTP/1.1 + * + * Look it up in our repo (loose or packed) and send it to gvfs-helper + * over the socket as a loose object. 
+ */ +static enum worker_result do__gvfs_objects__get(struct req *req) +{ + struct object_id oid; + + if (!req->slash_args.len || + get_oid_hex(req->slash_args.buf, &oid)) { + logerror("invalid OID in GET gvfs/objects: '%s'", + req->slash_args.buf); + return WR_IO_ERROR; + } + + trace2_printf("%s: GET %s", TR2_CAT, oid_to_hex(&oid)); + + return send_loose_object(&oid, 1); +} + +static enum worker_result read_json_post_body( + struct req *req, + struct oidset *oids, + int *nr_oids) +{ + struct object_id oid; + struct string_list_item *item; + char *post_body = NULL; + const char *v; + ssize_t len_expected = 0; + ssize_t len_received; + const char *pkey; + const char *plbracket; + const char *pstart; + const char *pend; + + for_each_string_list_item(item, &req->header_list) { + if (skip_prefix(item->string, "Content-Length: ", &v)) { + char *p; + len_expected = strtol(v, &p, 10); + break; + } + } + if (!len_expected) { + logerror("no content length in POST"); + return WR_IO_ERROR; + } + post_body = xcalloc(1, len_expected + 1); + if (!post_body) { + logerror("could not malloc buffer for POST body"); + return WR_IO_ERROR; + } + len_received = read_in_full(0, post_body, len_expected); + if (len_received != len_expected) { + logerror("short read in POST (expected %d, received %d)", + (int)len_expected, (int)len_received); + return WR_IO_ERROR; + } + + /* + * A very primitive JSON parser for a very fixed and well-known + * message format. Please don't judge me. + * + * We expect: + * + * ..."objectIds":["","",...""]... + * + * We expect compact (non-pretty) JSON, but do allow it. + */ + pkey = strstr(post_body, "\"objectIds\""); + if (!pkey) + goto could_not_parse_json; + plbracket = strchr(pkey, '['); + if (!plbracket) + goto could_not_parse_json; + pstart = plbracket + 1; + + while (1) { + /* Eat leading whitespace before opening DQUOTE */ + while (*pstart && isspace(*pstart)) + pstart++; + if (!*pstart) + goto could_not_parse_json; + pstart++; + + /* find trailing DQUOTE */ + pend = strchr(pstart, '"'); + if (!pend) + goto could_not_parse_json; + + if (get_oid_hex(pstart, &oid)) + goto could_not_parse_json; + if (!oidset_insert(oids, &oid)) + *nr_oids += 1; + trace2_printf("%s: POST %s", TR2_CAT, oid_to_hex(&oid)); + + /* Eat trailing whitespace after trailing DQUOTE */ + pend++; + while (*pend && isspace(*pend)) + pend++; + if (!*pend) + goto could_not_parse_json; + + /* End of list or is there another OID */ + if (*pend == ']') + break; + if (*pend != ',') + goto could_not_parse_json; + + pstart = pend + 1; + } + + /* + * We do not care about the "commitDepth" parameter. + */ + + free(post_body); + return WR_OK; + +could_not_parse_json: + logerror("could not parse JSON in POST body"); + free(post_body); + return WR_IO_ERROR; +} + +/* + * Since this is a test helper, I'm going to be lazy and + * run pack-objects as a background child using pipe_command + * and get the resulting packfile into a buffer. And then + * the caller can pump it to the client over the socket. + * + * This avoids the need to set up a custom loop (like in + * upload-pack) to drive it and/or the use of a bunch of + * tempfiles. + * + * My assumption here is that we're not testing with GBs + * of data.... 
+ */ +static enum worker_result get_packfile_from_oids( + struct oidset *oids, + struct strbuf *buf_packfile) +{ + struct child_process pack_objects = CHILD_PROCESS_INIT; + struct strbuf buf_child_stdin = STRBUF_INIT; + struct strbuf buf_child_stderr = STRBUF_INIT; + struct oidset_iter iter; + struct object_id *oid; + enum worker_result wr; + int result; + + strvec_push(&pack_objects.args, "git"); + strvec_push(&pack_objects.args, "pack-objects"); + strvec_push(&pack_objects.args, "-q"); + strvec_push(&pack_objects.args, "--revs"); + strvec_push(&pack_objects.args, "--delta-base-offset"); + strvec_push(&pack_objects.args, "--window=0"); + strvec_push(&pack_objects.args, "--depth=4095"); + strvec_push(&pack_objects.args, "--compression=1"); + strvec_push(&pack_objects.args, "--stdout"); + + pack_objects.in = -1; + pack_objects.out = -1; + pack_objects.err = -1; + + oidset_iter_init(oids, &iter); + while ((oid = oidset_iter_next(&iter))) + strbuf_addf(&buf_child_stdin, "%s\n", oid_to_hex(oid)); + strbuf_addstr(&buf_child_stdin, "\n"); + + result = pipe_command(&pack_objects, + buf_child_stdin.buf, buf_child_stdin.len, + buf_packfile, 0, + &buf_child_stderr, 0); + if (result) { + logerror("pack-objects failed: %s", buf_child_stderr.buf); + wr = WR_IO_ERROR; + goto done; + } + + wr = WR_OK; + +done: + strbuf_release(&buf_child_stdin); + strbuf_release(&buf_child_stderr); + + return wr; +} + +static enum worker_result send_packfile_from_buffer(const struct strbuf *packfile) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + + strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n"); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, "Content-Type: application/x-git-packfile\r\n"); + strbuf_addf( &response_header, "Content-Length: %d\r\n", (int)packfile->len); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(1, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + wr = WR_IO_ERROR; + goto done; + } + + if (write_in_full(1, packfile->buf, packfile->len) < 0) { + logerror("unable to write response content body"); + wr = WR_IO_ERROR; + goto done; + } + + wr = WR_OK; + +done: + strbuf_release(&uuid); + strbuf_release(&response_header); + + return wr; +} + +/* + * The GVFS Protocol POST verb behaves like GET for non-commit objects + * (in that it just returns the requested object), but for commit + * objects POST *also* returns all trees referenced by the commit. + * + * The goal of this test is to confirm that: + * [] `gvfs-helper post` can request and receive a packfile at all. + * [] `gvfs-helper post` can handle getting either a packfile or a + * loose object. + * + * Therefore, I'm not going to blur the issue and support the custom + * semantics for commit objects. + * + * If one of the OIDs is a commit, `git pack-objects` will completely + * walk the trees and blobs for it and we get that for free. This is + * good enough for our testing. 
+ * + * TODO A proper solution would separate the commit objects and do a + * TODO `rev-list --filter=blobs:none` for them (or use the internal + * TODO list-objects API) and a regular enumeration for the non-commit + * TODO objects. And build an new oidset with union of those and then + * TODO call pack-objects on it instead. + * TODO + * TODO But that's too much trouble for now. + * + * For now, we just need to know if the post asks for a single object, + * is it a commit or non-commit. That is sufficient to know whether + * we should send a packfile or loose object. +*/ +static enum worker_result classify_oids_in_post( + struct oidset *oids, int nr_oids, int *need_packfile) +{ + struct oidset_iter iter; + struct object_id *oid; + enum object_type type; + struct object_info oi = OBJECT_INFO_INIT; + unsigned flags = 0; + + if (nr_oids > 1) { + *need_packfile = 1; + return WR_OK; + } + + /* disable missing-object faulting */ + flags |= OBJECT_INFO_FOR_PREFETCH; + flags |= OBJECT_INFO_LOOKUP_REPLACE; + + oi.typep = &type; + + oidset_iter_init(oids, &iter); + while ((oid = oidset_iter_next(&iter))) { + if (!oid_object_info_extended(the_repository, oid, &oi, flags) && + type == OBJ_COMMIT) { + *need_packfile = 1; + return WR_OK; + } + } + + *need_packfile = 0; + return WR_OK; +} + +static enum worker_result do__gvfs_objects__post(struct req *req) +{ + struct oidset oids = OIDSET_INIT; + struct strbuf packfile = STRBUF_INIT; + enum worker_result wr; + int nr_oids = 0; + int need_packfile = 0; + + wr = read_json_post_body(req, &oids, &nr_oids); + if (wr & WR_STOP_THE_MUSIC) + goto done; + + wr = classify_oids_in_post(&oids, nr_oids, &need_packfile); + if (wr & WR_STOP_THE_MUSIC) + goto done; + + if (!need_packfile) { + struct oidset_iter iter; + struct object_id *oid; + + oidset_iter_init(&oids, &iter); + oid = oidset_iter_next(&iter); + + wr = send_loose_object(oid, 1); + } else { + wr = get_packfile_from_oids(&oids, &packfile); + if (wr & WR_STOP_THE_MUSIC) + goto done; + + wr = send_packfile_from_buffer(&packfile); + } + +done: + oidset_clear(&oids); + strbuf_release(&packfile); + + return wr; +} + +/* + * bswap.h only defines big endian functions. + * The GVFS Protocol defines fields in little endian. + */ +static inline uint64_t my_get_le64(uint64_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap64(le_val); +#endif +} + +static inline uint16_t my_get_le16(uint16_t le_val) +{ +#if GIT_BYTE_ORDER == GIT_LITTLE_ENDIAN + return le_val; +#else + return default_bswap16(le_val); +#endif +} + +/* + * GVFS Protocol headers for the multipack format + * All integer values are little-endian on the wire. + * + * Note: technically, the protocol defines the `ph` fields as signed, but + * that makes a mess of the bswap routines and we're not going to overflow + * them for a very long time. + */ + +static unsigned char v1_h[6] = { 'G', 'P', 'R', 'E', ' ', 0x01 }; + +struct ph { + uint64_t timestamp; + uint64_t len_pack; + uint64_t len_idx; +}; + +/* + * Accumulate a list of commits-and-trees packfiles we have in the local ODB. + * The test script should have pre-created a set of "ct-.pack" and .idx + * files for us. We serve these as is and DO NOT try to dynamically create + * new commits/trees packfiles (like the cache-server does). We are only + * testing if/whether gvfs-helper.exe can receive one or more packfiles and + * idx files over the protocol. 
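+ *
+ * For example, the script may have created ct-1626000000.pack and
+ * ct-1626000000.idx; the number after "ct-" is the timestamp we report
+ * in the per-packfile header below.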
+ */ +struct ct_pack_item { + struct ph ph; + struct strbuf path_pack; + struct strbuf path_idx; +}; + +static void ct_pack_item__free(struct ct_pack_item *item) +{ + if (!item) + return; + strbuf_release(&item->path_pack); + strbuf_release(&item->path_idx); + free(item); +} + +struct ct_pack_data { + struct ct_pack_item **items; + size_t nr, alloc; +}; + +static void ct_pack_data__release(struct ct_pack_data *data) +{ + int k; + + if (!data) + return; + + for (k = 0; k < data->nr; k++) + ct_pack_item__free(data->items[k]); + + FREE_AND_NULL(data->items); + data->nr = 0; + data->alloc = 0; +} + +static void cb_ct_pack(const char *full_path, size_t full_path_len, + const char *file_path, void *void_data) +{ + struct ct_pack_data *data = void_data; + struct ct_pack_item *item = NULL; + struct stat st; + const char *v; + + /* + * We only want "ct-.pack" files. The test script creates + * cached commits-and-trees packfiles with this prefix to avoid + * confusion with prefetch packfiles received by gvfs-helper. + */ + if (!ends_with(file_path, ".pack")) + return; + if (!skip_prefix(file_path, "ct-", &v)) + return; + + item = (struct ct_pack_item *)xcalloc(1, sizeof(*item)); + strbuf_init(&item->path_pack, 0); + strbuf_addstr(&item->path_pack, full_path); + + strbuf_init(&item->path_idx, 0); + strbuf_addstr(&item->path_idx, full_path); + strbuf_strip_suffix(&item->path_idx, ".pack"); + strbuf_addstr(&item->path_idx, ".idx"); + + item->ph.timestamp = (uint64_t)strtoul(v, NULL, 10); + + lstat(item->path_pack.buf, &st); + item->ph.len_pack = (uint64_t)st.st_size; + + if (string_list_has_string(&mayhem_list, "no_prefetch_idx")) + item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t); + else if (lstat(item->path_idx.buf, &st) < 0) + item->ph.len_idx = maximum_unsigned_value_of_type(uint64_t); + else + item->ph.len_idx = (uint64_t)st.st_size; + + ALLOC_GROW(data->items, data->nr + 1, data->alloc); + data->items[data->nr++] = item; +} + +/* + * Sort by increasing EPOCH time. + */ +static int ct_pack_sort_compare(const void *_a, const void *_b) +{ + const struct ct_pack_item *a = *(const struct ct_pack_item **)_a; + const struct ct_pack_item *b = *(const struct ct_pack_item **)_b; + return (a->ph.timestamp < b->ph.timestamp) ? -1 : (a->ph.timestamp != b->ph.timestamp); +} + +static enum worker_result send_ct_item(const struct ct_pack_item *item) +{ + struct ph ph_le; + int fd_pack = -1; + int fd_idx = -1; + enum worker_result wr = WR_OK; + + /* send per-packfile header. all fields are little-endian on the wire. 
*/ + ph_le.timestamp = my_get_le64(item->ph.timestamp); + ph_le.len_pack = my_get_le64(item->ph.len_pack); + ph_le.len_idx = my_get_le64(item->ph.len_idx); + + if (write_in_full(1, &ph_le, sizeof(ph_le)) < 0) { + logerror("unable to write ph_le"); + wr = WR_IO_ERROR; + goto done; + } + + trace2_printf("%s: sending prefetch pack '%s'", TR2_CAT, item->path_pack.buf); + + fd_pack = git_open_cloexec(item->path_pack.buf, O_RDONLY); + if (fd_pack == -1 || copy_fd(fd_pack, 1)) { + logerror("could not send packfile"); + wr = WR_IO_ERROR; + goto done; + } + + if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t)) { + trace2_printf("%s: sending prefetch idx '%s'", TR2_CAT, item->path_idx.buf); + + fd_idx = git_open_cloexec(item->path_idx.buf, O_RDONLY); + if (fd_idx == -1 || copy_fd(fd_idx, 1)) { + logerror("could not send idx"); + wr = WR_IO_ERROR; + goto done; + } + } + +done: + if (fd_pack != -1) + close(fd_pack); + if (fd_idx != -1) + close(fd_idx); + return wr; +} + +/* + * The GVFS Protocol defines the lastTimeStamp parameter as the value + * of the last prefetch pack that the client has. Therefore, we only + * want to send newer ones. + */ +static int want_ct_pack(const struct ct_pack_item *item, timestamp_t last_timestamp) +{ + return item->ph.timestamp > last_timestamp; +} + +static enum worker_result send_multipack(struct ct_pack_data *data, + timestamp_t last_timestamp) +{ + struct strbuf response_header = STRBUF_INIT; + struct strbuf uuid = STRBUF_INIT; + enum worker_result wr; + size_t content_len = 0; + unsigned short np = 0; + unsigned short np_le; + int k; + + /* + * Precompute the content-length so that we don't have to deal with + * chunking it. + */ + content_len += sizeof(v1_h) + sizeof(np); + for (k = 0; k < data->nr; k++) { + struct ct_pack_item *item = data->items[k]; + + if (!want_ct_pack(item, last_timestamp)) + continue; + + np++; + content_len += sizeof(struct ph); + content_len += item->ph.len_pack; + if (item->ph.len_idx != maximum_unsigned_value_of_type(uint64_t)) + content_len += item->ph.len_idx; + } + + strbuf_addstr(&response_header, "HTTP/1.1 200 OK\r\n"); + strbuf_addstr(&response_header, "Cache-Control: private\r\n"); + strbuf_addstr(&response_header, + "Content-Type: application/x-gvfs-timestamped-packfiles-indexes\r\n"); + strbuf_addf( &response_header, "Content-Length: %d\r\n", (int)content_len); + strbuf_addf( &response_header, "Server: test-gvfs-protocol/%s\r\n", git_version_string); + strbuf_addf( &response_header, "Date: %s\r\n", show_date(time(NULL), 0, DATE_MODE(RFC2822))); + gen_fake_uuid(&uuid); + strbuf_addf( &response_header, "X-VSS-E2EID: %s\r\n", uuid.buf); + strbuf_addstr(&response_header, "\r\n"); + + if (write_in_full(1, response_header.buf, response_header.len) < 0) { + logerror("unable to write response header"); + wr = WR_IO_ERROR; + goto done; + } + + /* send protocol version header */ + if (write_in_full(1, v1_h, sizeof(v1_h)) < 0) { + logerror("unabled to write v1_h"); + wr = WR_IO_ERROR; + goto done; + } + + /* send number of packfiles */ + np_le = my_get_le16(np); + if (write_in_full(1, &np_le, sizeof(np_le)) < 0) { + logerror("unable to write np"); + wr = WR_IO_ERROR; + goto done; + } + + for (k = 0; k < data->nr; k++) { + if (!want_ct_pack(data->items[k], last_timestamp)) + continue; + + wr = send_ct_item(data->items[k]); + if (wr != WR_OK) + goto done; + } + + wr = WR_OK; + +done: + strbuf_release(&uuid); + strbuf_release(&response_header); + + return wr; +} + +static enum worker_result do__gvfs_prefetch__get(struct req 
*req) +{ + struct ct_pack_data data; + timestamp_t last_timestamp = 0; + enum worker_result wr; + + memset(&data, 0, sizeof(data)); + + if (req->quest_args.len) { + const char *key = strstr(req->quest_args.buf, "lastPackTimestamp="); + if (key) { + const char *val; + if (skip_prefix(key, "lastPackTimestamp=", &val)) { + last_timestamp = strtol(val, NULL, 10); + } + } + } + trace2_printf("%s: prefetch/since %"PRItime, TR2_CAT, last_timestamp); + + for_each_file_in_pack_dir(get_object_directory(), cb_ct_pack, &data); + QSORT(data.items, data.nr, ct_pack_sort_compare); + + wr = send_multipack(&data, last_timestamp); + + ct_pack_data__release(&data); + + return wr; +} + +/* + * Read the HTTP request up to the start of the optional message-body. + * We do this byte-by-byte because we have keep-alive turned on and + * cannot rely on an EOF. + * + * https://tools.ietf.org/html/rfc7230 + * https://github.com/microsoft/VFSForGit/blob/master/Protocol.md + * + * We cannot call die() here because our caller needs to properly + * respond to the client and/or close the socket before this + * child exits so that the client doesn't get a connection reset + * by peer error. + */ +static enum worker_result req__read(struct req *req, int fd) +{ + struct strbuf h = STRBUF_INIT; + int nr_start_line_fields; + const char *uri_target; + const char *http_version; + const char *gvfs; + + /* + * Read line 0 of the request and split it into component parts: + * + * SP SP CRLF + * + */ + if (strbuf_getwholeline_fd(&req->start_line, fd, '\n') == EOF) + return WR_OK | WR_HANGUP; + + if (string_list_has_string(&mayhem_list, "close_read")) { + logmayhem("close_read"); + return WR_MAYHEM | WR_HANGUP; + } + + if (string_list_has_string(&mayhem_list, "close_read_1") && + mayhem_child == 0) { + /* + * Mayhem: fail the first request, but let retries succeed. + */ + logmayhem("close_read_1"); + return WR_MAYHEM | WR_HANGUP; + } + + strbuf_trim_trailing_newline(&req->start_line); + + nr_start_line_fields = string_list_split(&req->start_line_fields, + req->start_line.buf, + ' ', -1); + if (nr_start_line_fields != 3) { + logerror("could not parse request start-line '%s'", + req->start_line.buf); + return WR_IO_ERROR; + } + uri_target = req->start_line_fields.items[1].string; + http_version = req->start_line_fields.items[2].string; + + if (strcmp(http_version, "HTTP/1.1")) { + logerror("unsupported version '%s' (expecting HTTP/1.1)", + http_version); + return WR_IO_ERROR; + } + + /* + * Next, extract the GVFS terms from the . The + * GVFS Protocol defines a REST API containing several GVFS + * commands of the form: + * + * []/gvfs/[/] + * []/gvfs/[?] + * + * For example: + * "GET /gvfs/config HTTP/1.1" + * "GET /gvfs/objects/aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd HTTP/1.1" + * "GET /gvfs/prefetch?lastPackTimestamp=123456789 HTTP/1.1" + * + * "GET //gvfs/config HTTP/1.1" + * "GET //gvfs/objects/aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd HTTP/1.1" + * "GET //gvfs/prefetch?lastPackTimestamp=123456789 HTTP/1.1" + * + * "POST //gvfs/objects HTTP/1.1" + * + * For other testing later, we also allow non-gvfs URLs of the form: + * "GET /[?] HTTP/1.1" + * + * We do not attempt to split the query-params within the args. + * The caller can do that if they need to. 
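+ *
+ * For example, "GET /foo/gvfs/objects/<oid> HTTP/1.1" is split into
+ * uri_base "/foo", gvfs_api "gvfs/objects" and slash_args "<oid>", while
+ * "GET /gvfs/prefetch?lastPackTimestamp=123 HTTP/1.1" leaves quest_args
+ * set to "lastPackTimestamp=123".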
+ */ + gvfs = strstr(uri_target, "/gvfs/"); + if (gvfs) { + strbuf_add(&req->uri_base, uri_target, (gvfs - uri_target)); + strbuf_trim_trailing_dir_sep(&req->uri_base); + + gvfs += 6; /* skip "/gvfs/" */ + strbuf_add(&req->gvfs_api, "gvfs/", 5); + while (*gvfs && *gvfs != '/' && *gvfs != '?') + strbuf_addch(&req->gvfs_api, *gvfs++); + + /* + */ + if (*gvfs == '/') + strbuf_addstr(&req->slash_args, gvfs + 1); + else if (*gvfs == '?') + strbuf_addstr(&req->quest_args, gvfs + 1); + } else { + + const char *quest = strchr(uri_target, '?'); + + if (quest) { + strbuf_add(&req->uri_base, uri_target, (quest - uri_target)); + strbuf_trim_trailing_dir_sep(&req->uri_base); + strbuf_addstr(&req->quest_args, quest + 1); + } else { + strbuf_addstr(&req->uri_base, uri_target); + strbuf_trim_trailing_dir_sep(&req->uri_base); + } + } + + /* + * Read the set of HTTP headers into a string-list. + */ + while (1) { + if (strbuf_getwholeline_fd(&h, fd, '\n') == EOF) + goto done; + strbuf_trim_trailing_newline(&h); + + if (!h.len) + goto done; /* a blank line ends the header */ + + string_list_append(&req->header_list, + strbuf_detach(&h, NULL)); + } + + /* + * TODO If the set of HTTP headers includes things like: + * TODO + * TODO Connection: Upgrade, HTTP2-Settings + * TODO Upgrade: h2c + * TODO HTTP2-Settings: AAMAAABkAARAAAAAAAIAAAAA + * TODO + * TODO then the client is asking to optionally switch to HTTP/2. + * TODO + * TODO We currently DO NOT support that (and I don't currently + * TODO see a need to do so (because we don't need the multiplexed + * TODO streams feature (because the client never asks for n packfiles + * TODO at the same time))). + * TODO + * TODO https://en.wikipedia.org/wiki/HTTP/1.1_Upgrade_header + */ + + /* + * We do not attempt to read the , if it exists. + * We let our caller read/chunk it in as appropriate. + */ +done: + +#if 0 + /* + * This is useful for debugging the request, but very noisy. 
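	 *
	 * To use it, change the "#if 0" above to "#if 1" and run the
	 * server with a trace2 target enabled (for example, point
	 * GIT_TRACE2_PERF at a file); otherwise trace2_is_enabled()
	 * is false and the dump is skipped.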
+ */ + if (trace2_is_enabled()) { + struct string_list_item *item; + trace2_printf("%s: %s", TR2_CAT, req->start_line.buf); + for_each_string_list_item(item, &req->start_line_fields) + trace2_printf("%s: Field: %s", TR2_CAT, item->string); + trace2_printf("%s: [uri-base '%s'][gvfs '%s'][args '%s' '%s']", + TR2_CAT, + req->uri_base.buf, + req->gvfs_api.buf, + req->slash_args.buf, + req->quest_args.buf); + for_each_string_list_item(item, &req->header_list) + trace2_printf("%s: Hdrs: %s", TR2_CAT, item->string); + } +#endif + + return WR_OK; +} + +static enum worker_result dispatch(struct req *req) +{ + static regex_t *smart_http_regex; + static int initialized; + const char *method; + enum worker_result wr; + + if (string_list_has_string(&mayhem_list, "close_no_write")) { + logmayhem("close_no_write"); + return WR_MAYHEM | WR_HANGUP; + } + if (string_list_has_string(&mayhem_list, "http_503")) { + logmayhem("http_503"); + return send_http_error(1, 503, "Service Unavailable", 2, + WR_MAYHEM | WR_HANGUP); + } + if (string_list_has_string(&mayhem_list, "http_429")) { + logmayhem("http_429"); + return send_http_error(1, 429, "Too Many Requests", 2, + WR_MAYHEM | WR_HANGUP); + } + if (string_list_has_string(&mayhem_list, "http_429_1") && + mayhem_child == 0) { + logmayhem("http_429_1"); + return send_http_error(1, 429, "Too Many Requests", 2, + WR_MAYHEM | WR_HANGUP); + } + if (mayhem_try_auth(req, &wr)) + return wr; + + method = req->start_line_fields.items[0].string; + + if (!strcmp(req->gvfs_api.buf, "gvfs/objects")) { + + if (!strcmp(method, "GET")) + return do__gvfs_objects__get(req); + if (!strcmp(method, "POST")) + return do__gvfs_objects__post(req); + } + + if (!strcmp(req->gvfs_api.buf, "gvfs/config")) { + + if (!strcmp(method, "GET")) + return do__gvfs_config__get(req); + } + + if (!strcmp(req->gvfs_api.buf, "gvfs/prefetch")) { + + if (!strcmp(method, "GET")) + return do__gvfs_prefetch__get(req); + } + + if (!initialized) { + smart_http_regex = xmalloc(sizeof(*smart_http_regex)); + if (regcomp(smart_http_regex, "^/(HEAD|info/refs|" + "objects/info/[^/]+|git-(upload|receive)-pack)$", + REG_EXTENDED)) { + warning("could not compile smart HTTP regex"); + smart_http_regex = NULL; + } + initialized = 1; + } + + if (smart_http_regex && + !regexec(smart_http_regex, req->uri_base.buf, 0, NULL, 0)) { + const char *ok = "HTTP/1.1 200 OK\r\n"; + struct child_process cp = CHILD_PROCESS_INIT; + int i, res; + + if (write(1, ok, strlen(ok)) < 0) + return error(_("could not send '%s'"), ok); + + strvec_pushf(&cp.env, "REQUEST_METHOD=%s", method); + strvec_pushf(&cp.env, "PATH_TRANSLATED=%s", + req->uri_base.buf); + /* Prevent MSYS2 from "converting to a Windows path" */ + strvec_pushf(&cp.env, + "MSYS2_ENV_CONV_EXCL=PATH_TRANSLATED"); + strvec_push(&cp.env, "SERVER_PROTOCOL=HTTP/1.1"); + if (req->quest_args.len) + strvec_pushf(&cp.env, "QUERY_STRING=%s", + req->quest_args.buf); + for (i = 0; i < req->header_list.nr; i++) { + const char *header = req->header_list.items[i].string; + if (!strncasecmp("Content-Type: ", header, 14)) + strvec_pushf(&cp.env, "CONTENT_TYPE=%s", + header + 14); + else if (!strncasecmp("Content-Length: ", header, 16)) + strvec_pushf(&cp.env, "CONTENT_LENGTH=%s", + header + 16); + } + cp.git_cmd = 1; + strvec_push(&cp.args, "http-backend"); + res = run_command(&cp); + close(1); + close(0); + return !!res; + } + + return send_http_error(1, 501, "Not Implemented", -1, + WR_OK | WR_HANGUP); +} + +static enum worker_result worker(void) +{ + struct req req = REQ__INIT; + char 
*client_addr = getenv("REMOTE_ADDR"); + char *client_port = getenv("REMOTE_PORT"); + enum worker_result wr = WR_OK; + + if (client_addr) + loginfo("Connection from %s:%s", client_addr, client_port); + + set_keep_alive(0); + + while (1) { + req__release(&req); + + alarm(init_timeout ? init_timeout : timeout); + wr = req__read(&req, 0); + alarm(0); + + if (wr & WR_STOP_THE_MUSIC) + break; + + wr = dispatch(&req); + if (wr & WR_STOP_THE_MUSIC) + break; + } + + close(0); + close(1); + + return !!(wr & WR_IO_ERROR); +} + +////////////////////////////////////////////////////////////////// +// This section contains the listener and child-process management +// code used by the primary instance to accept incoming connections +// and dispatch them to async child process "worker" instances. +////////////////////////////////////////////////////////////////// + +static int addrcmp(const struct sockaddr_storage *s1, + const struct sockaddr_storage *s2) +{ + const struct sockaddr *sa1 = (const struct sockaddr*) s1; + const struct sockaddr *sa2 = (const struct sockaddr*) s2; + + if (sa1->sa_family != sa2->sa_family) + return sa1->sa_family - sa2->sa_family; + if (sa1->sa_family == AF_INET) + return memcmp(&((struct sockaddr_in *)s1)->sin_addr, + &((struct sockaddr_in *)s2)->sin_addr, + sizeof(struct in_addr)); +#ifndef NO_IPV6 + if (sa1->sa_family == AF_INET6) + return memcmp(&((struct sockaddr_in6 *)s1)->sin6_addr, + &((struct sockaddr_in6 *)s2)->sin6_addr, + sizeof(struct in6_addr)); +#endif + return 0; +} + +static int max_connections = 32; + +static unsigned int live_children; + +static struct child { + struct child *next; + struct child_process cld; + struct sockaddr_storage address; +} *firstborn; + +static void add_child(struct child_process *cld, struct sockaddr *addr, socklen_t addrlen) +{ + struct child *newborn, **cradle; + + newborn = xcalloc(1, sizeof(*newborn)); + live_children++; + memcpy(&newborn->cld, cld, sizeof(*cld)); + memcpy(&newborn->address, addr, addrlen); + for (cradle = &firstborn; *cradle; cradle = &(*cradle)->next) + if (!addrcmp(&(*cradle)->address, &newborn->address)) + break; + newborn->next = *cradle; + *cradle = newborn; +} + +/* + * This gets called if the number of connections grows + * past "max_connections". + * + * We kill the newest connection from a duplicate IP. 
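 *
 * add_child() inserts each new connection immediately before the first
 * existing entry with the same address, so duplicates are grouped
 * newest-first.  Killing the first entry of the first adjacent pair
 * with equal addresses therefore kills the most recent connection from
 * that client; e.g. given the list A-new, A-old, B we SIGTERM A-new.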
+ */ +static void kill_some_child(void) +{ + const struct child *blanket, *next; + + if (!(blanket = firstborn)) + return; + + for (; (next = blanket->next); blanket = next) + if (!addrcmp(&blanket->address, &next->address)) { + kill(blanket->cld.pid, SIGTERM); + break; + } +} + +static void check_dead_children(void) +{ + int status; + pid_t pid; + + struct child **cradle, *blanket; + for (cradle = &firstborn; (blanket = *cradle);) + if ((pid = waitpid(blanket->cld.pid, &status, WNOHANG)) > 1) { + const char *dead = ""; + if (status) + dead = " (with error)"; + loginfo("[%"PRIuMAX"] Disconnected%s", (uintmax_t)pid, dead); + + /* remove the child */ + *cradle = blanket->next; + live_children--; + child_process_clear(&blanket->cld); + free(blanket); + } else + cradle = &blanket->next; +} + +static struct strvec cld_argv = STRVEC_INIT; +static void handle(int incoming, struct sockaddr *addr, socklen_t addrlen) +{ + struct child_process cld = CHILD_PROCESS_INIT; + + if (max_connections && live_children >= max_connections) { + kill_some_child(); + sleep(1); /* give it some time to die */ + check_dead_children(); + if (live_children >= max_connections) { + close(incoming); + logerror("Too many children, dropping connection"); + return; + } + } + + if (addr->sa_family == AF_INET) { + char buf[128] = ""; + struct sockaddr_in *sin_addr = (void *) addr; + inet_ntop(addr->sa_family, &sin_addr->sin_addr, buf, sizeof(buf)); + strvec_pushf(&cld.env, "REMOTE_ADDR=%s", buf); + strvec_pushf(&cld.env, "REMOTE_PORT=%d", + ntohs(sin_addr->sin_port)); +#ifndef NO_IPV6 + } else if (addr->sa_family == AF_INET6) { + char buf[128] = ""; + struct sockaddr_in6 *sin6_addr = (void *) addr; + inet_ntop(AF_INET6, &sin6_addr->sin6_addr, buf, sizeof(buf)); + strvec_pushf(&cld.env, "REMOTE_ADDR=[%s]", buf); + strvec_pushf(&cld.env, "REMOTE_PORT=%d", + ntohs(sin6_addr->sin6_port)); +#endif + } + + if (mayhem_list.nr) { + strvec_pushf(&cld.env, "MAYHEM_CHILD=%d", + mayhem_child++); + } + + strvec_pushv(&cld.args, cld_argv.v); + cld.in = incoming; + cld.out = dup(incoming); + + if (cld.out < 0) + logerror("could not dup() `incoming`"); + else if (start_command(&cld)) + logerror("unable to fork"); + else + add_child(&cld, addr, addrlen); +} + +static void child_handler(int signo) +{ + /* + * Otherwise empty handler because systemcalls will get interrupted + * upon signal receipt + * SysV needs the handler to be rearmed + */ + signal(SIGCHLD, child_handler); +} + +static int set_reuse_addr(int sockfd) +{ + int on = 1; + + if (!reuseaddr) + return 0; + return setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, + &on, sizeof(on)); +} + +struct socketlist { + int *list; + size_t nr; + size_t alloc; +}; + +static const char *ip2str(int family, struct sockaddr *sin, socklen_t len) +{ +#ifdef NO_IPV6 + static char ip[INET_ADDRSTRLEN]; +#else + static char ip[INET6_ADDRSTRLEN]; +#endif + + switch (family) { +#ifndef NO_IPV6 + case AF_INET6: + inet_ntop(family, &((struct sockaddr_in6*)sin)->sin6_addr, ip, len); + break; +#endif + case AF_INET: + inet_ntop(family, &((struct sockaddr_in*)sin)->sin_addr, ip, len); + break; + default: + xsnprintf(ip, sizeof(ip), ""); + } + return ip; +} + +#ifndef NO_IPV6 + +static int setup_named_sock(char *listen_addr, int listen_port, struct socketlist *socklist) +{ + int socknum = 0; + char pbuf[NI_MAXSERV]; + struct addrinfo hints, *ai0, *ai; + int gai; + long flags; + + xsnprintf(pbuf, sizeof(pbuf), "%d", listen_port); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = 
SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + hints.ai_flags = AI_PASSIVE; + + gai = getaddrinfo(listen_addr, pbuf, &hints, &ai0); + if (gai) { + logerror("getaddrinfo() for %s failed: %s", listen_addr, gai_strerror(gai)); + return 0; + } + + for (ai = ai0; ai; ai = ai->ai_next) { + int sockfd; + + sockfd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if (sockfd < 0) + continue; + if (sockfd >= FD_SETSIZE) { + logerror("Socket descriptor too large"); + close(sockfd); + continue; + } + +#ifdef IPV6_V6ONLY + if (ai->ai_family == AF_INET6) { + int on = 1; + setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, + &on, sizeof(on)); + /* Note: error is not fatal */ + } +#endif + + if (set_reuse_addr(sockfd)) { + logerror("Could not set SO_REUSEADDR: %s", strerror(errno)); + close(sockfd); + continue; + } + + set_keep_alive(sockfd); + + if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) { + logerror("Could not bind to %s: %s", + ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen), + strerror(errno)); + close(sockfd); + continue; /* not fatal */ + } + if (listen(sockfd, 5) < 0) { + logerror("Could not listen to %s: %s", + ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen), + strerror(errno)); + close(sockfd); + continue; /* not fatal */ + } + + flags = fcntl(sockfd, F_GETFD, 0); + if (flags >= 0) + fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC); + + ALLOC_GROW(socklist->list, socklist->nr + 1, socklist->alloc); + socklist->list[socklist->nr++] = sockfd; + socknum++; + } + + freeaddrinfo(ai0); + + return socknum; +} + +#else /* NO_IPV6 */ + +static int setup_named_sock(char *listen_addr, int listen_port, struct socketlist *socklist) +{ + struct sockaddr_in sin; + int sockfd; + long flags; + + memset(&sin, 0, sizeof sin); + sin.sin_family = AF_INET; + sin.sin_port = htons(listen_port); + + if (listen_addr) { + /* Well, host better be an IP address here. 
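		 * (inet_pton() does no name resolution, so in a NO_IPV6
		 * build --listen must be a numeric address such as
		 * 127.0.0.1, which is also what cmd_main() falls back to
		 * when no --listen= option is given.)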
*/ + if (inet_pton(AF_INET, listen_addr, &sin.sin_addr.s_addr) <= 0) + return 0; + } else { + sin.sin_addr.s_addr = htonl(INADDR_ANY); + } + + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) + return 0; + + if (set_reuse_addr(sockfd)) { + logerror("Could not set SO_REUSEADDR: %s", strerror(errno)); + close(sockfd); + return 0; + } + + set_keep_alive(sockfd); + + if ( bind(sockfd, (struct sockaddr *)&sin, sizeof sin) < 0 ) { + logerror("Could not bind to %s: %s", + ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)), + strerror(errno)); + close(sockfd); + return 0; + } + + if (listen(sockfd, 5) < 0) { + logerror("Could not listen to %s: %s", + ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)), + strerror(errno)); + close(sockfd); + return 0; + } + + flags = fcntl(sockfd, F_GETFD, 0); + if (flags >= 0) + fcntl(sockfd, F_SETFD, flags | FD_CLOEXEC); + + ALLOC_GROW(socklist->list, socklist->nr + 1, socklist->alloc); + socklist->list[socklist->nr++] = sockfd; + return 1; +} + +#endif + +static void socksetup(struct string_list *listen_addr, int listen_port, struct socketlist *socklist) +{ + if (!listen_addr->nr) + setup_named_sock("127.0.0.1", listen_port, socklist); + else { + int i, socknum; + for (i = 0; i < listen_addr->nr; i++) { + socknum = setup_named_sock(listen_addr->items[i].string, + listen_port, socklist); + + if (socknum == 0) + logerror("unable to allocate any listen sockets for host %s on port %u", + listen_addr->items[i].string, listen_port); + } + } +} + +static int service_loop(struct socketlist *socklist) +{ + struct pollfd *pfd; + int i; + + CALLOC_ARRAY(pfd, socklist->nr); + + for (i = 0; i < socklist->nr; i++) { + pfd[i].fd = socklist->list[i]; + pfd[i].events = POLLIN; + } + + signal(SIGCHLD, child_handler); + + for (;;) { + int i; + int nr_ready; + int timeout = (pid_file ? 100 : -1); + + check_dead_children(); + + nr_ready = poll(pfd, socklist->nr, timeout); + if (nr_ready < 0) { + if (errno != EINTR) { + logerror("Poll failed, resuming: %s", + strerror(errno)); + sleep(1); + } + continue; + } + else if (nr_ready == 0) { + /* + * If we have a pid_file, then we watch it. + * If someone deletes it, we shutdown the service. + * The shell scripts in the test suite will use this. + */ + if (!pid_file || file_exists(pid_file)) + continue; + goto shutdown; + } + + for (i = 0; i < socklist->nr; i++) { + if (pfd[i].revents & POLLIN) { + union { + struct sockaddr sa; + struct sockaddr_in sai; +#ifndef NO_IPV6 + struct sockaddr_in6 sai6; +#endif + } ss; + socklen_t sslen = sizeof(ss); + int incoming = accept(pfd[i].fd, &ss.sa, &sslen); + if (incoming < 0) { + switch (errno) { + case EAGAIN: + case EINTR: + case ECONNABORTED: + continue; + default: + die_errno("accept returned"); + } + } + handle(incoming, &ss.sa, sslen); + } + } + } + +shutdown: + loginfo("Starting graceful shutdown (pid-file gone)"); + for (i = 0; i < socklist->nr; i++) + close(socklist->list[i]); + + return 0; +} + +static int serve(struct string_list *listen_addr, int listen_port) +{ + struct socketlist socklist = { NULL, 0, 0 }; + + socksetup(listen_addr, listen_port, &socklist); + if (socklist.nr == 0) + die("unable to allocate any listen sockets on port %u", + listen_port); + + loginfo("Ready to rumble"); + + /* + * Wait to create the pid-file until we've setup the sockets + * and are open for business. 
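	 *
	 * The test scripts rely on this ordering: t5799-gvfs-helper.sh
	 * polls for the pid-file to decide that the server is ready to
	 * accept connections, and later deletes the file to request the
	 * graceful shutdown that service_loop() performs when its 100ms
	 * poll notices the file is gone.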
+ */ + if (pid_file) + write_file(pid_file, "%"PRIuMAX, (uintmax_t) getpid()); + + return service_loop(&socklist); +} + +////////////////////////////////////////////////////////////////// +// This section is executed by both the primary instance and all +// worker instances. So, yes, each child-process re-parses the +// command line argument and re-discovers how it should behave. +////////////////////////////////////////////////////////////////// + +int cmd_main(int argc, const char **argv) +{ + int listen_port = 0; + struct string_list listen_addr = STRING_LIST_INIT_NODUP; + int worker_mode = 0; + int i; + + trace2_cmd_name("test-gvfs-protocol"); + setup_git_directory_gently(NULL); + + for (i = 1; i < argc; i++) { + const char *arg = argv[i]; + const char *v; + + if (skip_prefix(arg, "--listen=", &v)) { + string_list_append(&listen_addr, xstrdup_tolower(v)); + continue; + } + if (skip_prefix(arg, "--port=", &v)) { + char *end; + unsigned long n; + n = strtoul(v, &end, 0); + if (*v && !*end) { + listen_port = n; + continue; + } + } + if (!strcmp(arg, "--worker")) { + worker_mode = 1; + trace2_cmd_mode("worker"); + continue; + } + if (!strcmp(arg, "--verbose")) { + verbose = 1; + continue; + } + if (skip_prefix(arg, "--timeout=", &v)) { + timeout = atoi(v); + continue; + } + if (skip_prefix(arg, "--init-timeout=", &v)) { + init_timeout = atoi(v); + continue; + } + if (skip_prefix(arg, "--max-connections=", &v)) { + max_connections = atoi(v); + if (max_connections < 0) + max_connections = 0; /* unlimited */ + continue; + } + if (!strcmp(arg, "--reuseaddr")) { + reuseaddr = 1; + continue; + } + if (skip_prefix(arg, "--pid-file=", &v)) { + pid_file = v; + continue; + } + if (skip_prefix(arg, "--mayhem=", &v)) { + string_list_append(&mayhem_list, v); + continue; + } + + usage(test_gvfs_protocol_usage); + } + + /* avoid splitting a message in the middle */ + setvbuf(stderr, NULL, _IOFBF, 4096); + + if (listen_port == 0) + listen_port = DEFAULT_GIT_PORT; + + /* + * If no --listen= args are given, the setup_named_sock() + * code will use receive a NULL address and set INADDR_ANY. + * This exposes both internal and external interfaces on the + * port. + * + * Disallow that and default to the internal-use-only loopback + * address. + */ + if (!listen_addr.nr) + string_list_append(&listen_addr, "127.0.0.1"); + + /* + * worker_mode is set in our own child process instances + * (that are bound to a connected socket from a client). + */ + if (worker_mode) { + if (mayhem_list.nr) { + const char *string = getenv("MAYHEM_CHILD"); + if (string && *string) + mayhem_child = atoi(string); + } + + build_gvfs_config_json(&jw_config, &listen_addr, listen_port); + + return worker(); + } + + /* + * `cld_argv` is a bit of a clever hack. The top-level instance + * of test-gvfs-protocol.exe does the normal bind/listen/accept + * stuff. For each incoming socket, the top-level process spawns + * a child instance of test-gvfs-protocol.exe *WITH* the additional + * `--worker` argument. This causes the child to set `worker_mode` + * and immediately call `worker()` using the connected socket (and + * without the usual need for fork() or threads). + * + * The magic here is made possible because `cld_argv` is static + * and handle() (called by service_loop()) knows about it. + */ + strvec_push(&cld_argv, argv[0]); + strvec_push(&cld_argv, "--worker"); + for (i = 1; i < argc; ++i) + strvec_push(&cld_argv, argv[i]); + + /* + * Setup primary instance to listen for connections. 
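	 *
	 * A typical invocation (as used by t5799-gvfs-helper.sh, run from
	 * inside the repository whose objects should be served) is:
	 *
	 *     test-gvfs-protocol --verbose --listen=127.0.0.1 \
	 *         --port=$GIT_TEST_GVFS_PROTOCOL_PORT --reuseaddr \
	 *         --pid-file="$PID_FILE" [--mayhem=<keyword>]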
+ */ + return serve(&listen_addr, listen_port); +} diff --git a/t/perf/p2000-sparse-operations.sh b/t/perf/p2000-sparse-operations.sh index fce8151d41cbbd..eff9e55712c710 100755 --- a/t/perf/p2000-sparse-operations.sh +++ b/t/perf/p2000-sparse-operations.sh @@ -55,7 +55,7 @@ test_expect_success 'setup repo and indexes' ' git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v3 && ( cd full-v3 && - git sparse-checkout init --cone && + git sparse-checkout init --cone --no-sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 3 && git update-index --index-version=3 && @@ -64,7 +64,7 @@ test_expect_success 'setup repo and indexes' ' git -c core.sparseCheckoutCone=true clone --branch=wide --sparse . full-v4 && ( cd full-v4 && - git sparse-checkout init --cone && + git sparse-checkout init --cone --no-sparse-index && git sparse-checkout set $SPARSE_CONE && git config index.version 4 && git update-index --index-version=4 && diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 502b4bcf9ea0ad..4e3a6bd043603b 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -1105,6 +1105,11 @@ test_expect_success 'writing this tree with --missing-ok' ' git write-tree --missing-ok ' +test_expect_success 'writing this tree with missing ok config value' ' + git config core.gvfs 4 && + git write-tree +' + ################################################################ test_expect_success 'git read-tree followed by write-tree should be idempotent' ' diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index 408aeededc4295..98fbc2312e46e6 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -335,6 +335,47 @@ test_expect_success "filter: smudge empty file" ' test_cmp expected filtered-empty-in-repo ' +test_expect_success "filter: clean filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + echo dead data walking >empty-in-repo && + test_must_fail git add empty-in-repo +' + +test_expect_success "filter: smudge filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + test_must_fail git checkout && + + # ensure the local core.gvfs setting overwrites the global setting + git config --global core.gvfs false && + test_must_fail git checkout +' + +test_expect_success "ident blocked on add when under GVFS" ' + test_config core.gvfs 64 && + test_config core.autocrlf false && + + echo "*.i ident" >.gitattributes && + echo "\$Id\$" > ident.i && + + test_must_fail git add ident.i +' + +test_expect_success "ident blocked when under GVFS" ' + git add ident.i && + + git commit -m "added ident.i" && + test_config core.gvfs 64 && + rm ident.i && + + test_must_fail git checkout -- ident.i +' + test_expect_success 'disable filter with empty override' ' test_config_global filter.disable.smudge false && test_config_global filter.disable.clean false && diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index a94ac1eae377c0..7b1c10274e3d80 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -334,6 +334,18 @@ checkout_files () { " } +test_expect_success 'crlf conversions blocked when under GVFS' ' + git checkout -b gvfs && + test_commit initial && + rm initial.t && + test_config core.gvfs 64 && + test_config core.autocrlf true && + test_must_fail git read-tree --reset -u HEAD && 
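	# core.gvfs bit 64 blocks filters and EOL conversions in this
	# fork, so the read-tree -u above must fail while autocrlf is
	# enabled; once autocrlf is switched back off below, the same
	# command succeeds.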
+ + git config core.autocrlf false && + git read-tree --reset -u HEAD +' + # Test control characters # NUL SOH CR EOF==^Z test_expect_success 'ls-files --eol -o Text/Binary' ' diff --git a/t/t0400-pre-command-hook.sh b/t/t0400-pre-command-hook.sh new file mode 100755 index 00000000000000..f2a9115e299385 --- /dev/null +++ b/t/t0400-pre-command-hook.sh @@ -0,0 +1,69 @@ +#!/bin/sh + +test_description='pre-command hook' + +. ./test-lib.sh + +test_expect_success 'with no hook' ' + echo "first" > file && + git add file && + git commit -m "first" +' + +test_expect_success 'with succeeding hook' ' + mkdir -p .git/hooks && + write_script .git/hooks/pre-command <<-EOF && + echo "\$*" | sed "s/ --git-pid=[0-9]*//" \ + >\$(git rev-parse --git-dir)/pre-command.out + EOF + echo "second" >> file && + git add file && + test "add file" = "$(cat .git/pre-command.out)" && + echo Hello | git hash-object --stdin && + test "hash-object --stdin" = "$(cat .git/pre-command.out)" +' + +test_expect_success 'with failing hook' ' + write_script .git/hooks/pre-command <<-EOF && + exit 1 + EOF + echo "third" >> file && + test_must_fail git add file && + test_path_is_missing "$(cat .git/pre-command.out)" +' + +test_expect_success 'in a subdirectory' ' + echo touch i-was-here | write_script .git/hooks/pre-command && + mkdir sub && + ( + cd sub && + git version + ) && + test_path_is_file sub/i-was-here +' + +test_expect_success 'in a subdirectory, using an alias' ' + git reset --hard && + echo "echo \"\$@; \$(pwd)\" >>log" | + write_script .git/hooks/pre-command && + mkdir -p sub && + ( + cd sub && + git -c alias.v="version" v + ) && + test_path_is_missing log && + test_line_count = 2 sub/log +' + +test_expect_success 'with core.hooksPath' ' + mkdir -p .git/alternateHooks && + write_script .git/alternateHooks/pre-command <<-EOF && + echo "alternate" >\$(git rev-parse --git-dir)/pre-command.out + EOF + write_script .git/hooks/pre-command <<-EOF && + echo "original" >\$(git rev-parse --git-dir)/pre-command.out + EOF + git -c core.hooksPath=.git/alternateHooks status && + test "alternate" = "$(cat .git/pre-command.out)" +' +test_done diff --git a/t/t0401-post-command-hook.sh b/t/t0401-post-command-hook.sh new file mode 100755 index 00000000000000..fcbfc4a0c79c1e --- /dev/null +++ b/t/t0401-post-command-hook.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +test_description='post-command hook' + +. ./test-lib.sh + +test_expect_success 'with no hook' ' + echo "first" > file && + git add file && + git commit -m "first" +' + +test_expect_success 'with succeeding hook' ' + mkdir -p .git/hooks && + write_script .git/hooks/post-command <<-EOF && + echo "\$*" | sed "s/ --git-pid=[0-9]*//" \ + >\$(git rev-parse --git-dir)/post-command.out + EOF + echo "second" >> file && + git add file && + test "add file --exit_code=0" = "$(cat .git/post-command.out)" +' + +test_expect_success 'with failing pre-command hook' ' + write_script .git/hooks/pre-command <<-EOF && + exit 1 + EOF + echo "third" >> file && + test_must_fail git add file && + test_path_is_missing "$(cat .git/post-command.out)" +' + +test_done diff --git a/t/t0402-block-command-on-gvfs.sh b/t/t0402-block-command-on-gvfs.sh new file mode 100755 index 00000000000000..3ec7620ce6194d --- /dev/null +++ b/t/t0402-block-command-on-gvfs.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='block commands in GVFS repo' + +. 
./test-lib.sh + +not_with_gvfs () { + command=$1 && + shift && + test_expect_success "test $command $*" " + test_config alias.g4rbled $command && + test_config core.gvfs true && + test_must_fail git $command $* && + test_must_fail git g4rbled $* && + test_unconfig core.gvfs && + test_must_fail git -c core.gvfs=true $command $* && + test_must_fail git -c core.gvfs=true g4rbled $* + " +} + +not_with_gvfs fsck +not_with_gvfs gc +not_with_gvfs gc --auto +not_with_gvfs prune +not_with_gvfs repack +not_with_gvfs submodule status +not_with_gvfs update-index --index-version 2 +not_with_gvfs update-index --skip-worktree +not_with_gvfs update-index --no-skip-worktree +not_with_gvfs update-index --split-index +not_with_gvfs worktree list + +test_expect_success 'test gc --auto succeeds when disabled via config' ' + test_config core.gvfs true && + test_config gc.auto 0 && + git gc --auto +' + +test_done diff --git a/t/t0410/read-object b/t/t0410/read-object new file mode 100755 index 00000000000000..02c799837f4057 --- /dev/null +++ b/t/t0410/read-object @@ -0,0 +1,118 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "../.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! + exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . 
"\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40,64})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? "status=error" : "status=success"); + packet_flush(); + + open my $log, '>>.git/read-object-hook.log'; + print $log "Read object $sha1, exit code $?\n"; + close $log; + } else { + die "bad command '$command'"; + } +} diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh new file mode 100755 index 00000000000000..af97ec5b50f356 --- /dev/null +++ b/t/t0411-read-object.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +test_description='tests for long running read-object process' + +. ./test-lib.sh + +test_expect_success 'setup host repo with a root commit' ' + test_commit zero && + hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) +' + +test_expect_success 'blobs can be retrieved from the host repo' ' + git init guest-repo && + (cd guest-repo && + mkdir -p .git/hooks && + cp $TEST_DIRECTORY/t0410/read-object .git/hooks/ && + git config core.virtualizeobjects true && + git cat-file blob "$hash1") +' + +test_expect_success 'invalid blobs generate errors' ' + (cd guest-repo && + test_must_fail git cat-file blob "invalid") +' + +test_expect_success 'read-object-hook is bypassed when writing objects' ' + (cd guest-repo && + echo hello >hello.txt && + git add hello.txt && + hash="$(git rev-parse --verify :hello.txt)" && + ! grep "$hash" .git/read-object-hook.log) +' + +test_done diff --git a/t/t1016-read-tree-skip-sha-on-read.sh b/t/t1016-read-tree-skip-sha-on-read.sh new file mode 100755 index 00000000000000..5b76a80a0020dc --- /dev/null +++ b/t/t1016-read-tree-skip-sha-on-read.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='check that read-tree works with core.gvfs config value' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-read-tree.sh + +test_expect_success setup ' + echo one >a && + git add a && + git commit -m initial +' +test_expect_success 'read-tree without core.gvsf' ' + read_tree_u_must_succeed -m -u HEAD +' + +test_expect_success 'read-tree with core.gvfs set to 1' ' + git config core.gvfs 1 && + read_tree_u_must_succeed -m -u HEAD +' + +test_done diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 529844e2862c74..02b393e36a7d28 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -106,6 +106,26 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success 'checkout does not delete items outside the sparse checkout file' ' + # The "core.virtualfilesystem" config will prevent the + # SKIP_WORKTREE flag from being dropped on files present on-disk. 
+ test_config core.virtualfilesystem true && + + test_config core.gvfs 8 && + git checkout -b outside && + echo "new file1" >d && + git add --sparse d && + git commit -m "branch initial" && + echo "new file1" >e && + git add --sparse e && + git commit -m "skipped worktree" && + git update-index --skip-worktree e && + echo "/d" >.git/info/sparse-checkout && + git checkout HEAD^ && + test_path_is_file d && + test_path_is_file e +' + test_expect_success MINGW 'no unnecessary opendir() with fscache' ' git clone . fscache-test && ( diff --git a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh index de1ec89007d787..5528fe486c17c0 100755 --- a/t/t1091-sparse-checkout-builtin.sh +++ b/t/t1091-sparse-checkout-builtin.sh @@ -761,6 +761,10 @@ test_expect_success 'cone mode clears ignored subdirectories' ' git -C repo status --porcelain=v2 >out && test_must_be_empty out && + git -C repo -c index.deleteSparseDirectories=false sparse-checkout reapply && + test_path_is_dir repo/folder1 && + test_path_is_dir repo/deep/deeper2 && + git -C repo sparse-checkout reapply && test_path_is_missing repo/folder1 && test_path_is_missing repo/deep/deeper2 && diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh index b9350c075c2a02..038d3f6cc6e005 100755 --- a/t/t1092-sparse-checkout-compatibility.sh +++ b/t/t1092-sparse-checkout-compatibility.sh @@ -155,6 +155,7 @@ init_repos () { git -C sparse-index reset --hard && # initialize sparse-checkout definitions + git -C sparse-checkout config index.sparse false && git -C sparse-checkout sparse-checkout init --cone && git -C sparse-checkout sparse-checkout set deep && git -C sparse-index sparse-checkout init --cone --sparse-index && @@ -504,6 +505,43 @@ test_expect_success 'diff --cached' ' test_all_match git diff --cached ' +test_expect_success 'diff partially-staged' ' + init_repos && + + write_script edit-contents <<-\EOF && + echo text >>$1 + EOF + + # Add file within cone + test_all_match git sparse-checkout set deep && + run_on_all ../edit-contents deep/testfile && + test_all_match git add deep/testfile && + run_on_all ../edit-contents deep/testfile && + + test_all_match git diff && + test_all_match git diff --staged && + + # Add file outside cone + test_all_match git reset --hard && + run_on_all mkdir newdirectory && + run_on_all ../edit-contents newdirectory/testfile && + test_all_match git sparse-checkout set newdirectory && + test_all_match git add newdirectory/testfile && + run_on_all ../edit-contents newdirectory/testfile && + test_all_match git sparse-checkout set && + + test_all_match git diff && + test_all_match git diff --staged && + + # Merge conflict outside cone + test_all_match git reset --hard && + test_all_match git checkout merge-left && + test_all_match test_must_fail git merge merge-right && + + test_all_match git diff && + test_all_match git diff --staged +' + # NEEDSWORK: sparse-checkout behaves differently from full-checkout when # running this test with 'df-conflict-2' after 'df-conflict-1'. test_expect_success 'diff with renames and conflicts' ' @@ -978,7 +1016,9 @@ test_expect_success 'read-tree --merge with directory-file conflicts' ' test_expect_success 'merge, cherry-pick, and rebase' ' init_repos && - for OPERATION in "merge -m merge" cherry-pick "rebase --apply" "rebase --merge" + # microsoft/git specific: we need to use "quiet" mode + # to avoid different stderr for some rebases. 
+ for OPERATION in "merge -m merge" cherry-pick "rebase -q --apply" "rebase -q --merge" do test_all_match git checkout -B temp update-deep && test_all_match git $OPERATION update-folder1 && @@ -1428,6 +1468,11 @@ test_expect_success 'sparse-index is not expanded' ' ensure_not_expanded reset --merge update-deep && ensure_not_expanded reset --hard && + echo a test change >>sparse-index/README.md && + ensure_not_expanded diff && + git -C sparse-index add README.md && + ensure_not_expanded diff --staged && + ensure_not_expanded reset base -- deep/a && ensure_not_expanded reset base -- nonexistent-file && ensure_not_expanded reset deepest -- deep && @@ -1723,6 +1768,46 @@ test_expect_success 'sparse index is not expanded: sparse-checkout' ' ensure_not_expanded sparse-checkout set ' +# NEEDSWORK: although the full repository's index is _not_ expanded as part of +# stash, a temporary index, which is _not_ sparse, is created when stashing and +# applying a stash of untracked files. As a result, the test reports that it +# finds an instance of `ensure_full_index`, but it does not carry with it the +# performance implications of expanding the full repository index. +test_expect_success 'sparse index is not expanded: stash -u' ' + init_repos && + + mkdir -p sparse-index/folder1 && + echo >>sparse-index/README.md && + echo >>sparse-index/a && + echo >>sparse-index/folder1/new && + + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \ + git -C sparse-index stash -u && + test_region index ensure_full_index trace2.txt && + + GIT_TRACE2_EVENT="$(pwd)/trace2.txt" GIT_TRACE2_EVENT_NESTING=10 \ + git -C sparse-index stash pop && + test_region index ensure_full_index trace2.txt +' + +# NEEDSWORK: similar to `git add`, untracked files outside of the sparse +# checkout definition are successfully stashed and unstashed. +test_expect_success 'stash -u outside sparse checkout definition' ' + init_repos && + + write_script edit-contents <<-\EOF && + echo text >>$1 + EOF + + run_on_sparse mkdir -p folder1 && + run_on_all ../edit-contents folder1/new && + test_all_match git stash -u && + test_all_match git status --porcelain=v2 && + + test_all_match git stash pop -q && + test_all_match git status --porcelain=v2 +' + # NEEDSWORK: a sparse-checkout behaves differently from a full checkout # in this scenario, but it shouldn't. test_expect_success 'reset mixed and checkout orphan' ' diff --git a/t/t1093-virtualfilesystem.sh b/t/t1093-virtualfilesystem.sh new file mode 100755 index 00000000000000..cad13d680cb199 --- /dev/null +++ b/t/t1093-virtualfilesystem.sh @@ -0,0 +1,382 @@ +#!/bin/sh + +test_description='virtual file system tests' + +. ./test-lib.sh + +clean_repo () { + rm .git/index && + git -c core.virtualfilesystem= reset --hard HEAD && + git -c core.virtualfilesystem= clean -fd && + touch untracked.txt && + touch dir1/untracked.txt && + touch dir2/untracked.txt +} + +test_expect_success 'setup' ' + git branch -M main && + mkdir -p .git/hooks/ && + cat > .gitignore <<-\EOF && + .gitignore + expect* + actual* + EOF + mkdir -p dir1 && + touch dir1/file1.txt && + touch dir1/file2.txt && + mkdir -p dir2 && + touch dir2/file1.txt && + touch dir2/file2.txt && + git add . 
&& + git commit -m "initial" && + git config --local core.virtualfilesystem .git/hooks/virtualfilesystem +' + +test_expect_success 'test hook parameters and version' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + if test "$#" -ne 1 + then + echo "$0: Exactly 1 argument expected" >&2 + exit 2 + fi + + if test "$1" != 1 + then + echo "$0: Unsupported hook version." >&2 + exit 1 + fi + EOF + git status && + write_script .git/hooks/virtualfilesystem <<-\EOF && + exit 3 + EOF + test_must_fail git status +' + +test_expect_success 'verify status is clean' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir2/file1.txt\0" + EOF + rm -f .git/index && + git checkout -f && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir2/file1.txt\0" + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + EOF + git status > actual && + cat > expected <<-\EOF && + On branch main + nothing to commit, working tree clean + EOF + test_cmp expected actual +' + +test_expect_success 'verify skip-worktree bit is set for absolute path' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + EOF + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/file1.txt + S dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify skip-worktree bit is cleared for absolute path' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file2.txt\0" + EOF + git ls-files -v > actual && + cat > expected <<-\EOF && + S dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify folder wild cards' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/\0" + EOF + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify folders not included are ignored' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git add . && + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify including one file doesnt include the rest' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + printf "dir1/dir2/a\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git add . && + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1/dir2/a + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify files not listed are ignored by git clean -f -x' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "untracked.txt\0" + printf "dir1/\0" + EOF + mkdir -p dir3 && + touch dir3/untracked.txt && + git clean -f -x && + test ! -f untracked.txt && + test -d dir1 && + test -f dir1/file1.txt && + test -f dir1/file2.txt && + test ! 
-f dir1/untracked.txt && + test -f dir2/file1.txt && + test -f dir2/file2.txt && + test -f dir2/untracked.txt && + test -d dir3 && + test -f dir3/untracked.txt +' + +test_expect_success 'verify files not listed are ignored by git clean -f -d -x' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "untracked.txt\0" + printf "dir1/\0" + printf "dir3/\0" + EOF + mkdir -p dir3 && + touch dir3/untracked.txt && + git clean -f -d -x && + test ! -f untracked.txt && + test -d dir1 && + test -f dir1/file1.txt && + test -f dir1/file2.txt && + test ! -f dir1/untracked.txt && + test -f dir2/file1.txt && + test -f dir2/file2.txt && + test -f dir2/untracked.txt && + test ! -d dir3 && + test ! -f dir3/untracked.txt +' + +test_expect_success 'verify folder entries include all files' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git status -su > actual && + cat > expected <<-\EOF && + ?? dir1/a + ?? dir1/b + ?? dir1/dir2/a + ?? dir1/dir2/b + ?? dir1/untracked.txt + EOF + test_cmp expected actual +' + +test_expect_success 'verify case insensitivity of virtual file system entries' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/a\0" + printf "Dir1/Dir2/a\0" + printf "DIR2/\0" + EOF + mkdir -p dir1/dir2 && + touch dir1/a && + touch dir1/b && + touch dir1/dir2/a && + touch dir1/dir2/b && + git -c core.ignorecase=false status -su > actual && + cat > expected <<-\EOF && + ?? dir1/a + EOF + test_cmp expected actual && + git -c core.ignorecase=true status -su > actual && + cat > expected <<-\EOF && + ?? dir1/a + ?? dir1/dir2/a + ?? dir2/untracked.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file created' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file3.txt\0" + EOF + touch dir1/file3.txt && + git add . && + git ls-files -v > actual && + cat > expected <<-\EOF && + S dir1/file1.txt + S dir1/file2.txt + H dir1/file3.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file renamed' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + printf "dir1/file3.txt\0" + EOF + mv dir1/file1.txt dir1/file3.txt && + git status -su > actual && + cat > expected <<-\EOF && + D dir1/file1.txt + ?? dir1/file3.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file deleted' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + EOF + rm dir1/file1.txt && + git status -su > actual && + cat > expected <<-\EOF && + D dir1/file1.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on file overwritten' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/file1.txt\0" + EOF + echo "overwritten" > dir1/file1.txt && + git status -su > actual && + cat > expected <<-\EOF && + M dir1/file1.txt + EOF + test_cmp expected actual +' + +test_expect_success 'on folder created' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/dir1/\0" + EOF + mkdir -p dir1/dir1 && + git status -su > actual && + cat > expected <<-\EOF && + EOF + test_cmp expected actual && + git clean -fd && + test ! 
-d "/dir1/dir1" +' + +test_expect_success 'on folder renamed' ' + clean_repo && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir3/\0" + printf "dir1/file1.txt\0" + printf "dir1/file2.txt\0" + printf "dir3/file1.txt\0" + printf "dir3/file2.txt\0" + EOF + mv dir1 dir3 && + git status -su > actual && + cat > expected <<-\EOF && + D dir1/file1.txt + D dir1/file2.txt + ?? dir3/file1.txt + ?? dir3/file2.txt + ?? dir3/untracked.txt + EOF + test_cmp expected actual +' + +test_expect_success 'folder with same prefix as file' ' + clean_repo && + touch dir1.sln && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/\0" + printf "dir1.sln\0" + EOF + git add dir1.sln && + git ls-files -v > actual && + cat > expected <<-\EOF && + H dir1.sln + H dir1/file1.txt + H dir1/file2.txt + S dir2/file1.txt + S dir2/file2.txt + EOF + test_cmp expected actual +' + +test_expect_success MINGW,FSMONITOR_DAEMON 'virtualfilesystem hook disables built-in FSMonitor' ' + clean_repo && + test_config core.usebuiltinfsmonitor true && + write_script .git/hooks/virtualfilesystem <<-\EOF && + printf "dir1/\0" + EOF + git config core.virtualfilesystem .git/hooks/virtualfilesystem && + git status && + test_must_fail git fsmonitor--daemon status +' + +test_done diff --git a/t/t5583-vfs.sh b/t/t5583-vfs.sh new file mode 100755 index 00000000000000..8a703cbb640387 --- /dev/null +++ b/t/t5583-vfs.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='fetch using the flag to skip reachability and upload pack' + +. ./test-lib.sh + + +test_expect_success setup ' + echo inital >a && + git add a && + git commit -m initial && + git clone . one +' + +test_expect_success "fetch test" ' + cd one && + git config core.gvfs 16 && + rm -rf .git/objects/* && + git -C .. cat-file commit HEAD | git hash-object -w --stdin -t commit && + git fetch && + test_must_fail git rev-parse --verify HEAD^{tree} +' + +test_done \ No newline at end of file diff --git a/t/t5799-gvfs-helper.sh b/t/t5799-gvfs-helper.sh new file mode 100755 index 00000000000000..3a10b6b68ff87d --- /dev/null +++ b/t/t5799-gvfs-helper.sh @@ -0,0 +1,1325 @@ +#!/bin/sh + +test_description='test gvfs-helper and GVFS Protocol' + +. ./test-lib.sh + +# Set the port for t/helper/test-gvfs-protocol.exe from either the +# environment or from the test number of this shell script. +# +test_set_port GIT_TEST_GVFS_PROTOCOL_PORT + +# Setup the following repos: +# +# repo_src: +# A normal, no-magic, fully-populated clone of something. +# No GVFS (aka VFS4G). No Scalar. No partial-clone. +# This will be used by "t/helper/test-gvfs-protocol.exe" +# to serve objects. +# +# repo_t1: +# An empty repo with no contents nor commits. That is, +# everything is missing. For the tests based on this repo, +# we don't care why it is missing objects (or if we could +# actually use it). We are only testing explicit object +# fetching using gvfs-helper.exe in isolation. +# +REPO_SRC="$(pwd)"/repo_src +REPO_T1="$(pwd)"/repo_t1 + +# Setup some loopback URLs where test-gvfs-protocol.exe will be +# listening. We will spawn it directly inside the repo_src directory, +# so we don't need any of the directory mapping or configuration +# machinery found in "git-daemon.exe" or "git-http-backend.exe". +# +# This lets us use the "uri-base" part of the URL (prior to the REST +# API "/gvfs/") to control how our mock server responds. For +# example, only the origin (main Git) server supports "/gvfs/config". 
+# +# For example, this means that if we add a remote containing $ORIGIN_URL, +# it will work with gvfs-helper, but not for fetch (without some mapping +# tricks). +# +HOST_PORT=127.0.0.1:$GIT_TEST_GVFS_PROTOCOL_PORT +ORIGIN_URL=http://$HOST_PORT/servertype/origin +CACHE_URL=http://$HOST_PORT/servertype/cache + +SHARED_CACHE_T1="$(pwd)"/shared_cache_t1 + +# The pid-file is created by test-gvfs-protocol.exe when it starts. +# The server will shut down if/when we delete it. (This is a little +# easier than killing it by PID.) +# +PID_FILE="$(pwd)"/pid-file.pid +SERVER_LOG="$(pwd)"/OUT.server.log + +PATH="$GIT_BUILD_DIR/t/helper/:$PATH" && export PATH + +OIDS_FILE="$(pwd)"/oid_list.txt +OIDS_CT_FILE="$(pwd)"/oid_ct_list.txt +OIDS_BLOBS_FILE="$(pwd)"/oids_blobs_file.txt +OID_ONE_BLOB_FILE="$(pwd)"/oid_one_blob_file.txt +OID_ONE_COMMIT_FILE="$(pwd)"/oid_one_commit_file.txt + +# Get a list of available OIDs in repo_src so that we can try to fetch +# them and so that we don't have to hard-code a list of known OIDs. +# This doesn't need to be a complete list -- just enough to drive some +# representative tests. +# +# Optionally require that we find a minimum number of OIDs. +# +get_list_of_oids () { + git -C "$REPO_SRC" rev-list --objects HEAD | sed 's/ .*//' | sort >"$OIDS_FILE" + + if test $# -eq 1 + then + actual_nr=$(wc -l <"$OIDS_FILE") + if test $actual_nr -lt $1 + then + echo "get_list_of_oids: insufficient data. Need $1 OIDs." + return 1 + fi + fi + return 0 +} + +get_list_of_blobs_oids () { + git -C "$REPO_SRC" ls-tree HEAD | grep ' blob ' | awk "{print \$3}" | sort >"$OIDS_BLOBS_FILE" + head -1 <"$OIDS_BLOBS_FILE" >"$OID_ONE_BLOB_FILE" +} + +get_list_of_commit_and_tree_oids () { + git -C "$REPO_SRC" cat-file --batch-check --batch-all-objects | awk "/commit|tree/ {print \$1}" | sort >"$OIDS_CT_FILE" + + if test $# -eq 1 + then + actual_nr=$(wc -l <"$OIDS_CT_FILE") + if test $actual_nr -lt $1 + then + echo "get_list_of_commit_and_tree_oids: insufficient data. Need $1 OIDs." + return 1 + fi + fi + return 0 +} + +get_one_commit_oid () { + git -C "$REPO_SRC" rev-parse HEAD >"$OID_ONE_COMMIT_FILE" + return 0 +} + +# Create a commits-and-trees packfile for use with "prefetch" +# using the given range of commits. +# +create_commits_and_trees_packfile () { + if test $# -eq 2 + then + epoch=$1 + revs=$2 + else + echo "create_commits_and_trees_packfile: Need 2 args" + return 1 + fi + + pack_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.pack + idx_file="$REPO_SRC"/.git/objects/pack/ct-$epoch.idx + + git -C "$REPO_SRC" pack-objects --stdout --revs --filter=blob:none \ + >"$pack_file" <<-EOF + $revs + EOF + git -C "$REPO_SRC" index-pack -o "$idx_file" "$pack_file" + return 0 +} + +test_expect_success 'setup repos' ' + test_create_repo "$REPO_SRC" && + git -C "$REPO_SRC" branch -M main && + # + # test_commit_bulk() does magic to create a packfile containing + # the new commits. + # + # We create branches in repo_src, but also remember the branch OIDs + # in files so that we can refer to them in repo_t1, which will not + # have the commits locally (because we do not clone or fetch). + # + test_commit_bulk -C "$REPO_SRC" --filename="batch_a.%s.t" 9 && + git -C "$REPO_SRC" branch B1 && + cp "$REPO_SRC"/.git/refs/heads/main m1.branch && + # + test_commit_bulk -C "$REPO_SRC" --filename="batch_b.%s.t" 9 && + git -C "$REPO_SRC" branch B2 && + cp "$REPO_SRC"/.git/refs/heads/main m2.branch && + # + # test_commit() creates commits, trees, tags, and blobs and leave + # them loose. 
+ # + test_config gc.auto 0 && + # + test_commit -C "$REPO_SRC" file1.txt && + test_commit -C "$REPO_SRC" file2.txt && + test_commit -C "$REPO_SRC" file3.txt && + test_commit -C "$REPO_SRC" file4.txt && + test_commit -C "$REPO_SRC" file5.txt && + test_commit -C "$REPO_SRC" file6.txt && + test_commit -C "$REPO_SRC" file7.txt && + test_commit -C "$REPO_SRC" file8.txt && + test_commit -C "$REPO_SRC" file9.txt && + git -C "$REPO_SRC" branch B3 && + cp "$REPO_SRC"/.git/refs/heads/main m3.branch && + # + # Create some commits-and-trees-only packfiles for testing prefetch. + # Set arbitrary EPOCH times to make it easier to test fetch-since. + # + create_commits_and_trees_packfile 1000000000 B1 && + create_commits_and_trees_packfile 1100000000 B1..B2 && + create_commits_and_trees_packfile 1200000000 B2..B3 && + # + # gvfs-helper.exe writes downloaded objects to a shared-cache directory + # rather than the ODB inside the .git directory. + # + mkdir "$SHARED_CACHE_T1" && + mkdir "$SHARED_CACHE_T1/pack" && + mkdir "$SHARED_CACHE_T1/info" && + # + # setup repo_t1 and point all of the gvfs.* values to repo_src. + # + test_create_repo "$REPO_T1" && + git -C "$REPO_T1" branch -M main && + git -C "$REPO_T1" remote add origin $ORIGIN_URL && + git -C "$REPO_T1" config --local gvfs.cache-server $CACHE_URL && + git -C "$REPO_T1" config --local gvfs.sharedCache "$SHARED_CACHE_T1" && + echo "$SHARED_CACHE_T1" >> "$REPO_T1"/.git/objects/info/alternates && + # + # + # + cat <<-EOF >creds.txt && + username=x + password=y + EOF + cat <<-EOF >creds.sh && + #!/bin/sh + cat "$(pwd)"/creds.txt + EOF + chmod 755 creds.sh && + git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" && + # + # Create some test data sets. + # + get_list_of_oids 30 && + get_list_of_commit_and_tree_oids 30 && + get_list_of_blobs_oids && + get_one_commit_oid +' + +stop_gvfs_protocol_server () { + if ! test -f "$PID_FILE" + then + return 0 + fi + # + # The server will shutdown automatically when we delete the pid-file. + # + rm -f "$PID_FILE" + # + # Give it a few seconds to shutdown (mainly to completely release the + # port before the next test start another instance and it attempts to + # bind to it). + # + for k in 0 1 2 3 4 + do + if grep -q "Starting graceful shutdown" "$SERVER_LOG" + then + return 0 + fi + sleep 1 + done + + echo "stop_gvfs_protocol_server: timeout waiting for server shutdown" + return 1 +} + +start_gvfs_protocol_server () { + # + # Launch our server into the background in repo_src. + # + ( + cd "$REPO_SRC" + test-gvfs-protocol --verbose \ + --listen=127.0.0.1 \ + --port=$GIT_TEST_GVFS_PROTOCOL_PORT \ + --reuseaddr \ + --pid-file="$PID_FILE" \ + 2>"$SERVER_LOG" & + ) + # + # Give it a few seconds to get started. + # + for k in 0 1 2 3 4 + do + if test -f "$PID_FILE" + then + return 0 + fi + sleep 1 + done + + echo "start_gvfs_protocol_server: timeout waiting for server startup" + return 1 +} + +start_gvfs_protocol_server_with_mayhem () { + if test $# -lt 1 + then + echo "start_gvfs_protocol_server_with_mayhem: need mayhem args" + return 1 + fi + + mayhem="" + for k in $* + do + mayhem="$mayhem --mayhem=$k" + done + # + # Launch our server into the background in repo_src. + # + ( + cd "$REPO_SRC" + test-gvfs-protocol --verbose \ + --listen=127.0.0.1 \ + --port=$GIT_TEST_GVFS_PROTOCOL_PORT \ + --reuseaddr \ + --pid-file="$PID_FILE" \ + $mayhem \ + 2>"$SERVER_LOG" & + ) + # + # Give it a few seconds to get started. 
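	#
	# (The --mayhem=<keyword> arguments select the failure modes that
	# the test-gvfs-protocol helper implements (see req__read and
	# dispatch); for example, http_503 and http_429 fail every
	# request, while close_read_1 and http_429_1 only fail the first
	# connection so that a client retry can be observed succeeding.)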
+ # + for k in 0 1 2 3 4 + do + if test -f "$PID_FILE" + then + return 0 + fi + sleep 1 + done + + echo "start_gvfs_protocol_server: timeout waiting for server startup" + return 1 +} + +# Verify the number of connections from the client. +# +# If keep-alive is working, a series of successful sequential requests to the +# same server should use the same TCP connection, so a simple multi-get would +# only have one connection. +# +# On the other hand, an auto-retry after a network error (mayhem) will have +# more than one for a single object request. +# +# TODO This may generate false alarm when we get to complicated tests, so +# TODO we might only want to use it for basic tests. +# +verify_connection_count () { + if test $# -eq 1 + then + expected_nr=$1 + else + expected_nr=1 + fi + + actual_nr=$(grep -c "Connection from" "$SERVER_LOG") + + if test $actual_nr -ne $expected_nr + then + echo "verify_keep_live: expected $expected_nr; actual $actual_nr" + return 1 + fi + return 0 +} + +# Verify that the set of requested objects are present in +# the shared-cache and that there is no corruption. We use +# cat-file to hide whether the object is packed or loose in +# the test repo. +# +# Usage: +# +verify_objects_in_shared_cache () { + # + # See if any of the objects are missing from repo_t1. + # + git -C "$REPO_T1" cat-file --batch-check <"$1" >OUT.bc_actual || return 1 + grep -q " missing" OUT.bc_actual && return 1 + # + # See if any of the objects have different sizes or types than repo_src. + # + git -C "$REPO_SRC" cat-file --batch-check <"$1" >OUT.bc_expect || return 1 + test_cmp OUT.bc_expect OUT.bc_actual || return 1 + # + # See if any of the objects are corrupt in repo_t1. This fully + # reconstructs the objects and verifies the hash and therefore + # detects corruption not found by the earlier "batch-check" step. + # + git -C "$REPO_T1" cat-file --batch <"$1" >OUT.b_actual || return 1 + # + # TODO move the shared-cache directory (and/or the + # TODO .git/objects/info/alternates and temporarily unset + # TODO gvfs.sharedCache) and repeat the first "batch-check" + # TODO and make sure that they are ALL missing. + # + return 0 +} + +# gvfs-helper prints a "packfile " message for each received +# packfile to stdout. Verify that we received the expected number +# of packfiles. +# +verify_received_packfile_count () { + if test $# -eq 1 + then + expected_nr=$1 + else + expected_nr=1 + fi + + actual_nr=$(grep -c "packfile " OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" OUT.actual && + test_cmp "$OIDS_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OIDS_FILE" && + verify_connection_count 1 +' + +test_expect_success 'basic: GET cache-server multi-get trust-mode' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the cache-server and make a series of + # single-object GET requests. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + get \ + <"$OIDS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. 
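The `sed` line that follows appears truncated in this excerpt; the check it performs amounts to the sketch below. It assumes gvfs-helper writes one `loose <oid>` line per received object to `OUT.output`, and `verify_loose_objects_received` is a hypothetical name, not a helper the patch defines:

```sh
# Strip the "loose " prefix from gvfs-helper's stdout and compare the
# resulting (sorted) OID list against the list that was requested.
verify_loose_objects_received () {
	sed "s/loose //" <OUT.output | sort >OUT.actual &&
	test_cmp "$1" OUT.actual
}

# e.g.: verify_loose_objects_received "$OIDS_FILE"
```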
+ # + sed "s/loose //" OUT.actual && + test_cmp "$OIDS_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OIDS_FILE" && + verify_connection_count 1 +' + +test_expect_success 'basic: GET gvfs/config' ' +# test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the cache-server and make a series of + # single-object GET requests. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + config \ + <"$OIDS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # The cache-server URL should be listed in the gvfs/config output. + # We confirm this before assuming error-mode will work. + # + grep -q "$CACHE_URL" OUT.output +' + +test_expect_success 'basic: GET cache-server multi-get error-mode' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the cache-server and make a series of + # single-object GET requests. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=error \ + --remote=origin \ + get \ + <"$OIDS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" OUT.actual && + test_cmp "$OIDS_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OIDS_FILE" && + + # Technically, we have 1 connection to the origin server + # for the "gvfs/config" request and 1 to cache server to + # get the objects, but because we are using the same port + # for both, keep-alive will handle it. So 1 connection. + # + verify_connection_count 1 +' + +# The GVFS Protocol POST verb behaves like GET for non-commit objects +# (in that it just returns the requested object), but for commit +# objects POST *also* returns all trees referenced by the commit. +# +# The goal of this test is to confirm that gvfs-helper can send us +# a packfile at all. So, this test only passes blobs to not blur +# the issue. +# +test_expect_success 'basic: POST origin blobs' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the origin server (w/o auth) and make + # multi-object POST request. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "packfile " message for each received + # packfile. We verify the number of expected packfile(s) and we + # individually verify that each requested object is present in the + # shared cache (and index-pack already verified the integrity of + # the packfile), so we do not bother to run "git verify-pack -v" + # and do an exact matchup here. + # + verify_received_packfile_count 1 && + + verify_objects_in_shared_cache "$OIDS_BLOBS_FILE" && + verify_connection_count 1 +' + +# Request a single blob via POST. Per the GVFS Protocol, the server +# should implicitly send a loose object for it. Confirm that. +# +test_expect_success 'basic: POST-request a single blob' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the origin server (w/o auth) and request a single + # blob via POST. 
+ # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received + # loose object. + # + sed "s/loose //" OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_connection_count 1 +' + +# Request a single commit via POST. Per the GVFS Protocol, the server +# should implicitly send us a packfile containing the commit and the +# trees it references. Confirm that properly handled the receipt of +# the packfile. (Here, we are testing that asking for a single commit +# via POST yields a packfile rather than a loose object.) +# +# We DO NOT verify that the packfile contains commits/trees and no blobs +# because our test helper doesn't implement the filtering. +# +test_expect_success 'basic: POST-request a single commit' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Connect to the origin server (w/o auth) and request a single + # commit via POST. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OID_ONE_COMMIT_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "packfile " message for each received + # packfile. + # + verify_received_packfile_count 1 && + + verify_connection_count 1 +' + +test_expect_success 'basic: PREFETCH w/o arg gets all' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Without a "since" argument gives us all "ct-*.pack" since the EPOCH + # because we do not have any prefetch packs locally. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch >OUT.output && + + # gvfs-helper prints a "packfile " message for each received + # packfile. + # + verify_received_packfile_count 3 && + verify_prefetch_keeps 1200000000 && + + stop_gvfs_protocol_server && + verify_connection_count 1 +' + +test_expect_success 'basic: PREFETCH w/ arg' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Ask for cached packfiles NEWER THAN the given time. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch --since="1000000000" >OUT.output && + + # gvfs-helper prints a "packfile " message for each received + # packfile. + # + verify_received_packfile_count 2 && + verify_prefetch_keeps 1200000000 && + + stop_gvfs_protocol_server && + verify_connection_count 1 +' + +test_expect_success 'basic: PREFETCH mayhem no_prefetch_idx' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem no_prefetch_idx && + + # Request prefetch packs, but tell server to not send any + # idx files and force gvfs-helper to compute them. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch --since="1000000000" >OUT.output && + + # gvfs-helper prints a "packfile " message for each received + # packfile. 
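Similarly, the body of `verify_received_packfile_count` appears cut off where it is defined earlier in this excerpt. Its counting step is roughly the sketch below, again assuming the `packfile <name>` lines are captured in `OUT.output`; the function name here is a stand-in:

```sh
# Rough shape of the truncated helper: count the "packfile <name>"
# lines gvfs-helper printed to OUT.output and compare with what the
# test expects (default 1).
verify_received_packfile_count_sketch () {
	expected_nr=${1:-1}
	actual_nr=$(grep -c "packfile " <OUT.output)
	if test "$actual_nr" -ne "$expected_nr"
	then
		echo "expected $expected_nr packfile(s); actual $actual_nr"
		return 1
	fi
	return 0
}
```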
+ # + verify_received_packfile_count 2 && + verify_prefetch_keeps 1200000000 && + + stop_gvfs_protocol_server && + verify_connection_count 1 +' + +test_expect_success 'basic: PREFETCH up-to-date' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Ask for cached packfiles NEWER THAN the given time. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch --since="1000000000" >OUT.output && + + # gvfs-helper prints a "packfile " message for each received + # packfile. + # + verify_received_packfile_count 2 && + verify_prefetch_keeps 1200000000 && + + # Ask again for any packfiles newer than what we have cached locally. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + prefetch >OUT.output && + + # gvfs-helper prints a "packfile " message for each received + # packfile. + # + verify_received_packfile_count 0 && + verify_prefetch_keeps 1200000000 && + + stop_gvfs_protocol_server && + verify_connection_count 2 +' + +################################################################# +# Tests to see how gvfs-helper responds to network problems. +# +# We use small --max-retry value because of exponential backoff. +# +# These mayhem tests are interested in how gvfs-helper gracefully +# retries when there is a network error. And verify that it gives +# up gracefully too. +################################################################# + +mayhem_observed__close__connections () { + if $(grep -q "transient" OUT.stderr) + then + # Transient errors should retry. + # 1 for initial request + 2 retries. + # + verify_connection_count 3 + return $? + elif $(grep -q "hard_fail" OUT.stderr) + then + # Hard errors should not retry. + # + verify_connection_count 1 + return $? + else + error "mayhem_observed__close: unexpected mayhem-induced error type" + return 1 + fi +} + +mayhem_observed__close () { + # Expected error codes for mayhem events: + # close_read + # close_write + # close_no_write + # + # CURLE_PARTIAL_FILE 18 + # CURLE_GOT_NOTHING 52 + # CURLE_SEND_ERROR 55 + # CURLE_RECV_ERROR 56 + # + # I don't want to pin it down to an exact error for each because there may + # be races here because of network buffering. + # + # Also, It is unclear which of these network errors should be transient + # (with retry) and which should be a hard-fail (without retry). I'm only + # going to verify the connection counts based upon what type of error + # gvfs-helper claimed it to be. + # + if $(grep -q "error: get: (curl:18)" OUT.stderr) || + $(grep -q "error: get: (curl:52)" OUT.stderr) || + $(grep -q "error: get: (curl:55)" OUT.stderr) || + $(grep -q "error: get: (curl:56)" OUT.stderr) + then + mayhem_observed__close__connections + return $? + else + echo "mayhem_observed__close: unexpected mayhem-induced error" + return 1 + fi +} + +test_expect_success 'curl-error: no server' ' + test_when_finished "per_test_cleanup" && + + # Try to do a multi-get without a server. + # + # Use small max-retry value because of exponential backoff, + # but yet do exercise retry some. 
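For context on the `--max-retries=2` used throughout this block: the option bounds a retry loop whose delay grows between attempts, which is why the tests keep it small. The shell sketch below is illustrative only; the real logic lives in `gvfs-helper.c`, and `do_request`/`max_retries` are stand-in names:

```sh
# Illustrative retry-with-backoff shape bounded by --max-retries.
attempt=0
delay=1
until do_request
do
	attempt=$((attempt + 1))
	if test $attempt -gt $max_retries
	then
		echo "giving up after $max_retries retries" >&2
		break
	fi
	sleep $delay
	delay=$((delay * 2))	# grow the wait between attempts
done
```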
+ # + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + # CURLE_COULDNT_CONNECT 7 + grep -q "error: get: (curl:7)" OUT.stderr +' + +test_expect_success 'curl-error: close socket while reading request' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_read && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + mayhem_observed__close +' + +test_expect_success 'curl-error: close socket while writing response' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_write && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + mayhem_observed__close +' + +test_expect_success 'curl-error: close socket before writing response' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_no_write && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + mayhem_observed__close +' + +################################################################# +# Tests to confirm that gvfs-helper does silently recover when +# a retry succeeds. +# +# Note: I'm only to do this for 1 of the close_* mayhem events. +################################################################# + +test_expect_success 'successful retry after curl-error: origin get' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem close_read_1 && + + # Connect to the origin server (w/o auth). + # Make a single-object GET request. + # Confirm that it succeeds without error. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 2 +' + +################################################################# +# Tests to see how gvfs-helper responds to HTTP errors/problems. 
+# +################################################################# + +# See "enum gh__error_code" in gvfs-helper.c +# +GH__ERROR_CODE__HTTP_404=4 +GH__ERROR_CODE__HTTP_429=5 +GH__ERROR_CODE__HTTP_503=6 + +test_expect_success 'http-error: 503 Service Unavailable (with retry)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_503 && + + test_expect_code $GH__ERROR_CODE__HTTP_503 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:503)" OUT.stderr && + verify_connection_count 3 +' + +test_expect_success 'http-error: 429 Service Unavailable (with retry)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_429 && + + test_expect_code $GH__ERROR_CODE__HTTP_429 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OIDS_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:429)" OUT.stderr && + verify_connection_count 3 +' + +test_expect_success 'http-error: 404 Not Found (no retry)' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_404 && + + test_expect_code $GH__ERROR_CODE__HTTP_404 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + grep -q "error: get: (http:404)" OUT.stderr && + verify_connection_count 1 +' + +################################################################# +# Tests to confirm that gvfs-helper does silently recover when an +# HTTP request succeeds after a failure. +# +################################################################# + +test_expect_success 'successful retry after http-error: origin get' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_429_1 && + + # Connect to the origin server (w/o auth). + # Make a single-object GET request. + # Confirm that it succeeds without error. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 2 +' + +################################################################# +# Test HTTP Auth +# +################################################################# + +test_expect_success 'HTTP GET Auth on Origin Server' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_401 && + + # Force server to require auth. + # Connect to the origin server without auth. + # Make a single-object GET request. + # Confirm that it gets a 401 and then retries with auth. + # + GIT_CONFIG_NOSYSTEM=1 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. 
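These 401 tests succeed because of the canned credential helper configured back in the setup test (it simply prints `creds.txt`). For reference, a standalone helper honoring Git's credential-helper calling convention would look roughly like the sketch below; this is hypothetical, not part of the patch, and `CREDS_FILE` is a stand-in name:

```sh
#!/bin/sh
# Git invokes a credential helper with an action argument ("get",
# "store" or "erase"), feeds a request description on stdin, and for
# "get" reads key=value lines (username=, password=) from stdout.
action=$1
cat >/dev/null		# consume and ignore the request description
case "$action" in
get)
	cat "${CREDS_FILE:-creds.txt}"
	;;
*)
	:		# nothing to store or erase for these tests
	;;
esac
```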
+ # + sed "s/loose //" OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 2 +' + +test_expect_success 'HTTP POST Auth on Origin Server' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_401 && + + # Connect to the origin server and make multi-object POST + # request and verify that it automatically handles the 401. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output && + + # Stop the server to prevent the verification steps from faulting-in + # any missing objects. + # + stop_gvfs_protocol_server && + + # gvfs-helper prints a "packfile " message for each received + # packfile. We verify the number of expected packfile(s) and we + # individually verify that each requested object is present in the + # shared cache (and index-pack already verified the integrity of + # the packfile), so we do not bother to run "git verify-pack -v" + # and do an exact matchup here. + # + verify_received_packfile_count 1 && + + verify_objects_in_shared_cache "$OIDS_BLOBS_FILE" && + verify_connection_count 2 +' + +test_expect_success 'HTTP GET Auth on Cache Server' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem http_401 && + + # Try auth to cache-server. Note that gvfs-helper *ALWAYS* sends + # creds to cache-servers, so we will never see the "400 Bad Request" + # response. And we are using "trust" mode, so we only expect 1 + # connection to the server. + # + GIT_CONFIG_NOSYSTEM=1 \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + get \ + --max-retries=2 \ + <"$OID_ONE_BLOB_FILE" >OUT.output && + + stop_gvfs_protocol_server && + + # gvfs-helper prints a "loose " message for each received object. + # Verify that gvfs-helper received each of the requested objects. + # + sed "s/loose //" OUT.actual && + test_cmp "$OID_ONE_BLOB_FILE" OUT.actual && + + verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + verify_connection_count 1 +' + +################################################################# +# Integration tests with Git.exe +# +# Now that we have confirmed that gvfs-helper works in isolation, +# run a series of tests using random Git commands that fault-in +# objects as needed. +# +# At this point, I'm going to stop verifying the shape of the ODB +# (loose vs packfiles) and the number of connections required to +# get them. The tests from here on are to verify that objects are +# magically fetched whenever required. +################################################################# + +test_expect_success 'integration: explicit commit/trees, implicit blobs: diff 2 commits' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # We have a very empty repo. Seed it with all of the commits + # and trees. The purpose of this test is to demand-load the + # needed blobs only, so we prefetch the commits and trees. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + get \ + <"$OIDS_CT_FILE" >OUT.output && + + # Confirm that we do not have the blobs locally. + # With gvfs-helper turned off, we should fail. + # + test_must_fail \ + git -C "$REPO_T1" -c core.useGVFSHelper=false \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr && + + # Turn on gvfs-helper and retry. This should implicitly fetch + # any needed blobs. 
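An aside on the `core.useGVFSHelper` toggle used here: the same off/on trick can probe a single object without letting the helper fault it in. `object_is_local` below is a hypothetical helper, not something the patch adds:

```sh
# Does the object exist locally in repo_t1?  With the helper disabled,
# a missing object simply makes cat-file fail instead of being fetched.
object_is_local () {
	git -C "$REPO_T1" -c core.useGVFSHelper=false cat-file -e "$1"
}

# e.g.: test_must_fail object_is_local <some-not-yet-fetched-blob-oid>
```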
+ # + git -C "$REPO_T1" -c core.useGVFSHelper=true \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr && + + # Verify that gvfs-helper wrote the fetched the blobs to the + # local ODB, such that a second attempt with gvfs-helper + # turned off should succeed. + # + git -C "$REPO_T1" -c core.useGVFSHelper=false \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr +' + +test_expect_success 'integration: fully implicit: diff 2 commits' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + # Implicitly demand-load everything without any pre-seeding. + # + git -C "$REPO_T1" -c core.useGVFSHelper=true \ + diff $(cat m1.branch)..$(cat m3.branch) \ + >OUT.output 2>OUT.stderr +' + +################################################################# +# Duplicate packfile tests. +# +# If we request a fixed set of blobs, we should get a unique packfile +# of the form "vfs-.{pack,idx}". It we request that same set +# again, the server should create and send the exact same packfile. +# True web servers might build the custom packfile in random order, +# but our test web server should give us consistent results. +# +# Verify that we can handle the duplicate pack and idx file properly. +################################################################# + +test_expect_success 'duplicate: vfs- packfile' ' + test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server && + + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + # Re-fetch the same packfile. We do not care if it replaces + # first one or if it silently fails to overwrite the existing + # one. We just confirm that afterwards we only have 1 packfile. + # + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" >OUT.output 2>OUT.stderr && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + stop_gvfs_protocol_server +' + +# Return the absolute pathname of the first received packfile. +# +first_received_packfile_pathname () { + fn=$(sed -n '/^packfile/p' OUT.output \ + 2>OUT.stderr && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + # Re-fetch the same packfile, but hold the existing packfile + # open for writing on an obscure (and randomly-chosen) file + # descriptor. + # + # This should cause the replacement-install to fail (at least + # on Windows) with an EBUSY or EPERM or something. + # + # Verify that that error is eaten. We do not care if the + # replacement is retried or if gvfs-helper simply discards the + # second instance. We just confirm that afterwards we only + # have 1 packfile on disk and that the command "lies" and reports + # that it created the existing packfile. (We want the lie because + # in normal usage, gh-client has already built the packed-git list + # in memory and is using gvfs-helper to fetch missing objects; + # gh-client does not care who does the fetch, but it needs to + # update its packed-git list and restart the object lookup.) 
+ # + PACK=$(first_received_packfile_pathname) && + git -C "$REPO_T1" gvfs-helper \ + --cache-server=disable \ + --remote=origin \ + --no-progress \ + post \ + <"$OIDS_BLOBS_FILE" \ + >OUT.output \ + 2>OUT.stderr \ + 9>>"$PACK" && + verify_received_packfile_count 1 && + verify_vfs_packfile_count 1 && + + stop_gvfs_protocol_server +' + +################################################################# +# Ensure that the SHA of the blob we received matches the SHA of +# the blob we requested. +################################################################# + +# Request a loose blob from the server. Verify that we received +# content matches the requested SHA. +# +test_expect_success 'catch corrupted loose object' ' +# test_when_finished "per_test_cleanup" && + start_gvfs_protocol_server_with_mayhem corrupt_loose && + + test_must_fail \ + git -C "$REPO_T1" gvfs-helper \ + --cache-server=trust \ + --remote=origin \ + get \ + <"$OID_ONE_BLOB_FILE" >OUT.output 2>OUT.stderr && + + stop_gvfs_protocol_server && + + # Verify corruption detected. + # Verify valid blob not included in response to client. + + grep "hash failed for received loose object" OUT.stderr && + + # Verify that we did not write the corrupted blob to the ODB. + + ! verify_objects_in_shared_cache "$OID_ONE_BLOB_FILE" && + git -C "$REPO_T1" fsck +' + +test_done diff --git a/t/t7108-reset-stdin.sh b/t/t7108-reset-stdin.sh index b7cbcbf869296c..db5483b8f10052 100755 --- a/t/t7108-reset-stdin.sh +++ b/t/t7108-reset-stdin.sh @@ -29,4 +29,13 @@ test_expect_success '--stdin requires --mixed' ' git reset --mixed --stdin list && + git reset --stdin marker + EOF + + : make sure -changed is called if -change does not exist && + test_when_finished "echo testing >dir1/file2.txt && git status" && + echo changed >dir1/file2.txt && + : force index to be dirty && + test-tool chmtime -60 .git/index && + git status && + test_path_is_file marker && + + test_when_finished "rm -f .git/hooks/post-index-change marker2" && + write_script .git/hooks/post-index-change <<-\EOF && + : >marker2 + EOF + + : make sure -changed is not called if -change exists && + rm -f marker marker2 && + echo testing >dir1/file2.txt && + : force index to be dirty && + test-tool chmtime -60 .git/index && + git status && + test_path_is_missing marker && + test_path_is_file marker2 +' + test_expect_success 'test status, add, commit, others trigger hook without flags set' ' test_hook post-index-change <<-\EOF && if test "$1" -eq 1; then diff --git a/t/t7519/fsmonitor-watchman b/t/t7519/fsmonitor-watchman index 264b9daf834ec8..6461f625f64181 100755 --- a/t/t7519/fsmonitor-watchman +++ b/t/t7519/fsmonitor-watchman @@ -17,7 +17,6 @@ use IPC::Open2; # 'git config core.fsmonitor .git/hooks/query-watchman' # my ($version, $time) = @ARGV; -#print STDERR "$0 $version $time\n"; # Check the hook interface version @@ -44,7 +43,7 @@ launch_watchman(); sub launch_watchman { - my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j') + my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j --no-pretty') or die "open2() failed: $!\n" . "Falling back to scanning...\n"; @@ -62,19 +61,11 @@ sub launch_watchman { "fields": ["name"] }] END - - open (my $fh, ">", ".git/watchman-query.json"); - print $fh $query; - close $fh; print CHLD_IN $query; close CHLD_IN; my $response = do {local $/; }; - open ($fh, ">", ".git/watchman-response.json"); - print $fh $response; - close $fh; - die "Watchman: command returned no output.\n" . 
"Falling back to scanning...\n" if $response eq ""; die "Watchman: command returned invalid output: $response\n" . @@ -93,7 +84,6 @@ sub launch_watchman { my $o = $json_pkg->new->utf8->decode($response); if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) { - print STDERR "Adding '$git_work_tree' to watchman's watch list.\n"; $retry--; qx/watchman watch "$git_work_tree"/; die "Failed to make watchman watch '$git_work_tree'.\n" . @@ -103,11 +93,6 @@ sub launch_watchman { # return the fast "everything is dirty" flag to git and do the # Watchman query just to get it over with now so we won't pay # the cost in git to look up each individual file. - - open ($fh, ">", ".git/watchman-output.out"); - print "/\0"; - close $fh; - print "/\0"; eval { launch_watchman() }; exit 0; @@ -116,11 +101,6 @@ sub launch_watchman { die "Watchman: $o->{error}.\n" . "Falling back to scanning...\n" if $o->{error}; - open ($fh, ">", ".git/watchman-output.out"); - binmode $fh, ":utf8"; - print $fh @{$o->{files}}; - close $fh; - binmode STDOUT, ":utf8"; local $, = "\0"; print @{$o->{files}}; diff --git a/t/t7519/fsmonitor-watchman-debug b/t/t7519/fsmonitor-watchman-debug new file mode 100755 index 00000000000000..d8e7a1e5ba85c0 --- /dev/null +++ b/t/t7519/fsmonitor-watchman-debug @@ -0,0 +1,128 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use IPC::Open2; + +# An example hook script to integrate Watchman +# (https://facebook.github.io/watchman/) with git to speed up detecting +# new and modified files. +# +# The hook is passed a version (currently 1) and a time in nanoseconds +# formatted as a string and outputs to stdout all files that have been +# modified since the given time. Paths must be relative to the root of +# the working tree and separated by a single NUL. +# +# To enable this hook, rename this file to "query-watchman" and set +# 'git config core.fsmonitor .git/hooks/query-watchman' +# +my ($version, $time) = @ARGV; +#print STDERR "$0 $version $time\n"; + +# Check the hook interface version + +if ($version == 1) { + # convert nanoseconds to seconds + # subtract one second to make sure watchman will return all changes + $time = int ($time / 1000000000) - 1; +} else { + die "Unsupported query-fsmonitor hook version '$version'.\n" . + "Falling back to scanning...\n"; +} + +my $git_work_tree; +if ($^O =~ 'msys' || $^O =~ 'cygwin') { + $git_work_tree = Win32::GetCwd(); + $git_work_tree =~ tr/\\/\//; +} else { + require Cwd; + $git_work_tree = Cwd::cwd(); +} + +my $retry = 1; + +launch_watchman(); + +sub launch_watchman { + + my $pid = open2(\*CHLD_OUT, \*CHLD_IN, 'watchman -j') + or die "open2() failed: $!\n" . + "Falling back to scanning...\n"; + + # In the query expression below we're asking for names of files that + # changed since $time but were not transient (ie created after + # $time but no longer exist). + # + # To accomplish this, we're using the "since" generator to use the + # recency index to select candidate nodes and "fields" to limit the + # output to file names only. + + my $query = <<" END"; + ["query", "$git_work_tree", { + "since": $time, + "fields": ["name"] + }] + END + + open (my $fh, ">", ".git/watchman-query.json"); + print $fh $query; + close $fh; + + print CHLD_IN $query; + close CHLD_IN; + my $response = do {local $/; }; + + open ($fh, ">", ".git/watchman-response.json"); + print $fh $response; + close $fh; + + die "Watchman: command returned no output.\n" . 
+ "Falling back to scanning...\n" if $response eq ""; + die "Watchman: command returned invalid output: $response\n" . + "Falling back to scanning...\n" unless $response =~ /^\{/; + + my $json_pkg; + eval { + require JSON::XS; + $json_pkg = "JSON::XS"; + 1; + } or do { + require JSON::PP; + $json_pkg = "JSON::PP"; + }; + + my $o = $json_pkg->new->utf8->decode($response); + + if ($retry > 0 and $o->{error} and $o->{error} =~ m/unable to resolve root .* directory (.*) is not watched/) { + print STDERR "Adding '$git_work_tree' to watchman's watch list.\n"; + $retry--; + qx/watchman watch "$git_work_tree"/; + die "Failed to make watchman watch '$git_work_tree'.\n" . + "Falling back to scanning...\n" if $? != 0; + + # Watchman will always return all files on the first query so + # return the fast "everything is dirty" flag to git and do the + # Watchman query just to get it over with now so we won't pay + # the cost in git to look up each individual file. + + open ($fh, ">", ".git/watchman-output.out"); + print "/\0"; + close $fh; + + print "/\0"; + eval { launch_watchman() }; + exit 0; + } + + die "Watchman: $o->{error}.\n" . + "Falling back to scanning...\n" if $o->{error}; + + open ($fh, ">", ".git/watchman-output.out"); + binmode $fh, ":utf8"; + print $fh @{$o->{files}}; + close $fh; + + binmode STDOUT, ":utf8"; + local $, = "\0"; + print @{$o->{files}}; +} diff --git a/t/t7522-serialized-status.sh b/t/t7522-serialized-status.sh new file mode 100755 index 00000000000000..230e1e24cfc1c4 --- /dev/null +++ b/t/t7522-serialized-status.sh @@ -0,0 +1,458 @@ +#!/bin/sh + +test_description='git serialized status tests' + +. ./test-lib.sh + +# This file includes tests for serializing / deserializing +# status data. These tests cover two basic features: +# +# [1] Because users can request different types of untracked-file +# and ignored file reporting, the cache data generated by +# serialize must use either the same untracked and ignored +# parameters as the later deserialize invocation; otherwise, +# the deserialize invocation must disregard the cached data +# and run a full scan itself. +# +# To increase the number of cases where the cached status can +# be used, we have added a "--untracked-file=complete" option +# that reports a superset or union of the results from the +# "-u normal" and "-u all". We combine this with a filter in +# deserialize to filter the results. +# +# Ignored file reporting is simpler in that is an all or +# nothing; there are no subsets. +# +# The tests here (in addition to confirming that a cache +# file can be generated and used by a subsequent status +# command) need to test this untracked-file filtering. +# +# [2] ensuring the status calls are using data from the status +# cache as expected. This includes verifying cached data +# is used when appropriate as well as falling back to +# performing a new status scan when the data in the cache +# is insufficient/known stale. + +test_expect_success 'setup' ' + git branch -M main && + cat >.gitignore <<-\EOF && + *.ign + ignored_dir/ + EOF + + mkdir tracked ignored_dir && + touch tracked_1.txt tracked/tracked_1.txt && + git add . && + test_tick && + git commit -m"Adding original file." 
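Stepping back from the setup for a moment, the round trip these tests exercise is "serialize once, deserialize many times". A usage sketch with the option spellings used in this file (`status-cache.dat` is just an arbitrary filename):

```sh
# Write the status cache once, using the superset untracked mode so
# later readers can filter it down...
git status --untracked-files=complete --ignored=matching \
	--serialize >status-cache.dat &&

# ...then answer later status invocations from the cache:
git status --porcelain=v2 --untracked-files=all --ignored=matching \
	--deserialize=status-cache.dat &&
git status --porcelain=v2 --deserialize=status-cache.dat
```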
&& + mkdir untracked && + touch ignored.ign ignored_dir/ignored_2.txt \ + untracked_1.txt untracked/untracked_2.txt untracked/untracked_3.txt && + + test_oid_cache <<-EOF + branch_oid sha1:68d4a437ea4c2de65800f48c053d4d543b55c410 + x_base sha1:587be6b4c3f93f93c489c0111bba5596147a26cb + x_ours sha1:b68025345d5301abad4d9ec9166f455243a0d746 + x_theirs sha1:975fbec8256d3e8a3797e7a3611380f27c49f4ac + + branch_oid sha256:6b95e4b1ea911dad213f2020840f5e92d3066cf9e38cf35f79412ec58d409ce4 + x_base sha256:14f5162e2fe3d240d0d37aaab0f90e4af9a7cfa79639f3bab005b5bfb4174d9f + x_ours sha256:3a404ba030a4afa912155c476a48a253d4b3a43d0098431b6d6ca6e554bd78fb + x_theirs sha256:44dc634218adec09e34f37839b3840bad8c6103693e9216626b32d00e093fa35 + EOF +' + +test_expect_success 'verify untracked-files=complete with no conversion' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? serialized_status.dat + ? untracked/ + ? untracked/untracked_2.txt + ? untracked/untracked_3.txt + ? untracked_1.txt + ! ignored.ign + ! ignored_dir/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --untracked-files=complete --ignored=matching --deserialize=serialized_status.dat >output && + test_cmp expect output +' + +test_expect_success 'verify untracked-files=complete to untracked-files=normal conversion' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? serialized_status.dat + ? untracked/ + ? untracked_1.txt + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --deserialize=serialized_status.dat >output && + test_cmp expect output +' + +test_expect_success 'verify untracked-files=complete to untracked-files=all conversion' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? serialized_status.dat + ? untracked/untracked_2.txt + ? untracked/untracked_3.txt + ? untracked_1.txt + ! ignored.ign + ! ignored_dir/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --untracked-files=all --ignored=matching --deserialize=serialized_status.dat >output && + test_cmp expect output +' + +test_expect_success 'verify serialized status with non-convertible ignore mode does new scan' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? expect + ? new_change.txt + ? output + ? serialized_status.dat + ? untracked/ + ? untracked_1.txt + ! ignored.ign + ! ignored_dir/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --ignored --deserialize=serialized_status.dat >output && + test_cmp expect output +' + +test_expect_success 'verify serialized status handles path scopes' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-\EOF && + ? 
untracked/ + EOF + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git status --porcelain=v2 --deserialize=serialized_status.dat untracked >output && + test_cmp expect output +' + +test_expect_success 'verify no-ahead-behind and serialized status integration' ' + test_when_finished "rm serialized_status.dat new_change.txt output" && + cat >expect <<-EOF && + # branch.oid $(test_oid branch_oid) + # branch.head alt_branch + # branch.upstream main + # branch.ab +1 -0 + ? expect + ? serialized_status.dat + ? untracked/ + ? untracked_1.txt + EOF + + git checkout -b alt_branch main --track >/dev/null && + touch alt_branch_changes.txt && + git add alt_branch_changes.txt && + test_tick && + git commit -m"New commit on alt branch" && + + git status --untracked-files=complete --ignored=matching --serialize >serialized_status.dat && + touch new_change.txt && + + git -c status.aheadBehind=false status --porcelain=v2 --branch --ahead-behind --deserialize=serialized_status.dat >output && + test_cmp expect output +' + +test_expect_success 'verify new --serialize=path mode' ' + test_when_finished "rm serialized_status.dat expect new_change.txt output.1 output.2" && + cat >expect <<-\EOF && + ? expect + ? output.1 + ? untracked/ + ? untracked_1.txt + EOF + + git checkout -b serialize_path_branch main --track >/dev/null && + touch alt_branch_changes.txt && + git add alt_branch_changes.txt && + test_tick && + git commit -m"New commit on serialize_path_branch" && + + git status --porcelain=v2 --serialize=serialized_status.dat >output.1 && + touch new_change.txt && + + git status --porcelain=v2 --deserialize=serialized_status.dat >output.2 && + test_cmp expect output.1 && + test_cmp expect output.2 +' + +test_expect_success 'try deserialize-wait feature' ' + test_when_finished "rm -f serialized_status.dat dirt expect.* output.* trace.*" && + + git status --serialize=serialized_status.dat >output.1 && + + # make status cache stale by updating the mtime on the index. confirm that + # deserialize fails when requested. + sleep 1 && + touch .git/index && + test_must_fail git status --deserialize=serialized_status.dat --deserialize-wait=fail && + test_must_fail git -c status.deserializeWait=fail status --deserialize=serialized_status.dat && + + cat >expect.1 <<-\EOF && + ? expect.1 + ? output.1 + ? serialized_status.dat + ? untracked/ + ? untracked_1.txt + EOF + + # refresh the status cache. + git status --porcelain=v2 --serialize=serialized_status.dat >output.1 && + test_cmp expect.1 output.1 && + + # create some dirt. confirm deserialize used the existing status cache. + echo x >dirt && + git status --porcelain=v2 --deserialize=serialized_status.dat >output.2 && + test_cmp output.1 output.2 && + + # make the cache stale and try the timeout feature and wait upto + # 2 tenths of a second. confirm deserialize timed out and rejected + # the status cache and did a normal scan. + + cat >expect.2 <<-\EOF && + ? dirt + ? expect.1 + ? expect.2 + ? output.1 + ? output.2 + ? serialized_status.dat + ? trace.2 + ? untracked/ + ? untracked_1.txt + EOF + + sleep 1 && + touch .git/index && + GIT_TRACE_DESERIALIZE=1 git status --porcelain=v2 --deserialize=serialized_status.dat --deserialize-wait=2 >output.2 2>trace.2 && + test_cmp expect.2 output.2 && + grep "wait polled=2 result=1" trace.2 >trace.2g +' + +test_expect_success 'merge conflicts' ' + + # create a merge conflict. 
+ + git init -b main conflicts && + echo x >conflicts/x.txt && + git -C conflicts add x.txt && + git -C conflicts commit -m x && + git -C conflicts branch a && + git -C conflicts branch b && + git -C conflicts checkout a && + echo y >conflicts/x.txt && + git -C conflicts add x.txt && + git -C conflicts commit -m a && + git -C conflicts checkout b && + echo z >conflicts/x.txt && + git -C conflicts add x.txt && + git -C conflicts commit -m b && + test_must_fail git -C conflicts merge --no-commit a && + + # verify that regular status correctly identifies it + # in each format. + + cat >expect.v2 <observed.v2 && + test_cmp expect.v2 observed.v2 && + + cat >expect.long <..." to mark resolution) + both modified: x.txt + +no changes added to commit (use "git add" and/or "git commit -a") +EOF + git -C conflicts status --long >observed.long && + test_cmp expect.long observed.long && + + cat >expect.short <observed.short && + test_cmp expect.short observed.short && + + # save status data in serialized cache. + + git -C conflicts status --serialize >serialized && + + # make some dirt in the worktree so we can tell whether subsequent + # status commands used the cached data or did a fresh status. + + echo dirt >conflicts/dirt.txt && + + # run status using the cached data. + + git -C conflicts status --long --deserialize=../serialized >observed.long && + test_cmp expect.long observed.long && + + git -C conflicts status --short --deserialize=../serialized >observed.short && + test_cmp expect.short observed.short && + + # currently, the cached data does not have enough information about + # merge conflicts for porcelain V2 format. (And V2 format looks at + # the index to get that data, but the whole point of the serialization + # is to avoid reading the index unnecessarily.) So V2 always rejects + # the cached data when there is an unresolved conflict. + + cat >expect.v2.dirty <observed.v2 && + test_cmp expect.v2.dirty observed.v2 + +' + +test_expect_success 'renames' ' + git init -b main rename_test && + echo OLDNAME >rename_test/OLDNAME && + git -C rename_test add OLDNAME && + git -C rename_test commit -m OLDNAME && + git -C rename_test mv OLDNAME NEWNAME && + git -C rename_test status --serialize=renamed.dat >output.1 && + echo DIRT >rename_test/DIRT && + git -C rename_test status --deserialize=renamed.dat >output.2 && + test_cmp output.1 output.2 +' + +test_expect_success 'hint message when cached with u=complete' ' + git init -b main hint && + echo xxx >hint/xxx && + git -C hint add xxx && + git -C hint commit -m xxx && + + cat >expect.clean <expect.use_u <hint.output_normal && + test_cmp expect.clean hint.output_normal && + + git -C hint status --untracked-files=all >hint.output_all && + test_cmp expect.clean hint.output_all && + + git -C hint status --untracked-files=no >hint.output_no && + test_cmp expect.use_u hint.output_no && + + # Create long format output for "complete" and create status cache. + + git -C hint status --untracked-files=complete --ignored=matching --serialize=../hint.dat >hint.output_complete && + test_cmp expect.clean hint.output_complete && + + # Capture long format output using the status cache and verify + # that the output matches the non-cached version. There are 2 + # ways to specify untracked-files, so do them both. 
+ + git -C hint status --deserialize=../hint.dat -unormal >hint.d1_normal && + test_cmp expect.clean hint.d1_normal && + git -C hint -c status.showuntrackedfiles=normal status --deserialize=../hint.dat >hint.d2_normal && + test_cmp expect.clean hint.d2_normal && + + git -C hint status --deserialize=../hint.dat -uall >hint.d1_all && + test_cmp expect.clean hint.d1_all && + git -C hint -c status.showuntrackedfiles=all status --deserialize=../hint.dat >hint.d2_all && + test_cmp expect.clean hint.d2_all && + + git -C hint status --deserialize=../hint.dat -uno >hint.d1_no && + test_cmp expect.use_u hint.d1_no && + git -C hint -c status.showuntrackedfiles=no status --deserialize=../hint.dat >hint.d2_no && + test_cmp expect.use_u hint.d2_no + +' + +test_expect_success 'ensure deserialize -v does not crash' ' + + git init -b main verbose_test && + touch verbose_test/a && + touch verbose_test/b && + touch verbose_test/c && + git -C verbose_test add a b c && + git -C verbose_test commit -m abc && + + echo green >>verbose_test/a && + git -C verbose_test add a && + echo red_1 >>verbose_test/b && + echo red_2 >verbose_test/dirt && + + git -C verbose_test status >output.ref && + git -C verbose_test status -v >output.ref_v && + + git -C verbose_test --no-optional-locks status --serialize=../verbose_test.dat >output.ser.long && + git -C verbose_test --no-optional-locks status --serialize=../verbose_test.dat_v -v >output.ser.long_v && + + # Verify that serialization does not affect the status output itself. + test_cmp output.ref output.ser.long && + test_cmp output.ref_v output.ser.long_v && + + GIT_TRACE2_PERF="$(pwd)"/verbose_test.log \ + git -C verbose_test status --deserialize=../verbose_test.dat >output.des.long && + + # Verify that normal deserialize was actually used and produces the same result. + test_cmp output.ser.long output.des.long && + grep -q "deserialize/result:ok" verbose_test.log && + + GIT_TRACE2_PERF="$(pwd)"/verbose_test.log_v \ + git -C verbose_test status --deserialize=../verbose_test.dat_v -v >output.des.long_v && + + # Verify that vebose mode produces the same result because verbose was rejected. + test_cmp output.ser.long_v output.des.long_v && + grep -q "deserialize/reject:args/verbose" verbose_test.log_v +' + +test_expect_success 'fallback when implicit' ' + git init -b main implicit_fallback_test && + git -C implicit_fallback_test -c status.deserializepath=foobar status +' + +test_expect_success 'fallback when explicit' ' + git init -b main explicit_fallback_test && + git -C explicit_fallback_test status --deserialize=foobar +' + +test_expect_success 'deserialize from stdin' ' + git init -b main stdin_test && + git -C stdin_test status --serialize >serialized_status.dat && + cat serialize_status.dat | git -C stdin_test status --deserialize +' + +test_done diff --git a/t/t7523-status-complete-untracked.sh b/t/t7523-status-complete-untracked.sh new file mode 100755 index 00000000000000..f79611fc024f48 --- /dev/null +++ b/t/t7523-status-complete-untracked.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +test_description='git status untracked complete tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + cat >.gitignore <<-\EOF && + *.ign + ignored_dir/ + EOF + + mkdir tracked ignored_dir && + touch tracked_1.txt tracked/tracked_1.txt && + git add . && + test_tick && + git commit -m"Adding original file." 
&& + mkdir untracked && + touch ignored.ign ignored_dir/ignored_2.txt \ + untracked_1.txt untracked/untracked_2.txt untracked/untracked_3.txt +' + +test_expect_success 'verify untracked-files=complete' ' + cat >expect <<-\EOF && + ? expect + ? output + ? untracked/ + ? untracked/untracked_2.txt + ? untracked/untracked_3.txt + ? untracked_1.txt + ! ignored.ign + ! ignored_dir/ + EOF + + git status --porcelain=v2 --untracked-files=complete --ignored >output && + test_cmp expect output +' + +test_done diff --git a/t/t7615-merge-sparse-checkout.sh b/t/t7615-merge-sparse-checkout.sh new file mode 100755 index 00000000000000..5ce12431f62ad1 --- /dev/null +++ b/t/t7615-merge-sparse-checkout.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +test_description='merge can handle sparse-checkout' + +. ./test-lib.sh + +# merges with conflicts + +test_expect_success 'setup' ' + git branch -M main && + test_commit a && + test_commit file && + git checkout -b delete-file && + git rm file.t && + test_tick && + git commit -m "remove file" && + git checkout main && + test_commit modify file.t changed +' + +test_expect_success 'merge conflict deleted file and modified' ' + echo "/a.t" >.git/info/sparse-checkout && + test_config core.sparsecheckout true && + git checkout -f && + test_path_is_missing file.t && + test_must_fail git merge delete-file && + test_path_is_file file.t && + test "changed" = "$(cat file.t)" +' + +test_done diff --git a/t/t7817-grep-sparse-checkout.sh b/t/t7817-grep-sparse-checkout.sh index eb595645657fad..db3004c4fe71c0 100755 --- a/t/t7817-grep-sparse-checkout.sh +++ b/t/t7817-grep-sparse-checkout.sh @@ -49,7 +49,7 @@ test_expect_success 'setup' ' echo "text" >B/b && git add A B && git commit -m sub && - git sparse-checkout init --cone && + git sparse-checkout init --cone --no-sparse-index && git sparse-checkout set B ) && diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 2724a44fe3ef24..b2e44a9640e681 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -54,6 +54,23 @@ test_expect_success 'run [--auto|--quiet]' ' test_subcommand git gc --no-quiet err && + grep "lock file .* exists, skipping maintenance" err && + + test-tool chmtime =-22000 .git/objects/maintenance.lock && + git maintenance run --schedule=hourly --no-quiet 2>err && + grep "deleted stale lock file" err && + test_path_is_missing .git/objects/maintenance.lock && + + git maintenance run --schedule=hourly 2>err && + test_must_be_empty err +' + test_expect_success 'maintenance.auto config option' ' GIT_TRACE2_EVENT="$(pwd)/default" git commit --quiet --allow-empty -m 1 && test_subcommand git maintenance run --auto --quiet actual && - test_cmp before actual + test_cmp before actual && + + # Expect unregister to be idempotent. 
+ git maintenance unregister ' test_expect_success !MINGW 'register and unregister with regex metacharacters' ' diff --git a/t/t9210-scalar.sh b/t/t9210-scalar.sh index 14ca575a214f3b..8414cfc02496ff 100755 --- a/t/t9210-scalar.sh +++ b/t/t9210-scalar.sh @@ -7,6 +7,13 @@ test_description='test the `scalar` command' GIT_TEST_MAINT_SCHEDULER="crontab:test-tool crontab cron.txt,launchctl:true,schtasks:true" export GIT_TEST_MAINT_SCHEDULER +# Do not write any files outside the trash directory +Scalar_UNATTENDED=1 +export Scalar_UNATTENDED + +GIT_ASKPASS=true +export GIT_ASKPASS + test_expect_success 'scalar shows a usage' ' test_expect_code 129 scalar -h ' @@ -207,4 +214,157 @@ test_expect_success UNZIP 'scalar diagnose' ' grep "^Total: [1-9]" out ' +GIT_TEST_ALLOW_GVFS_VIA_HTTP=1 +export GIT_TEST_ALLOW_GVFS_VIA_HTTP + +test_set_port GIT_TEST_GVFS_PROTOCOL_PORT +HOST_PORT=127.0.0.1:$GIT_TEST_GVFS_PROTOCOL_PORT +PID_FILE="$(pwd)"/pid-file.pid +SERVER_LOG="$(pwd)"/OUT.server.log + +test_atexit ' + test -f "$PID_FILE" || return 0 + + # The server will shutdown automatically when we delete the pid-file. + rm -f "$PID_FILE" + + test -z "$verbose$verbose_log" || { + echo "server log:" + cat "$SERVER_LOG" + } + + # Give it a few seconds to shutdown (mainly to completely release the + # port before the next test start another instance and it attempts to + # bind to it). + for k in $(test_seq 5) + do + grep -q "Starting graceful shutdown" "$SERVER_LOG" && + return 0 || + sleep 1 + done + + echo "stop_gvfs_protocol_server: timeout waiting for server shutdown" + return 1 +' + +start_gvfs_enabled_http_server () { + GIT_HTTP_EXPORT_ALL=1 \ + test-gvfs-protocol --verbose \ + --listen=127.0.0.1 \ + --port=$GIT_TEST_GVFS_PROTOCOL_PORT \ + --reuseaddr \ + --pid-file="$PID_FILE" \ + 2>"$SERVER_LOG" & + + for k in 0 1 2 3 4 + do + if test -f "$PID_FILE" + then + return 0 + fi + sleep 1 + done + return 1 +} + +test_expect_success 'start GVFS-enabled server' ' + git config uploadPack.allowFilter false && + git config uploadPack.allowAnySHA1InWant false && + start_gvfs_enabled_http_server +' + +test_expect_success '`scalar clone` with GVFS-enabled server' ' + : the fake cache server requires fake authentication && + git config --global core.askPass true && + scalar clone --single-branch -- http://$HOST_PORT/ using-gvfs && + + : verify that the shared cache has been configured && + cache_key="url_$(printf "%s" http://$HOST_PORT/ | + tr A-Z a-z | + test-tool sha1)" && + echo "$(pwd)/using-gvfs/.scalarCache/$cache_key" >expect && + git -C using-gvfs/src config gvfs.sharedCache >actual && + test_cmp expect actual && + + second=$(git rev-parse --verify second:second.t) && + ( + cd using-gvfs/src && + test_path_is_missing 1/2 && + GIT_TRACE=$PWD/trace.txt git cat-file blob $second >actual && + : verify that the gvfs-helper was invoked to fetch it && + test_i18ngrep gvfs-helper trace.txt && + echo "second" >expect && + test_cmp expect actual + ) +' + +test_expect_success '`scalar register` parallel to worktree is unsupported' ' + git init test-repo/src && + mkdir -p test-repo/out && + + : parallel to worktree is unsupported && + test_must_fail env GIT_CEILING_DIRECTORIES="$(pwd)" \ + scalar register test-repo/out && + test_must_fail git config --get --global --fixed-value \ + maintenance.repo "$(pwd)/test-repo/src" && + scalar list >scalar.repos && + ! grep -F "$(pwd)/test-repo/src" scalar.repos && + + : at enlistment root, i.e. 
parent of repository, is supported && + GIT_CEILING_DIRECTORIES="$(pwd)" scalar register test-repo && + git config --get --global --fixed-value \ + maintenance.repo "$(pwd)/test-repo/src" && + scalar list >scalar.repos && + grep -F "$(pwd)/test-repo/src" scalar.repos && + + : scalar delete properly unregisters enlistment && + scalar delete test-repo && + test_must_fail git config --get --global --fixed-value \ + maintenance.repo "$(pwd)/test-repo/src" && + scalar list >scalar.repos && + ! grep -F "$(pwd)/test-repo/src" scalar.repos +' + +test_expect_success '`scalar register` & `unregister` with existing repo' ' + git init existing && + scalar register existing && + git config --get --global --fixed-value \ + maintenance.repo "$(pwd)/existing" && + scalar list >scalar.repos && + grep -F "$(pwd)/existing" scalar.repos && + scalar unregister existing && + test_must_fail git config --get --global --fixed-value \ + maintenance.repo "$(pwd)/existing" && + scalar list >scalar.repos && + ! grep -F "$(pwd)/existing" scalar.repos +' + +test_expect_success '`scalar unregister` with existing repo, deleted .git' ' + scalar register existing && + rm -rf existing/.git && + scalar unregister existing && + test_must_fail git config --get --global --fixed-value \ + maintenance.repo "$(pwd)/existing" && + scalar list >scalar.repos && + ! grep -F "$(pwd)/existing" scalar.repos +' + +test_expect_success '`scalar register` existing repo with `src` folder' ' + git init existing && + mkdir -p existing/src && + scalar register existing/src && + scalar list >scalar.repos && + grep -F "$(pwd)/existing" scalar.repos && + scalar unregister existing && + scalar list >scalar.repos && + ! grep -F "$(pwd)/existing" scalar.repos +' + +test_expect_success '`scalar delete` with existing repo' ' + git init existing && + scalar register existing && + scalar delete existing && + test_path_is_missing existing +' + test_done diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 1c77050849e8ca..a7b5055588bf9c 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1020,7 +1020,7 @@ test_must_fail_acceptable () { fi case "$1" in - git|__git*|test-tool|test_terminal) + git|__git*|scalar|test-tool|test_terminal) return 0 ;; *) diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c index 37a3163be12110..11efe4fe6a0861 100644 --- a/trace2/tr2_tgt_event.c +++ b/trace2/tr2_tgt_event.c @@ -35,7 +35,7 @@ static struct tr2_dst tr2dst_event = { * event target. Use the TR2_SYSENV_EVENT_NESTING setting to increase * region details in the event target. */ -static int tr2env_event_max_nesting_levels = 2; +static int tr2env_event_max_nesting_levels = 4; /* * Use the TR2_SYSENV_EVENT_BRIEF to omit the