diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 721e2c63221..0b898836157 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -32,6 +32,7 @@ d6ab15362548b8fe270bd14d5153b8d94e1b15c0 b12cf444edea15da6274975e1b2ca6a7fce2a090 364c27f5d18ab9dd31825e67a93efabecad06823 d8b4de9076531dd13bdffa20cc10c72290a52356 +bdf06bca7534fbc0c4fc3cee3408a51a22615226 # ocp-indent d018d26d6acd4707a23288b327b49e44f732725e diff --git a/.github/workflows/1.249-lcm.yml b/.github/workflows/1.249-lcm.yml index 8057b255a92..8ba69e28ec2 100644 --- a/.github/workflows/1.249-lcm.yml +++ b/.github/workflows/1.249-lcm.yml @@ -10,7 +10,7 @@ on: jobs: python-test: name: Python tests - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 permissions: contents: read strategy: @@ -28,7 +28,7 @@ jobs: ocaml-test: name: Ocaml tests - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout code diff --git a/.github/workflows/codechecker.yml b/.github/workflows/codechecker.yml index da8ea12c005..8908822a16e 100644 --- a/.github/workflows/codechecker.yml +++ b/.github/workflows/codechecker.yml @@ -38,7 +38,7 @@ jobs: opam pin add -y dune-compiledb https://github.com/edwintorok/dune-compiledb/releases/download/0.6.0/dune-compiledb-0.6.0.tbz - name: Trim dune cache - run: opam exec -- dune cache trim --size=2GiB + run: opam exec -- dune cache trim --size=2GiB - name: Generate compile_commands.json run: opam exec -- make compile_commands.json @@ -73,7 +73,10 @@ jobs: name: codechecker_sarif path: codechecker.sarif - - name: Upload SARIF report - uses: github/codeql-action/upload-sarif@v3 - with: - sarif_file: codechecker.sarif + # TODO: reenable after fixing + # https://github.blog/changelog/2025-07-21-code-scanning-will-stop-combining-multiple-sarif-runs-uploaded-in-the-same-sarif-file/ + # + #- name: Upload SARIF report + # uses: github/codeql-action/upload-sarif@v3 + # with: + # sarif_file: codechecker.sarif diff --git a/.github/workflows/docs.yml 
b/.github/workflows/docs.yml index 94c7c1a687e..08d381eeaae 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,15 +31,23 @@ jobs: - name: Update Ubuntu repositories run: sudo apt-get update + # We set DUNE_CACHE_STORAGE_MODE, it is required for dune cache to work inside opam for now, + # otherwise it gets EXDEV and considers it a cache miss - name: Use ocaml - uses: ocaml/setup-ocaml@v2 + uses: ocaml/setup-ocaml@v3 with: ocaml-compiler: ${{ steps.dotenv.outputs.ocaml_version_full }} opam-repositories: | xs-opam: ${{ steps.dotenv.outputs.repository }} + dune-cache: true + opam-pin: false + cache-prefix: v3-${{ steps.system-info.outputs.name }}-${{ steps.system-info.outputs.release }} + env: + DUNE_CACHE_STORAGE_MODE: copy - name: Install dependencies - run: opam pin list --short | xargs opam install --deps-only -v + shell: bash + run: opam install . --deps-only -v - name: Generate xapi-storage docs run: | diff --git a/.github/workflows/generate-and-build-sdks.yml b/.github/workflows/generate-and-build-sdks.yml index ca1a67a4c78..46e2457aed7 100644 --- a/.github/workflows/generate-and-build-sdks.yml +++ b/.github/workflows/generate-and-build-sdks.yml @@ -29,17 +29,6 @@ jobs: shell: bash run: opam exec -- make sdk - # sdk-ci runs some Go unit tests. 
- # This setting ensures that SDK date time - # tests are run on a machine that - # isn't using UTC - - name: Set Timezone to Tokyo for datetime tests - run: | - sudo timedatectl set-timezone Asia/Tokyo - - - name: Run CI for SDKs - uses: ./.github/workflows/sdk-ci - - name: Store C SDK source uses: actions/upload-artifact@v4 with: @@ -60,7 +49,13 @@ jobs: name: SDK_Source_PowerShell path: _build/install/default/share/powershell/* - - name: Store Go SDK Artifacts + - name: Store Java SDK source + uses: actions/upload-artifact@v4 + with: + name: SDK_Source_Java + path: _build/install/default/share/java/* + + - name: Store Go SDK source uses: actions/upload-artifact@v4 with: name: SDK_Artifacts_Go @@ -69,11 +64,16 @@ jobs: !_build/install/default/share/go/dune !_build/install/default/share/go/**/*_test.go - - name: Store Java SDK source - uses: actions/upload-artifact@v4 - with: - name: SDK_Source_Java - path: _build/install/default/share/java/* + # sdk-ci runs some Go unit tests. + # This setting ensures that SDK date time + # tests are run on a machine that + # isn't using UTC + - name: Set Timezone to Tokyo for datetime tests + run: | + sudo timedatectl set-timezone Asia/Tokyo + + - name: Run CI for SDKs + uses: ./.github/workflows/sdk-ci - name: Trim dune cache run: opam exec -- dune cache trim --size=2GiB @@ -188,7 +188,7 @@ jobs: - name: Build C# SDK shell: pwsh run: | - dotnet build source/src ` + dotnet build source/src/XenServer.csproj ` --disable-build-servers ` --configuration Release ` -p:Version=${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned ` @@ -200,86 +200,12 @@ jobs: name: SDK_Binaries_CSharp path: source/src/bin/Release/XenServer.NET.${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned.nupkg - build-powershell-5x-sdk: - name: Build PowerShell 5.x SDK (.NET Framework 4.5) - needs: build-csharp-sdk - # PowerShell SDK for PowerShell 5.x needs to run on windows-2019 because - # windows-2022 doesn't contain .NET Framework 4.x dev tools - runs-on: 
windows-2019 - permissions: - contents: read - - steps: - - name: Strip 'v' prefix from xapi version - shell: pwsh - run: echo "XAPI_VERSION_NUMBER=$("${{ inputs.xapi_version }}".TrimStart('v'))" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append - - - name: Retrieve PowerShell SDK source - uses: actions/download-artifact@v4 - with: - name: SDK_Source_PowerShell - path: source/ - - - name: Retrieve C# SDK binaries - uses: actions/download-artifact@v4 - with: - name: SDK_Binaries_CSharp - path: csharp/ - - # Following needed for restoring packages - # when calling dotnet add package - - name: Set up dotnet CLI (.NET 6.0 and 8.0) - uses: actions/setup-dotnet@v4 - with: - dotnet-version: | - 6 - 8 - - - name: Setup project and dotnet CLI - shell: pwsh - run: | - dotnet nuget add source --name local ${{ github.workspace }}\csharp - dotnet add source/src package XenServer.NET --version ${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned - - - name: Build PowerShell SDK (.NET Framework 4.5) - shell: pwsh - run: | - dotnet build source/src/XenServerPowerShell.csproj ` - --disable-build-servers ` - --configuration Release ` - -p:Version=${{ env.XAPI_VERSION_NUMBER }}-prerelease-unsigned ` - -p:TargetFramework=net45 ` - --verbosity=normal` - - - name: Update SDK and PS versions in "XenServerPSModule.psd1" - shell: pwsh - run: | - (Get-Content "source\XenServerPSModule.psd1") -replace "@SDK_VERSION@","${{ env.XAPI_VERSION_NUMBER }}" | Set-Content -Path "source\XenServerPSModule.psd1" - (Get-Content "source\XenServerPSModule.psd1") -replace "@PS_VERSION@","5.0" | Set-Content -Path "source\XenServerPSModule.psd1" - - - name: Move binaries to destination folder - shell: pwsh - run: | - New-Item -Path "." 
-Name "output" -ItemType "directory" - Copy-Item -Verbose "source\README_51.md" -Destination "output" -Force - Copy-Item -Verbose "source\LICENSE" -Destination "output" -Force - Copy-Item -Path "source\src\bin\Release\net45\*" -Include "*.dll" "output\" - Get-ChildItem -Path "source" |` - Where-Object { $_.Extension -eq ".ps1" -or $_.Extension -eq ".ps1xml" -or $_.Extension -eq ".psd1" -or $_.Extension -eq ".txt" } |` - ForEach-Object -Process { Copy-Item -Verbose $_.FullName -Destination "output" } - - - name: Store PowerShell SDK (.NET Framework 4.5) - uses: actions/upload-artifact@v4 - with: - name: SDK_Binaries_XenServerPowerShell_NET45 - path: output/**/* - build-powershell-7x-sdk: name: Build PowerShell 7.x SDK strategy: fail-fast: false matrix: - dotnet: ["6", "8"] + dotnet: ["8"] needs: build-csharp-sdk runs-on: windows-2022 permissions: diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 92f5101d189..e4971839e16 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,7 +21,17 @@ concurrency: # On new push, cancel old workflows from the same PR, branch or tag jobs: ocaml-tests: name: Run OCaml tests - runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + runs-on: ["ubuntu-22.04"] + experimental: [false] + include: + - runs-on: "ubuntu-22.04-arm" + experimental: true + + continue-on-error: ${{ matrix.experimental }} + runs-on: ${{ matrix.runs-on }} permissions: contents: read env: @@ -29,6 +39,7 @@ jobs: # when changing this value, to keep builds # consistent XAPI_VERSION: "v0.0.0" + steps: - name: Checkout code uses: actions/checkout@v4 diff --git a/.github/workflows/other.yml b/.github/workflows/other.yml index 7ec6914045d..0a94353560c 100644 --- a/.github/workflows/other.yml +++ b/.github/workflows/other.yml @@ -51,7 +51,7 @@ jobs: - uses: pre-commit/action@v3.0.1 name: Run pre-commit checks (no spaces at end of lines, etc) with: - extra_args: --all-files --verbose --hook-stage commit + extra_args: 
--all-files --verbose --hook-stage pre-commit env: SKIP: no-commit-to-branch diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d766f4f1e4a..5dc14425102 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -83,16 +83,10 @@ jobs: name: SDK_Binaries_CSharp path: dist/ - - name: Retrieve PowerShell 5.x SDK distribution artifacts - uses: actions/download-artifact@v4 - with: - name: SDK_Binaries_XenServerPowerShell_NET45 - path: sdk_powershell_5x/ - - name: Retrieve PowerShell 7.x SDK distribution artifacts uses: actions/download-artifact@v4 with: - name: SDK_Binaries_XenServerPowerShell_NET6 + name: SDK_Binaries_XenServerPowerShell_NET8 path: sdk_powershell_7x/ - name: Package C SDK artifacts for deployment @@ -104,10 +98,6 @@ jobs: rm -rf libxenserver/usr/local/lib/ tar -zcvf libxenserver-prerelease.src.tar.gz -C ./libxenserver/usr/local . - - name: Zip PowerShell 5.x SDK artifacts for deployment - shell: bash - run: zip PowerShell-SDK-5.x-prerelease-unsigned.zip ./sdk_powershell_5x -r - - name: Zip PowerShell 7.x SDK artifacts for deployment shell: bash run: zip PowerShell-SDK-7.x-prerelease-unsigned.zip ./sdk_powershell_7x -r @@ -120,7 +110,6 @@ jobs: shell: bash run: | gh release create ${{ github.ref_name }} --repo ${{ github.repository }} --generate-notes dist/* \ - PowerShell-SDK-5.x-prerelease-unsigned.zip \ PowerShell-SDK-7.x-prerelease-unsigned.zip \ Go-SDK-prerelease-unsigned.zip \ libxenserver-prerelease.tar.gz libxenserver-prerelease.src.tar.gz diff --git a/.github/workflows/setup-xapi-environment/action.yml b/.github/workflows/setup-xapi-environment/action.yml index 8381e31117b..c3126a6d157 100644 --- a/.github/workflows/setup-xapi-environment/action.yml +++ b/.github/workflows/setup-xapi-environment/action.yml @@ -18,7 +18,7 @@ runs: shell: bash run: | mkdir -p /opt/xensource/sm - wget -O /opt/xensource/sm/XE_SR_ERRORCODES.xml 
https://raw.githubusercontent.com/xapi-project/sm/master/drivers/XE_SR_ERRORCODES.xml + wget -O /opt/xensource/sm/XE_SR_ERRORCODES.xml https://raw.githubusercontent.com/xapi-project/sm/master/libs/sm/core/XE_SR_ERRORCODES.xml - name: Load environment file id: dotenv diff --git a/.gitignore b/.gitignore index 2c90d7261d3..93ad844074b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ _coverage/ *.install *.swp compile_flags.txt +_opam # tests xapi-db.xml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e8fb2f37e0e..008a4e13fb7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ ## For manually executing the pre-push hook: # pre-commit run -av --hook-stage pre-push # -default_stages: [commit, push] +default_stages: [pre-commit, pre-push] default_language_version: python: python3.11 repos: @@ -68,7 +68,7 @@ repos: entry: env PYTHONDEVMODE=yes sh -c 'coverage run && coverage xml && coverage html && coverage report && diff-cover --ignore-whitespace --compare-branch=origin/master - --show-uncovered --html-report .git/coverage-diff.html + --show-uncovered --format html:.git/coverage-diff.html --fail-under 50 .git/coverage3.11.xml' require_serial: true pass_filenames: false @@ -108,7 +108,7 @@ repos: hooks: - id: pylint files: python3/ - stages: [push] + stages: [pre-push] name: check that changes to python3 tree pass pylint entry: diff-quality --violations=pylint --ignore-whitespace --compare-branch=origin/master @@ -134,7 +134,7 @@ repos: entry: python3 pytype_reporter.py pass_filenames: false types: [python] - stages: [push] + stages: [pre-push] verbose: true # This hook runs locally only when Python files change: language: python diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..34b62707ea4 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,162 @@ +# Issues + +We welcome reports of technical issues with the components of the xen-api +toolstack. 
Please make sure that the description of the issue is as detailed as +possible to help anyone investigating it: + +1) Mention how it was detected, if and how it could be reproduced + +1) What's the desired behaviour? In what cases would it be useful? + +1) Include error messages, related logs if appropriate + +# Pull Requests + +To contribute changes to xen-api, please fork the repository on +GitHub, and then submit a pull request. + +It is required to add a `Signed-off-by:` as a +[Developers Certificate of Origin](http://developercertificate.org). +It certifies the patch's origin and is licensed under an +appropriate open-source licence to include it in Xapi: +https://git-scm.com/docs/git-commit#Documentation/git-commit.txt---signoff + +The following points are intended to describe what makes a contribution "good" - +easier to review, integrate, and maintain. Please follow them in your work. + +## Commit subjects and PR titles + +Commit subjects should preferrably start with the name of the component the +commit is most related to, and describe what the commit achieves. If your +commit only touches the `ocaml/xenopsd` directory, it should look like this, +for example: + +``` +xenopsd: Fix a deadlock during VM suspend +``` + +Similar principle applies to Pull Request titles. If there is only a single +commit in the PR, Github will automatically copy its subject and description to +the PR's title and body. If there are several commits in the PR, describe what +the PR achieves and the components it most directly impacts. + +If the commit subject includes some tracking identifier (such as `CP-1234`, for +example) referring to internal systems, please make sure to include all of the +essential information in the public descriptions - describe the symptoms of the +issue, how it was detected, investigated, how it could be reproduced, what are +the trade-offs and so on as appropriate. 
+ +## Split into commits + +Following from the rules described above, if what the commit achieves is +difficult to fit into its subject, it is probably better to split it into +several commits, if possible. Note that every commit should build (`make` +should work and the CI should pass) independently, without requiring future +commits. This means some modifications can't really be split into several +commits (datamodel changes, in particular, require modifications to several +components at the same time), but makes it easier to revert part of the Pull +Request if some issues are detected in integration testing at a later point. + +## Good Commit Messages + +Commit messages (and the body of a Pull Request) should be as helpful and +descriptive as possible. If applicable, please include a description of current +behaviour, your changes, and the new behaviour. Justify the reasoning behind +your changes - are they sufficient on their own, or preparing for more changes? +Link any appropriate documentation, issues, or commits (avoiding internal and +publicly inaccessible sources) + +## CI + +Please make sure your Pull Request passes the Github CI. It will verify that +your code has been properly formatted (can be done locally with `make format`), +builds (`make` and `make check`), and passes the unit tests (`make test`). +The CI will run in the branches of your fork, so you can verify it passes +there before opening a Pull Request. + +## Testing + +Describe what kind of testing your contribution underwent. If the testing was +manual, please describe the commands or external clients that were used. If the +tests were automated, include at least a cursory description/name of the tests, +when they were regressed, if possible. + +Please note that any contribution to the code of the project will likely +require at least some testing to be done. 
Depending on how central the +component touched in your PR is to the system, the more things could only be +detected in real-world usecases through integration testing. + +If a commit has been determined to break integration testing at a later stage, +please note that the first and safest measure will almost always be reverting +the faulty commit. Making sure critical tests are passing remains a priority +over waiting for some commit to be reworked or refactored (which can be worked +on after a revert has been done). Though we are striving to make more tests +public (with failure then being visible to all), as long as some critical tests +remain private, this will also apply to such tests (with maintainers flagging +the breakage preferrably describing at least the gist of the test). + +If you are still waiting on some testing to be done, please mark the PR as a +"draft" and make the reasoning clear. + +If wider testing is needed (e.g. the change itself is believed to be correct +but may expose latent bugs in other components), lightweight feature flags can +also be used. E.g. an entry in `xapi_globs.ml` and `xapi.conf`, where the +feature/change is defaulted to `off`, to be turned on at a future time +(when e.g. more related PRs land, or it has passed some wider testing). + +If your contribution doesn't intend to have any functional changes, please make +that clear as well. + +## Feature work + +If your contribution adds some new feature or reworks some major aspect of the +system (as opposed to one-off fixes), it can be benefitial to first describe +the plan of your work in a design proposal. Architectural issues are better +spotted early on, and taking a big-picture view can often lead to new insights. 
+ +An example of a design proposal is here: + +https://github.com/xapi-project/xen-api/pull/6387 + +If submitting a design first is not possible, include documentation alongside +with your PR describing the work, like it was done in the last three commits +here: + +https://github.com/xapi-project/xen-api/pull/6457 + +Note that the design will often serve as documentation as well - so take care +updating it after the implementation is done to better reflect reality. + +## Review process and merge + +It can often be useful to address review suggestions with a "fixup" commit +(created manually or with the help of `git commit --fixup=HASH`). This way it +is clear what the original code was and what your fix touches. Once the +fixup commit has been reviewed and the PR approved, please squash the fixup +commits with `git rebase --autosquash` before merging. Otherwise the commits in +the Pull Request should stay as independent commits - we do not require +squashing all the commits into a single one on merge. + +If the commit fixes a bug in an earlier, already merged PR then it might be +useful to mention that in the commit, if known. + +This can be done by adding this to your GIT configuration: + +``` +[pretty] + fixes = Fixes: %h (\"%s\") +``` + +And then running: + +``` +# git log -1 --pretty=fixes +Fixes: 1c581c074 ("xenopsd: Fix a deadlock during VM suspend") +``` + +This will print the commit title and hash in a nice format, which can then be +added to the footer of the commit message (alongside the sign-off). + +This is useful information to have if any of these commits get backported to +another release in the future, so that we also backport the bugfixes, not just +the buggy commits. 
diff --git a/Makefile b/Makefile index 805ece8f28b..a1d5a628f33 100644 --- a/Makefile +++ b/Makefile @@ -147,33 +147,34 @@ install-extra: DESTDIR=$(DESTDIR) SBINDIR=$(SBINDIR) QEMU_WRAPPER_DIR=$(QEMU_WRAPPER_DIR) XENOPSD_LIBEXECDIR=$(XENOPSD_LIBEXECDIR) ETCDIR=$(ETCDIR) ./ocaml/xenopsd/scripts/make-custom-xenopsd.conf # common flags and packages for 'dune install' and 'dune uninstall' -DUNE_IU_PACKAGES1=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(PREFIX) --libdir=$(LIBDIR) --mandir=$(MANDIR) +DUNE_IU_COMMON=-j $(JOBS) --destdir=$(DESTDIR) --libdir=$(LIBDIR) --mandir=$(MANDIR) +DUNE_IU_PACKAGES1=$(DUNE_IU_COMMON) --prefix=$(PREFIX) DUNE_IU_PACKAGES1+=--libexecdir=$(XENOPSD_LIBEXECDIR) --datadir=$(SDKDIR) DUNE_IU_PACKAGES1+=xapi-client xapi-schema xapi-consts xapi-cli-protocol xapi-datamodel xapi-types DUNE_IU_PACKAGES1+=xen-api-client xen-api-client-lwt rrdd-plugin rrd-transport DUNE_IU_PACKAGES1+=gzip http-lib pciutil sexpr stunnel uuid xml-light2 zstd xapi-compression safe-resources DUNE_IU_PACKAGES1+=message-switch message-switch-cli message-switch-core message-switch-lwt -DUNE_IU_PACKAGES1+=message-switch-unix xapi-idl forkexec xapi-forkexecd xapi-storage xapi-storage-script xapi-storage-cli +DUNE_IU_PACKAGES1+=message-switch-unix xapi-idl xapi-forkexecd xapi-storage xapi-storage-script xapi-storage-cli DUNE_IU_PACKAGES1+=xapi-nbd varstored-guard xapi-log xapi-open-uri xapi-tracing xapi-tracing-export xapi-expiry-alerts cohttp-posix -DUNE_IU_PACKAGES1+=xapi-rrd xapi-inventory clock xapi-sdk -DUNE_IU_PACKAGES1+=xapi-stdext-date xapi-stdext-encodings xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads xapi-stdext-unix xapi-stdext-zerocheck xapi-tools +DUNE_IU_PACKAGES1+=xapi-rrd xapi-inventory clock xapi-sdk tgroup +DUNE_IU_PACKAGES1+=xapi-stdext-encodings xapi-stdext-pervasives xapi-stdext-std xapi-stdext-threads xapi-stdext-unix xapi-stdext-zerocheck xapi-tools install-dune1: # dune can install libraries and several other files into the right locations 
dune install $(DUNE_IU_PACKAGES1) -DUNE_IU_PACKAGES2=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(OPTDIR) --libdir=$(LIBDIR) --mandir=$(MANDIR) --libexecdir=$(OPTDIR)/libexec --datadir=$(DOCDIR) xapi xe +DUNE_IU_PACKAGES2=$(DUNE_IU_COMMON) --prefix=$(OPTDIR) --libexecdir=$(OPTDIR)/libexec --datadir=$(DOCDIR) xapi xe install-dune2: dune install $(DUNE_IU_PACKAGES2) -DUNE_IU_PACKAGES3=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(OPTDIR) --libdir=$(LIBDIR) --mandir=$(MANDIR) --libexecdir=$(OPTDIR)/libexec --bindir=$(OPTDIR)/debug --datadir=$(OPTDIR)/debug xapi-debug +DUNE_IU_PACKAGES3=$(DUNE_IU_COMMON) --prefix=$(OPTDIR) --libexecdir=$(OPTDIR)/libexec --bindir=$(OPTDIR)/debug --datadir=$(OPTDIR)/debug xapi-debug install-dune3: dune install $(DUNE_IU_PACKAGES3) -DUNE_IU_PACKAGES4=-j $(JOBS) --destdir=$(DESTDIR) --prefix=$(PREFIX) --libdir=$(LIBDIR) --libexecdir=/usr/libexec --mandir=$(MANDIR) vhd-tool +DUNE_IU_PACKAGES4=$(DUNE_IU_COMMON) --prefix=$(PREFIX) --libexecdir=/usr/libexec vhd-tool forkexec qcow-stream-tool install-dune4: dune install $(DUNE_IU_PACKAGES4) @@ -186,7 +187,7 @@ install: chmod +x $(DESTDIR)$(DOCDIR)/doc-convert.sh # backward compat with existing specfile, to be removed after it is updated find $(DESTDIR) -name '*.cmxs' -delete - for pkg in xapi-debug xapi xe xapi-tools xapi-sdk vhd-tool; do for f in CHANGELOG LICENSE README.markdown; do rm $(DESTDIR)$(OPTDIR)/doc/$$pkg/$$f $(DESTDIR)$(PREFIX)/doc/$$pkg/$$f -f; done; for f in META dune-package opam; do rm $(DESTDIR)$(LIBDIR)/$$pkg/$$f -f; done; done; + for pkg in xapi-debug xapi xe xapi-tools xapi-sdk vhd-tool qcow-stream-tool; do for f in CHANGELOG LICENSE README.markdown; do rm $(DESTDIR)$(OPTDIR)/doc/$$pkg/$$f $(DESTDIR)$(PREFIX)/doc/$$pkg/$$f -f; done; for f in META dune-package opam; do rm $(DESTDIR)$(LIBDIR)/$$pkg/$$f -f; done; done; uninstall: diff --git a/README.markdown b/README.markdown index 1b9243c6ded..9f795d85506 100644 --- a/README.markdown +++ b/README.markdown @@ -11,7 +11,7 @@ Xen API 
is written mostly in [OCaml](http://caml.inria.fr/ocaml/) 4.07. Xapi is the main component produced by the Linux Foundation's -[Xapi Project](http://xenproject.org/developers/teams/xapi.html). +[Xapi Project](https://xenproject.org/projects/xapi/). Build and Install ----------------- @@ -108,6 +108,9 @@ It certifies the patch's origin and is licensed under an appropriate open-source licence to include it in Xapi: https://git-scm.com/docs/git-commit#Documentation/git-commit.txt---signoff +For more detailed guidelines on what makes a good contribution, see +[CONTRIBUTING](./CONTRIBUTING.md). + Discussions ----------- diff --git a/doc/content/design/sm-supported-image-formats.md b/doc/content/design/sm-supported-image-formats.md index fd1118e885d..3d860c2833f 100644 --- a/doc/content/design/sm-supported-image-formats.md +++ b/doc/content/design/sm-supported-image-formats.md @@ -2,7 +2,7 @@ title: Add supported image formats in sm-list layout: default design_doc: true -revision: 2 +revision: 3 status: proposed --- @@ -22,32 +22,16 @@ available formats. # Design Proposal To expose the available image formats to clients (e.g., XenCenter, XenOrchestra, etc.), -we propose adding a new field called `supported-image-formats` to the Storage Manager (SM) -module. This field will be included in the output of the `SM.get_all_records` call. +we propose adding a new field called `supported_image_formats` to the Storage Manager +(SM) module. This field will be included in the output of the `SM.get_all_records` call. -The `supported-image-formats` field will be populated by retrieving information -from the SMAPI drivers. Specifically, each driver will update its `DRIVER_INFO` -dictionary with a new key, `supported_image_formats`, which will contain a list -of strings representing the supported image formats -(for example: `["vhd", "raw", "qcow2"]`). - -The list designates the driver's preferred VDI format as its first entry. 
That -means that when migrating a VDI, the destination storage repository will -attempt to create a VDI in this preferred format. If the default format cannot -be used (e.g., due to size limitations), an error will be generated. - -If a driver does not provide this information (as is currently the case with existing -drivers), the default value will be an empty array. This signifies that it is the -driver that decides which format it will use. This ensures that the modification -remains compatible with both current and future drivers. - -With this new information, listing all parameters of the SM object will return: +- With this new information, listing all parameters of the SM object will return: ```bash # xe sm-list params=all ``` -will output something like: +Output of the command will look like (notice that CLI uses hyphens): ``` uuid ( RO) : c6ae9a43-fff6-e482-42a9-8c3f8c533e36 @@ -65,12 +49,118 @@ required-cluster-stack ( RO) : supported-image-formats ( RO) : vhd, raw, qcow2 ``` -This change impacts the SM data model, and as such, the XAPI database version will -be incremented. +## Implementation details + +The `supported_image_formats` field will be populated by retrieving information +from the SMAPI drivers. Specifically, each driver will update its `DRIVER_INFO` +dictionary with a new key, `supported_image_formats`, which will contain a list +of strings representing the supported image formats +(for example: `["vhd", "raw", "qcow2"]`). Although the formats are listed as a +list of strings, they are treated as a set-specifying the same format multiple +times has no effect. + +### Driver behavior without `supported_image_formats` + +If a driver does not provide this information (as is currently the case with +existing drivers), the default value will be an empty list. This signifies +that the driver determines which format to use when creating VDI. During a migration, +the destination driver will choose the format of the VDI if none is explicitly +specified. 
This ensures backward compatibility with both current and future drivers. + +### Specifying image formats for VDIs creation + +If the supported image format is exposed to the client, then, when creating new VDI, +user can specify the desired format via the `sm_config` parameter `image-format=qcow2` (or +any format that is supported). If no format is specified, the driver will use its +preferred default format. If the specified format is not supported, an error will be +generated indicating that the SR does not support it. Here is how it can be achieved +using the XE CLI: + +```bash +# xe vdi-create \ + sr-uuid=cbe2851e-9f9b-f310-9bca-254c1cf3edd8 \ + name-label="A new VDI" \ + virtual-size=10240 \ + sm-config:image-format=vhd +``` + +### Specifying image formats for VDIs migration + +When migrating a VDI, an API client may need to specify the desired image format if +the destination SR supports multiple storage formats. + +#### VDI pool migrate + +To support this, a new parameter, `dest_img_format`, is introduced to +`VDI.pool_migrate`. This field accepts a string specifying the desired format (e.g., *qcow2*), +ensuring that the VDI is migrated in the correct format. The new signature of +`VDI.pool_migrate` will be +`VDI ref pool_migrate (session ref, VDI ref, SR ref, string, (string -> string) map)`. + +If the specified format is not supported or cannot be used (e.g., due to size limitations), +an error will be generated. Validation will be performed as early as possible to prevent +disruptions during migration. These checks can be performed by examining the XAPI database +to determine whether the SR provided as the destination has a corresponding SM object with +the expected format. If this is not the case, a `format not found` error will be returned. +If no format is specified by the client, the destination driver will determine the appropriate +format. 
+ +```bash +# xe vdi-pool-migrate \ + uuid= \ + sr-uuid= \ + dest-img-format=qcow2 +``` + +#### VM migration to remote host + +A VDI migration can also occur during a VM migration. In this case, we need to +be able to specify the expected destination format as well. Unlike `VDI.pool_migrate`, +which applies to a single VDI, VM migration may involve multiple VDIs. +The current signature of `VM.migrate_send` is `(session ref, VM ref, (string -> string) map, +bool, (VDI ref -> SR ref) map, (VIF ref -> network ref) map, (string -> string) map, +(VGPU ref -> GPU_group ref) map)`. Thus there is already a parameter that maps each source +VDI to its destination SR. We propose to add a new parameter that allows specifying the +desired destination format for a given source VDI: `(VDI ref -> string)`. It is +similar to the VDI-to-SR mapping. We will update the XE cli to support this new format. +It would be `image_format:=`: + +```bash +# xe vm-migrate \ + host-uuid= \ + remote-master= \ + remote-password= \ + remote-username= \ + vdi:= \ + vdi:= \ + image-format:=vhd \ + image-format:=qcow2 \ + uuid= +``` +The destination image format would be a string such as *vhd*, *qcow2*, or another +supported format. It is optional to specify a format. If omitted, the driver +managing the destination SR will determine the appropriate format. +As with VDI pool migration, if this parameter is not supported by the SM driver, +a `format not found` error will be returned. The validation must happen before +sending a creation message to the SM driver, ideally at the same time as checking +whether all VDIs can be migrated. + +To be able to check the format, we will need to modify `VM.assert_can_migrate` and +add the mapping from VDI references to their image formats, as is done in `VM.migrate_send`. # Impact -- **Data Model:** A new field (`supported-image-formats`) is added to the SM records. 
+It should have no impact on existing storage repositories that do not provide any information +about the supported image format. + +This change impacts the SM data model, and as such, the XAPI database version will +be incremented. It also impacts the API. + +- **Data Model:** + - A new field (`supported_image_formats`) is added to the SM records. + - A new parameter is added to `VM.migrate_send`: `(VDI ref -> string) map` + - A new parameter is added to `VM.assert_can_migrate`: `(VDI ref -> string) map` + - A new parameter is added to `VDI.pool_migrate`: `string` - **Client Awareness:** Clients like the `xe` CLI will now be able to query and display the supported image formats for a given SR. - **Database Versioning:** The XAPI database version will be updated to reflect this change. diff --git a/doc/content/python/_index.md b/doc/content/python/_index.md index 773f02ce38c..523c2018718 100644 --- a/doc/content/python/_index.md +++ b/doc/content/python/_index.md @@ -52,7 +52,7 @@ in the [pre-commit] configuration file [.pre-commit-config.yaml]. 
entry: sh -c 'coverage run && coverage xml && coverage html && coverage report && diff-cover --ignore-whitespace --compare-branch=origin/master - --show-uncovered --html-report .git/coverage-diff.html + --show-uncovered --format html:.git/coverage-diff.html --fail-under 50 .git/coverage3.11.xml' require_serial: true pass_filenames: false diff --git a/doc/content/toolstack/features/NUMA/index.md b/doc/content/toolstack/features/NUMA/_index.md similarity index 100% rename from doc/content/toolstack/features/NUMA/index.md rename to doc/content/toolstack/features/NUMA/_index.md diff --git a/doc/content/toolstack/features/NUMA/lazy-reclaim.md b/doc/content/toolstack/features/NUMA/lazy-reclaim.md new file mode 100644 index 00000000000..b99c4dfdd1f --- /dev/null +++ b/doc/content/toolstack/features/NUMA/lazy-reclaim.md @@ -0,0 +1,252 @@ +--- +title: "Lazy memory reclaim" +weight: 10 +categories: + NUMA +--- +## Xen host memory scrubbing + +Xen does not immediately reclaim deallocated memory. +Instead, Xen has a host memory scrubber that runs lazily in +the background to reclaim recently deallocated memory. + +Thus, there is no guarantee that Xen has finished scrubbing +when `xenopsd` is being asked to build a domain. + +## Waiting for enough free host memory + +> [!info] +> In case reclaimed memory is not sufficient yet, before building +> a VM, `xenopsd` waits by polling until enough memory is reclaimed. +> See the +> [walk-through of Domain.build](../../../xenopsd/walkthroughs/VM.build/Domain.build.md#build_pre-prepare-building-the-vm) +> for more context. 
+ +For this, with CA-39743, the implementation of Xenopsd's +[build_pre](https://github.com/xapi-project/xen-api/blob/073373ff2abfa386025f2b1eee7131520df76be9/ocaml/xenopsd/xc/domain.ml#L899-L964) +function was updated to +[call](https://github.com/xapi-project/xen-api/blob/073373ff2abfa386025f2b1eee7131520df76be9/ocaml/xenopsd/xc/domain.ml#L904) +[wait_xen_free_mem](https://github.com/xapi-project/xen-api/blob/073373ff2abfa386025f2b1eee7131520df76be9/ocaml/xenopsd/xc/domain.ml#L236-L272) +to wait for to Xen reclaim a sufficient amount of host memory +host-wide: + +```ml +let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = + let open Memory in + let uuid = get_uuid ~xc domid in + debug "VM = %s; domid = %d; waiting for %Ld MiB of free host memory" + (Uuidx.to_string uuid) domid memory.required_host_free_mib ; + (* CA-39743: Wait, if necessary, for the Xen scrubber to catch up. *) + if + not (wait_xen_free_mem ~xc (Memory.kib_of_mib memory.required_host_free_mib)) + then ( + error "VM = %s; domid = %d; Failed waiting for Xen to free %Ld MiB" + (Uuidx.to_string uuid) domid memory.required_host_free_mib ; + raise (Not_enough_memory (Memory.bytes_of_mib memory.required_host_free_mib)) + ) ; +``` + +This is the implementation of the function: + +```ml +let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib + : bool = + let open Memory in + let rec wait accumulated_wait_time_seconds = + let host_info = Xenctrl.physinfo xc in + let free_memory_kib = + kib_of_pages (Int64.of_nativeint host_info.Xenctrl.free_pages) + in + let scrub_memory_kib = + kib_of_pages (Int64.of_nativeint host_info.Xenctrl.scrub_pages) + in + (* At exponentially increasing intervals, write *) + (* a debug message saying how long we've waited: *) + if is_power_of_2 accumulated_wait_time_seconds then + debug + "Waited %i second(s) for memory to become available: %Ld KiB free, %Ld \ + KiB scrub, %Ld KiB required" + accumulated_wait_time_seconds free_memory_kib 
scrub_memory_kib + required_memory_kib ; + if + free_memory_kib >= required_memory_kib + (* We already have enough memory. *) + then + true + else if scrub_memory_kib = 0L (* We'll never have enough memory. *) then + false + else if + accumulated_wait_time_seconds >= maximum_wait_time_seconds + (* We've waited long enough. *) + then + false + else ( + Thread.delay 1.0 ; + wait (accumulated_wait_time_seconds + 1) + ) + in + wait 0 +``` + +## Waiting for enough free memory on NUMA nodes + +For NUMA, to improve the chances to make the new domain affine +to a single NUMA node, a similar algorithm could be utilized. + +This should be done directly before the NUMA placement algorithm +runs, or even as part of an improvement for it: + +The NUMA placement algorithm calls the `numainfo` hypercall to +obtain the table of NUMA nodes along with the available memory on each +node and the distance matrix between the NUMA nodes. + +If the reported free memory of the host is lower than would be +expected at that moment, this might be an indication that some +memory might not be scrubbed yet. Another indication might be +if the amount of free memory is increasing between two checks. + +In such cases, when Xen has yet to reclaim more memory, or if +other domains are in the process of being shut down, it might +likewise make sense to wait until more memory is free to obtain +NUMA placement or a better NUMA placement for the new domain. + +In such situations, assuming for example that the NUMA placement +failed to obtain a suitable NUMA node affinity for the new domain, +the smallest possible change would be to simply re-run the NUMA +placement algorithm (essentially to poll for more memory) like +implemented with CA-39743 by calling `wait_xen_free_mem`, but +this time not by polling for system-wide memory, but for per-NUMA +node memory.
+ +As the NUMA placement algorithm does exactly that, simply +re-starting it in a defined way would be the smallest change +to handle cases of having to poll for free memory on a per-NUMA +node basis, if lazy memory reclaim is in progress on the host. + +## An example scenario + +This is an example scenario where not waiting for memory scrubbing +in a NUMA-aware way could fragment the VM across many NUMA nodes: + +In this example, a relatively large VM is rebooted: + +Fictional machine with 4 NUMA nodes, 25 GB each (for layout reasons): +```mermaid +%%{init: {"packet": {"bitsPerRow": 25, "rowHeight": 38}} }%% +packet-beta + 0-18: "Memory used by other VMs" + 19-24: "free: 6 GB" + 25-44: "VM before restart: 20 GB" + 45-49: "free: 5GB" + 50-69: "Memory used by other VMs" + 70-74: "free: 5GB" + 75-94: "Memory used by other VMs" + 95-99: "free: 5GB" +``` +VM is destroyed: +```mermaid +%%{init: {"packet": {"bitsPerRow": 25, "rowHeight": 38}} }%% +packet-beta + 0-18: "Memory used by other VMs" + 19-24: "free: 6 GB" + 25-44: "VM memory to be reclaimed, but not yet scrubbed" + 45-49: "free: 5GB" + 50-69: "Memory used by other VMs" + 70-74: "free: 5GB" + 75-94: "Memory used by other VMs" + 95-99: "free: 5GB" +``` + +NUMA placement runs, and sees that no NUMA node has enough memory +for the VM. Therefore: +1. NUMA placement does not return a NUMA placement solution. +2. As a result, vCPU soft pinning is not set up +3. As a result, the domain does not get a NUMA node affinity +4. When `xenguest` allocates the VM's memory, Xen falls back to + round-robin memory allocation across all NUMA nodes.
+ +Even if Xen has already scrubbed the memory by the time the +NUMA placement function returns, the decision to not select +a NUMA placement has already been made and the domain is +built in this way: +```mermaid +%%{init: {"packet": {"bitsPerRow": 25, "rowHeight": 38}} }%% +packet-beta + 0-18: "Memory used by other VMs" + 19-23: "VM: 5GB" + 24-24: "" + 25-44: "scrubbed/reclaimed free memory: 20 GB" + 45-49: "VM: 5GB" + 50-69: "Memory used by other VMs" + 70-74: "VM: 5GB" + 75-94: "Memory used by other VMs" + 95-99: "VM: 5GB" +``` + +In case the reclaimed 20 GB of memory is not partially allocated +for other VMs in the meantime: +After scrubbing and memory reclaim is complete, the 20 GB +of NUMA-node memory is available for the VM again. + +When the 20 GB VM is rebooted and the memory is still available, +the rebooted VM might become NUMA-affine to the 2nd NUMA node again. Of course, this unpredictability is what we need to fix. + +## Starting VMs when not enough reclaim is possible + +But, when no NUMA node has enough memory to run a new VM, +waiting will not help. + +But, it might be good to inform the caller that perfect NUMA placement +could not be achieved. + +However, if a CPU socket with multiple NUMA nodes with a very low +internode distance has enough free memory, that could be seen as +a fallback that would have relatively low performance impact. + +In the end, in such situations, it depends on the caller what to do: +Whether to start the VM anyway despite being not NUMA-aligned, +or to inform the caller of an expected performance degradation of the VM. + +### Example scenario when not waiting for free NUMA node memory: + +Note: This uses round numbers for easy checking and is purely theoretical: + +| Node | RAM | used | free | +| ----:| ---:| ----:| ----:| +| 1 | 50 | 35 | 15 | +| 2 | 50 | 45 | 5 | +| 3 | 50 | 35 | 15 | +| 4 | 50 | 35 | 15 | +| all | 200 | 150 | 50 | + +Action: A 45 GB VM on Node 2 is shut down and started again. +1.
When the new `VM.start` runs, the 45 GB may not have been scrubbed yet. +2. The free memory check still finds 50 GB free, enough to start the VM. +3. NUMA placement picks one of the other nodes as they have more memory. +4. For example, assume it picks node 0, and sets the node-affinity to it. +5. The Xen buddy allocator will run out of 1GB superpages on node 0 after + having exhausted the 15 GB free memory on it. +6. This leaves 30 GB to be allocated elsewhere. +7. Meanwhile, some memory might have been scrubbed and reclaimed on Node 2. +8. The Xen buddy allocator then falls back to allocating in a round-robin + fashion from the other NUMA nodes, assume 10 GB on each of the 3 nodes. + +New memory situation after the restart: + +| Node | RAM | used | free | Dom1 | +| ----:| ---:| ----:| ----:| ----:| +| 1 | 50 | 50 | 0 | 15 | +| 2 | 50 | 10 | 40 | 10 | +| 3 | 50 | 45 | 5 | 10 | +| 4 | 50 | 45 | 5 | 10 | +| all | 200 | 150 | 50 | 45 | + +Thus, a single VM restart may cause the VM's memory to be spread over +all NUMA nodes. As a result, most memory accesses would be remote. + +`xenguest` populates the Guest memory in the process of the build step. + +But as the `VM.build` micro-ops run in parallel, this can happen: +The free memory reported by Xen may not yet reflect memory that will +be allocated by other concurrently running `VM.build` micro-ops when +the `xenguest` processes started by them populate the VM memory. diff --git a/doc/content/toolstack/features/NUMA/parallel-VM.build.md b/doc/content/toolstack/features/NUMA/parallel-VM.build.md new file mode 100644 index 00000000000..fe08dce2102 --- /dev/null +++ b/doc/content/toolstack/features/NUMA/parallel-VM.build.md @@ -0,0 +1,52 @@ +--- +title: "Parallel VM build" +categories: + - NUMA +weight: 50 +mermaid: + force: true +--- + +## Introduction + +When the `xenopsd` server receives a `VM.start` request, it: +1. splits the request into micro-ops and +2. dispatches the micro-ops in one queue per VM.
+ +When `VM.start` requests arrive faster than the thread pool +finishes them, the thread pool will run multiple +micro-ops for different VMs in parallel. This includes the +VM.build micro-op that does NUMA placement and VM memory allocation. + +The [Xenopsd architecture](xenopsd/architecture/_index) and the +[walkthrough of VM.start](VM.start) provide more details. + +This walkthrough dives deeper into the `VM_create` and `VM_build` micro-ops +and focuses on the memory allocation for different VMs in +parallel with respect to the NUMA placement of the starting VMs. + +## Architecture + +This diagram shows the [architecture](../../../xenopsd/architecture/_index) of Xenopsd: + +At the top of the diagram, two client RPCs have been sent: +One to start a VM and the other to fetch the latest events. +The `Xenops_server` module splits them into "micro-ops" (labelled "μ op" here). +These micro-ops are enqueued in queues, one queue per VM. The thread pool pulls +from the VM queues and runs the micro-ops: + +![Inside xenopsd](../../../../xenopsd/architecture/xenopsd.svg) +
Image 1: Xenopsd architecture
+ +Overview of the micro-ops for creating a new VM: + +- `VM.create`: create an empty Xen domain in the Hypervisor and the Xenstore +- `VM.build`: build a Xen domain: Allocate guest memory and load the firmware and `hvmloader` +- Several micro-ops to attach devices launch the device model. +- `VM.unpause`: unpause the domain + +## Flowchart: Parallel VM start + +When multiple `VM.start` run concurrently, an example could look like this: + +{{% include "snippets/vm-build-parallel" %}} diff --git a/doc/content/toolstack/features/SSH/index.md b/doc/content/toolstack/features/SSH/index.md new file mode 100644 index 00000000000..a0a7c937706 --- /dev/null +++ b/doc/content/toolstack/features/SSH/index.md @@ -0,0 +1,249 @@ +# SSH Management + +SSH Management enables programmatic control of SSH access to XenServer hosts. This feature +allows administrators to enable/disable SSH services, configure timeout settings, and implement +automatic SSH management based on XAPI health status. + +## Architecture Overview + +The SSH Management feature is built around three core components: + +1. **SSH Service Control**: Direct enable/disable operations for SSH on individual hosts or entire pools +2. **Timeout Management**: Configurable timeouts for both SSH sessions and service duration limits +3. 
**Auto Mode**: Intelligent SSH management that automatically adjusts based on XAPI health status + +![SSH Status Transition](ssh-status-trans.png) + +## SSH Service Control + +### API Design + +#### Host APIs + +- `host.enable_ssh`: Enables SSH access on the specified host +- `host.disable_ssh`: Disables SSH access on the specified host +- `host.set_ssh_enabled_timeout`: Configures SSH service timeout duration (0-172800 seconds, maximum 2 days) +- `host.set_console_idle_timeout`: Sets idle timeout for SSH/VNC console sessions +- `host.set_ssh_auto_mode`: Controls SSH auto mode behavior (when true, SSH is normally disabled but enabled during XAPI downtime) + +#### Pool APIs + +- `pool.enable_ssh`: Enables SSH access across all hosts in the pool +- `pool.disable_ssh`: Disables SSH access across all hosts in the pool +- `pool.set_ssh_enabled_timeout`: Sets SSH service timeout for all pool hosts +- `pool.set_console_idle_timeout`: Configures console idle timeout for all pool hosts +- `pool.set_ssh_auto_mode`: Applies SSH auto mode configuration to all pool hosts + +### Implementation Details + +The enable/disable operations work by directly managing systemd services. The code starts and enables the sshd systemd service to enable SSH access, or stops and disables it to disable SSH access: + +```ocaml +Xapi_systemctl.start "sshd" +Xapi_systemctl.enable "sshd" + +Xapi_systemctl.stop "sshd" +Xapi_systemctl.disable "sshd" +``` + +#### SSH Timeout Management + +The timeout management uses the scheduler system to automatically disable SSH after a specified period. The function removes any existing disable job from the queue and creates a new one-shot job that will execute the SSH disable operation when the timeout expires. 
If XAPI restarts during this period, xapi will schedule a new job to disable SSH with the remaining time: + +```ocaml +let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + !Xapi_globs.job_for_disable_ssh + Xapi_stdext_threads_scheduler.Scheduler.OneShot (Int64.to_float timeout) + (fun () -> + disable_ssh_internal ~__context ~self + ) +``` + +#### Console Idle Timeout + +The console idle timeout is configured by writing to a profile script that sets the TMOUT environment variable. The function generates appropriate content based on the timeout value and atomically writes it to the profile script file: + +```ocaml +let set_console_idle_timeout ~__context ~self ~value = + let content = match value with + | 0L -> "# Console timeout is disabled\n" + | timeout -> Printf.sprintf "# Console timeout configuration\nexport TMOUT=%Ld\n" timeout + in + Unixext.atomic_write_to_file !Xapi_globs.console_timeout_profile_path 0o0644 + (fun fd -> Unix.write fd (Bytes.of_string content) 0 (String.length content)) +``` + +#### SSH Auto Mode + +The SSH auto mode is configured by managing the monitoring service. The function updates the database with the auto mode setting and then enables or disables the SSH monitoring daemon accordingly.
When auto mode is enabled, it starts the monitoring service and enables the SSH service (the SSH service is always enabled so that, if both XAPI and the monitor service are down, the user is still able to start the SSH service by rebooting the host); when disabled, it stops and disables the monitoring service: + +```ocaml +let set_ssh_auto_mode ~__context ~self ~value = + Db.Host.set_ssh_auto_mode ~__context ~self ~value ; + if value then ( + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_service ; + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.start ~wait_until_success:false !Xapi_globs.ssh_monitor_service + ) else ( + Xapi_systemctl.stop ~wait_until_success:false !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.disable ~wait_until_success:false !Xapi_globs.ssh_monitor_service + ) +``` + +### CLI Commands + +```bash +# Enable/disable SSH on hosts +xe host-enable-ssh host= +xe host-disable-ssh host-uuid= + +# Configure timeouts on individual hosts +xe host-param-set uuid= ssh-enabled-timeout=3600 +xe host-param-set uuid= console-idle-timeout=300 +xe host-param-set uuid= ssh-auto-mode=true + +# Query host SSH parameters +xe host-param-get uuid= param-name=ssh-enabled +xe host-param-get uuid= param-name=ssh-expiry +xe host-param-get uuid= param-name=ssh-enabled-timeout +xe host-param-get uuid= param-name=console-idle-timeout +xe host-param-get uuid= param-name=ssh-auto-mode + +# Enable/disable SSH across pool +xe pool-enable-ssh +xe pool-disable-ssh + +# Configure timeouts across pool +xe pool-param-set uuid= ssh-enabled-timeout=3600 +xe pool-param-set uuid= console-idle-timeout=300 +xe pool-param-set uuid= ssh-auto-mode=true + +# Query pool SSH parameters +xe pool-param-get uuid= param-name=ssh-enabled +xe pool-param-get uuid= param-name=ssh-expiry +xe pool-param-get uuid= param-name=ssh-enabled-timeout +xe pool-param-get uuid= param-name=console-idle-timeout +xe pool-param-get uuid= param-name=ssh-auto-mode +``` + +## Auto
Mode + +### Overview + +The auto mode feature intelligently manages SSH access based on XAPI health status: +- SSH is automatically enabled when XAPI becomes unhealthy +- SSH is automatically disabled when XAPI is healthy and running normally + +When the user enables the SSH service with `enable_ssh` API, SSH auto mode will be turned off. +| SSH service | auto mode | +|-------------|-----------| +| enabled | off | + +If SSH auto mode is enabled and XAPI becomes unresponsive, the system will automatically enable the SSH service to allow access. +| auto mode | xapi healthy | SSH service | +|-----------|--------------|-------------| +| on | yes | disable | +| on | no | enable | +| off | NA | NA | + +When SSH is temporarily enabled using the ssh-enabled-timeout setting and enable-ssh command, the system preserves the original SSH auto-mode state in cache. During the timeout period, SSH auto-mode is suspended (set to off) to allow SSH access. Once the timeout expires, the system restores the cached auto-mode state - if auto-mode was originally enabled, it will be reactivated and automatically stop the SSH service again +| auto mode before set enable timeout | SSH service before set enable timeout | auto mode during the limited time period | auto mode after enable timeout | +|-----------------------------------|--------------------------------------|----------------------------------------|-------------------------------| +| on | off | off | on | + +### Service Architecture + +#### Monitoring Daemon + +The monitoring daemon (`/opt/xensource/libexec/xapi-state-monitor`) operates continuously: + +1. Monitors current SSH service status +2. When auto mode is enabled: + - If XAPI is healthy and SSH is active → Stop SSH + - If XAPI is unhealthy and SSH is inactive → Start SSH +3. Implements retry logic with up to 3 attempts for failed operations +4. 
Pauses for 60 seconds between health check cycles + +### Health Check Integration + +The system leverages the existing `xapi-health-check` script for health monitoring: +- Returns 0 when XAPI is healthy +- Returns 1 when XAPI is unhealthy +- Triggers unhealthy status after 20 consecutive failures + +### Configuration + +#### Default Behavior + +- **XenServer 8**: `ssh_auto_mode=false` (SSH is enabled by default) +- **XenServer 9**: `ssh_auto_mode=true` (SSH is disabled by default) + +#### Configuration Files + +In XS8, the ssh_auto_mode default value will be overridden by the configuration file as below, while in XS9, there is no configuration file, so auto-mode will remain enabled by default. + +```bash +# XS8: /etc/xapi.conf.d/ssh-auto-mode.conf +ssh_auto_mode=false +``` + +## Pool Operations + +### Pool Join + +When a host joins a pool, the following sequence occurs: +1. The host inherits SSH configuration from the pool coordinator +2. SSH settings are applied before metadata updates +3. The xapi-ssh-monitor service is started if auto mode is enabled + +### Pool Eject + +When a host is ejected from a pool: +1. The host resets to its default configuration (e.g., in XS8 SSH enabled, no timeout) +2. Default SSH configuration is applied before the host becomes a coordinator + +## XAPI Restart Handling + +During XAPI startup, the system performs several key operations to handle different restart scenarios: + +#### SSH Status Synchronization +The database is updated to reflect the actual SSH service state, ensuring consistency between the database and the running system. 
+ +#### Short XAPI Downtime Recovery +When `ssh_enabled_timeout > 0` and `ssh_expiry > current_time`, indicating that XAPI restarted during a temporary SSH disable period: +- The system reschedules the disable SSH job with the remaining time +- This ensures that the original timeout period is maintained even after XAPI restart + +#### Extended XAPI Downtime Handling +When a ssh_enabled_timeout is configured, `ssh_expiry < current_time` and the SSH service is currently active, indicating that XAPI was down for an extended period that exceeded the timeout duration: +- SSH is automatically disabled +- SSH auto mode is enabled to ensure continuous SSH availability + +This scenario typically occurs when XAPI is not active when the SSH timeout expires, requiring the system to disable SSH and enable auto mode so that SSH remains continuously available. + +## Error Handling + +### Retry Logic + +The system implements robust retry mechanisms: +- SSH disable operations are retried up to 3 times +- 5-second intervals are maintained between retry attempts + +## Integration Points + +### xsconsole Integration + +The xsconsole interface has been updated to use XAPI APIs rather than direct systemd commands for consistency with the XAPI DB status: +- Enable/Disable operations: Calls `host.enable_ssh`/`host.disable_ssh` +- Auto mode configuration: Calls `host.set_ssh_auto_mode` + +### Answerfile Support + +The following configuration in the answerfile can be used: when ssh-mode is configured to on, auto-mode will be disabled and SSH will be enabled; when ssh-mode is configured to off, auto-mode will be disabled and SSH will be disabled as well; when configured to auto, auto-mode will be enabled and SSH will be disabled by auto-mode once XAPI is on: + +```xml +on|off|auto +``` \ No newline at end of file diff --git a/doc/content/toolstack/features/SSH/ssh-status-trans.png b/doc/content/toolstack/features/SSH/ssh-status-trans.png new file mode 100644 index 00000000000..40cf16255a7 Binary files
/dev/null and b/doc/content/toolstack/features/SSH/ssh-status-trans.png differ diff --git a/doc/content/toolstack/features/Tracing/index.md b/doc/content/toolstack/features/Tracing/index.md new file mode 100644 index 00000000000..c54441bbb68 --- /dev/null +++ b/doc/content/toolstack/features/Tracing/index.md @@ -0,0 +1,137 @@ ++++ +title = "Tracing" ++++ + +Tracing is a powerful tool for observing system behavior across multiple components, making it especially +useful for debugging and performance analysis in complex environments. + +By integrating OpenTelemetry (a standard that unifies OpenTracing and OpenCensus) and the Zipkin v2 protocol, +XAPI enables efficient tracking and visualization of operations across internal and external systems. +This facilitates detailed analysis and improves collaboration between teams. + +Tracing is commonly used in high-level applications such as web services. As a result, less widely-used or +non-web-oriented languages may lack dedicated libraries for distributed tracing (An OCaml implementation +has been developed specifically for XenAPI). + +# How tracing works in XAPI + +## Spans and Trace Context + +- A *span* is the core unit of a trace, representing a single operation with a defined start and end time. + Spans can contain sub-spans that represent child tasks. This helps identify bottlenecks or areas that + can be parallelized. + - A span can contain several contextual elements such as *tags* (key-value pairs), + *events* (time-based data), and *errors*. +- The *TraceContext* HTTP standard defines how trace IDs and span contexts are propagated across systems, + enabling full traceability of operations. + +This data enables the creation of relationships between tasks and supports visualizations such as +architecture diagrams or execution flows. These help in identifying root causes of issues and bottlenecks, +and also assist newcomers in onboarding to the project. 
+ +## Configuration + +- To enable tracing, you need to create an *Observer* object in XAPI. This can be done using the *xe* CLI: + ```sh + xe observer-create \ + name-label= \ + enabled=true \ + components=xapi,xenopsd \ + ``` +- By default, if you don't specify `enabled=true`, the observer will be disabled. +- To add an HTTP endpoint, make sure the server is up and running, then run: + ```sh + xe observer-param-set uuid= endpoints=bugtool,http://:9411/api/v2/spans + ``` + If you specify an invalid or unreachable HTTP endpoint, the configuration will fail. +- **components**: Specify which internal components (e.g., *xapi*, *xenopsd*) should be traced. + Additional components are expected to be supported in future releases. An experimental *smapi* component + is also available and requires additional configuration (explained below). + +- **endpoints**: The observer can collect traces locally in */var/log/dt* or forward them to external + visualization tools such as [Jaeger](https://www.jaegertracing.io/). Currently, only HTTP/S endpoints + are supported, and they require additional configuration steps (see next section). + +- To disable tracing you just need to set *enabled* to false: + ```sh + xe observer-param-set uuid= enabled=false + ``` + +### Enabling smapi component + +- *smapi* component is currently considered experimental and is filtered by default. To enable it, you must + explicitly configure the following in **xapi.conf**: + ```ini + observer-experimental-components="" + ``` + This tells XAPI that no components are considered experimental, thereby allowing *smapi* to be traced. + A modification to **xapi.conf** requires a restart of the XAPI toolstack. + +### Enabling HTTP/S endpoints + +- By default HTTP and HTTPS endpoints are disabled. 
To enable them, add the following lines to **xapi.conf**: + ```ini + observer-endpoint-http-enabled=true + observer-endpoint-https-enabled=true + ``` + As with enabling *smapi* component, modifying **xapi.conf** requires a restart of the XAPI toolstack. + *Note*: HTTPS endpoint support is available but not tested and may not work. + +### Sending local trace to endpoint + +By default, traces are generated locally in the `/var/log/dt` directory. You can copy or forward +these traces to another location or endpoint using the `xs-trace` tool. For example, if you have +a *Jaeger* server running locally, you can run: + +```sh +xs-trace /var/log/dt/ http://127.0.0.1:9411/api/v2/spans +``` + +You will then be able to visualize the traces in Jaeger. + +### Tagging Trace Sessions for Easier Search + +#### Specific attributes +To make trace logs easier to locate and analyze, it can be helpful to add custom attributes around the +execution of specific commands. For example: + +```sh +# xe observer-param-set uuid= attributes:custom.random=1234 +# xe vm-start ... +# xe observer-param-clear uuid= param-name=attributes param-key=custom.random +``` + +This technique adds a temporary attribute, *custom.random=1234*, which will appear in the generated trace +spans, making it easier to search for specific activity in trace visualisation tools. It may also be possible +to achieve similar tagging using baggage parameters directly in individual *xe* commands, but this approach +is currently undocumented. + +#### Baggage + +*Baggage*, contextual information that resides alongside the context, is supported. This means you can run +the following command: + +```sh +BAGGAGE="mybaggage=apples" xe vm-list +``` + +You will be able to search for tags `mybaggage=apples`. + +#### Traceparent + +Another way to assist in trace searching is to use the `TRACEPARENT` HTTP header. It is an HTTP header field that +identifies the incoming request. 
It has a [specific format](https://www.w3.org/TR/trace-context/#traceparent-header) +and it is supported by **XAPI**. Once generated you can run command as: + +```sh +TRACEPARENT="00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01" xe vm-list +``` + +And you will be able to look for trace *4bf92f3577b34da6a3ce929d0e0e4736*. + +### Links + +- [Opentelemetry](https://opentelemetry.io/) +- [Trace Context](https://www.w3.org/TR/trace-context/) +- [Baggage](https://opentelemetry.io/docs/concepts/signals/baggage/) +- [Ocaml opentelemetry module](https://ocaml.org/p/opentelemetry/latest) diff --git a/doc/content/toolstack/features/events/index.md b/doc/content/toolstack/features/events/index.md index 3d76d4db927..98bdf17e6ae 100644 --- a/doc/content/toolstack/features/events/index.md +++ b/doc/content/toolstack/features/events/index.md @@ -72,9 +72,9 @@ while True: events = session.xenapi.event.next() # block until a xapi event on a xapi DB object is available for event in events: print "received event op=%s class=%s ref=%s" % (event['operation'], event['class'], event['ref']) - if event['class'] == 'vm' and event['operatoin'] == 'mod': + if event['class'] == 'vm' and event['operation'] == 'mod': vm = event['snapshot'] - print "xapi-event on vm: vm_uuid=%s, power_state=%s, current_operation=%s" % (vm['uuid'],vm['name_label'],vm['power_state'],vm['current_operations'].values()) + print "xapi-event on vm: vm_uuid=%s, vm_name_label=%s, power_state=%s, current_operation=%s" % (vm['uuid'],vm['name_label'],vm['power_state'],vm['current_operations'].values()) except XenAPI.Failure, e: if len(e.details) > 0 and e.details[0] == 'EVENTS_LOST': session.xenapi.event.unregister(["VM","pool"]) diff --git a/doc/content/xapi/guides/howtos/add-function.md b/doc/content/xapi/guides/howtos/add-function.md index 8aeedfb27fb..cbde59a991e 100644 --- a/doc/content/xapi/guides/howtos/add-function.md +++ b/doc/content/xapi/guides/howtos/add-function.md @@ -172,8 +172,8 @@ the Host module: 
let price_of ~__context ~host ~item = info "Host.price_of for item %s" item; let local_fn = Local.Host.price_of ~host ~item in - do_op_on ~local_fn ~__context ~host - (fun session_id rpc -> Client.Host.price_of ~rpc ~session_id ~host ~item) + let remote_fn = Client.Host.price_of ~host ~item in + do_op_on ~local_fn ~__context ~host ~remote_fn After the ~__context parameter, the parameters of this new function should match the parameters we specified for the message. In this case, that is the diff --git a/doc/content/xapi/storage/sxm.md b/doc/content/xapi/storage/sxm/index.md similarity index 76% rename from doc/content/xapi/storage/sxm.md rename to doc/content/xapi/storage/sxm/index.md index 6c44e432d22..4a8a68ced52 100644 --- a/doc/content/xapi/storage/sxm.md +++ b/doc/content/xapi/storage/sxm/index.md @@ -2,9 +2,443 @@ Title: Storage migration --- +- [Overview](#overview) +- [SXM Multiplexing](#sxm-multiplexing) + - [Motivation](#motivation) + - [But we have storage\_mux.ml](#but-we-have-storage_muxml) + - [Thought experiments on an alternative design](#thought-experiments-on-an-alternative-design) + - [Design](#design) +- [SMAPIv1 migration](#smapiv1-migration) + - [Preparation](#preparation) + - [Establishing mirror](#establishing-mirror) + - [Mirror](#mirror) + - [Snapshot](#snapshot) + - [Copy and compose](#copy-and-compose) + - [Finish](#finish) +- [SMAPIv3 migration](#smapiv3-migration) + - [Preparation](#preparation-1) + - [Establishing mirror](#establishing-mirror-1) + - [Limitations](#limitations) + - [Finish](#finish-1) +- [Error Handling](#error-handling) + - [Preparation (SMAPIv1 and SMAPIv3)](#preparation-smapiv1-and-smapiv3) + - [Snapshot and mirror failure (SMAPIv1)](#snapshot-and-mirror-failure-smapiv1) + - [Mirror failure (SMAPIv3)](#mirror-failure-smapiv3) + - [Copy failure (SMAPIv1)](#copy-failure-smapiv1) +- [SMAPIv1 Migration implementation detail](#smapiv1-migration-implementation-detail) + - [Receiving SXM](#receiving-sxm) + - [Xapi 
code](#xapi-code) + - [Storage code](#storage-code) + - [Copying a VDI](#copying-a-vdi) + - [Mirroring a VDI](#mirroring-a-vdi) + - [Code walkthrough](#code-walkthrough) + - [DATA.copy](#datacopy) + - [DATA.copy\_into](#datacopy_into) + - [DATA.MIRROR.start](#datamirrorstart) + + ## Overview -{{}} +The core idea of storage migration is surprisingly simple: We have VDIs attached to a VM, +and we wish to migrate these VDIs from one SR to another. This necessarily requires +us to copy the data stored in these VDIs over to the new SR, which can be a long-running +process if there are gigabytes or even terabytes of them. We wish to minimise the +down time of this process to allow the VM to keep running as much as possible. + +At a very high level, the SXM process generally only consists of two stages: preparation +and mirroring. The preparation is about getting the receiving host ready for the +mirroring operation, while the mirroring itself can be further divided into two +more operations: 1. sending new writes to both sides; 2. copying existing data from +source to destination. The exact detail of how to set up a mirror differs significantly +between SMAPIv1 and SMAPIv3, but both of them will have to perform the above two +operations. +Once the mirroring is established, it is a matter of checking the status of the +mirroring and carrying on with the following VM migration. + +The reality is more complex than what we had hoped for. For example, in SMAPIv1, +the mirror establishment is quite an involved process and is itself divided into +several stages, which will be discussed in more detail later on. + + +## SXM Multiplexing + +This section is about the design idea behind the additional layer of multiplexing specifically +for Storage Xen Motion (SXM) from SRs using SMAPIv3. It is recommended that you have read the +[introduction doc](_index.md) for the storage layer first to understand how storage +multiplexing is done between SMAPIv2 and SMAPI{v1, v3} before reading this.
+ + +### Motivation + +The existing SXM code was designed to work only with SMAPIv1 SRs, and therefore +does not take into account the dramatic difference in the ways SXM is done between +SMAPIv1 and SMAPIv3. The exact difference will be covered later on in this doc, for this section +it is sufficient to assume that they have two ways of doing migration. Therefore, +we need different code paths for migration from SMAPIv1 and SMAPIv3. + +#### But we have storage_mux.ml + +Indeed, storage_mux.ml is responsible for multiplexing and forwarding requests to +the correct storage backend, based on the SR type that the caller specifies. And +in fact, for inbound SXM to SMAPIv3 (i.e. migrating into a SMAPIv3 SR, GFS2 for example), +storage_mux is doing the heavy lifting of multiplexing between different storage +backends. Every time a `Remote.` call is invoked, this will go through the SMAPIv2 +layer to the remote host and get multiplexed on the destination host, based on +whether we are migrating into a SMAPIv1 or SMAPIv3 SR (see the diagram below). +And the inbound SXM is implemented +by implementing the existing SMAPIv2 -> SMAPIv3 calls (see `import_activate` for example) +which may not have been implemented before. + +![mux for inbound](sxm_mux_inbound.svg) + +While this works fine for inbound SXM, it does not work for outbound SXM. A typical SXM +consists of four combinations, the source sr type (v1/v3) and the destination sr +type (v1/v3), any of the four combinations is possible. We have already covered the +destination multiplexing (v1/v3) by utilising storage_mux, and at this point we +have run out of multiplexer for multiplexing on the source. In other words, we +can only multiplex once for each SMAPIv2 call, and we can use that chance for +either the source or the destination, and we have already used it for the latter.
+ + +#### Thought experiments on an alternative design + +To make it even more concrete, let us consider an example: the mirroring logic in +SXM is different based on the source SR type of the SXM call. You might imagine +defining a function like `MIRROR.start v3_sr v1_sr` that will be multiplexed +by the storage_mux based on the source SR type, and forwarded to storage_smapiv3_migrate, +or even just xapi-storage-script, which is indeed quite possible. +Now at this point we have already done the multiplexing, but we still wish to +multiplex operations on destination SRs, for example, we might want to attach a +VDI belonging to a SMAPIv1 SR on the remote host. But as we have already done the +multiplexing and are now inside xapi-storage-script, we have lost any chance of doing +any further multiplexing :( + +### Design + +The idea of this new design is to introduce an additional multiplexing layer that +is specific for multiplexing calls based on the source SR type. For example, in +the diagram below the `send_start src_sr dest_sr` will take both the src SR and the +destination SR as parameters, and suppose the mirroring logic is different for different +types of source SRs (i.e. SMAPIv1 or SMAPIv3), the storage migration code will +necessarily choose the right code path based on the source SR type. And this is +exactly what is done in this additional multiplexing layer. The respective logic +for doing {v1,v3}-specific mirroring, for example, will stay in storage_smapi{v1,v3}_migrate.ml + +![mux for outbound](sxm_mux_outbound.svg) + +Note that later on storage_smapi{v1,v3}_migrate.ml will still have the flexibility +to call remote SMAPIv2 functions, such as `Remote.VDI.attach dest_sr vdi`, and +it will be handled just as before.
+ +## SMAPIv1 migration + +This section is about migration from SMAPIv1 SRs to SMAPIv1 or SMAPIv3 SRs. Since +the migration is driven by the source host, it is usually the source host that +determines most of the logic during a storage migration. + +First we take a look at an overview diagram of what happens during SMAPIv1 SXM: +the diagram is labelled with S1, S2 ... which indicates different stages of the migration. +We will talk about each stage in more detail below. + +![overview-v1](sxm-overview-v1.svg) + +### Preparation + +Before we can start our migration process, there are a number of preparations +needed to prepare for the following mirror. For SMAPIv1 this involves: + +1. Create a new VDI (called leaf) that will be used as the receiving VDI for all the new writes +2. Create a dummy snapshot of the VDI above to make sure it is a differencing disk and can be composed later on +3. Create a VDI (called parent) that will be used to receive the existing content of the disk (the snapshot) + +Note that the leaf VDI needs to be attached and activated on the destination host (to a non-existing `mirror_vm`) +since it will later on accept writes to mirror what is written on the source host. + +The parent VDI may be created in two different ways: 1. If there is a "similar VDI", +clone it on the destination host and use it as the parent VDI; 2. If there is no +such VDI, create a new blank VDI. The similarity here is defined by the distances +between different VDIs in the VHD tree, which is exploiting the internal representation +of the storage layer, hence we will not go into too much detail about this here. + +Once these preparations are done, a `mirror_receive_result` data structure is then +passed back to the source host that will contain all the necessary information about +these new VDIs, etc. + +### Establishing mirror + +At a high level, mirror establishment for SMAPIv1 works as follows: + +1. Take a snapshot of a VDI that is attached to VM1.
This gives us an immutable +copy of the current state of the VDI, with all the data up until the point we took +the snapshot. This is illustrated in the diagram as a VDI and its snapshot connecting +to a shared parent, which stores the shared content for the snapshot and the writable +VDI from which we took the snapshot (snapshot) +2. Mirror the writable VDI to the server hosts: this means that all writes that go to the +client VDI will also be written to the mirrored VDI on the remote host (mirror) +3. Copy the immutable snapshot from our local host to the remote (copy) +4. Compose the mirror and the snapshot to form a single VDI +5. Destroy the snapshot on the local host (cleanup) + +#### Mirror + +The mirroring process for SMAPIv1 is rather unconventional, so it is worth +documenting how this works. Instead of a conventional client server architecture, +where the source client connects to the destination server directly through the +NBD protocol in tapdisk, the connection is established in xapi and then passed +onto tapdisk. It was done in this rather unusual way mainly due to authentication +issues. Because it is xapi that is creating the connection, tapdisk does not need +to be concerned about authentication of the connection, thus simplifying the storage +component. This is reasonable as the storage component should focus on handling +storage requests rather than worrying about network security. + +The diagram below illustrates this process. First, xapi on the source host will +initiate an https request to the remote xapi. This request contains the necessary +information about the VDI to be mirrored, and the SR that contains it, etc. This +information is then passed onto the https handler on the destination host (called +`nbd_handler`) which then processes this information.
Now the unusual step is that +both the source and the destination xapi will pass this connection onto tapdisk, +by sending the fd representing the socket connection to the tapdisk process. On +the source this would be the nbd client process of tapdisk, and on the destination +this would be the nbd server process of the tapdisk. After this step, we can consider +a client-server connection is established between two tapdisks on the client and +server, as if the tapdisk on the source host makes a request to the tapdisk on the +destination host and initiates the connection. On the diagram, this is indicated +by the dashed lines between the tapdisk processes. Logically, we can view this as +xapi creates the connection, and then passes this connection down into tapdisk. + +![mirror](sxm-mirror-v1.svg) + +#### Snapshot + +The next step would be to create a snapshot of the VDI. This is easily done as a +`VDI.snapshot` operation. If the VDI was in VHD format, then internally this would +create two children, one for the snapshot, which only contains the metadata +information and tends to be small, the other for the writable VDI where all the +new writes will go to. The shared base copy contains the shared blocks. + +![snapshot](sxm-snapshot-v1.svg) + +#### Copy and compose + +Once the snapshot is created, we can then copy the snapshot from the source +to the destination. This step is done by `sparse_dd` using the nbd protocol. This +is also the step that takes the most time to complete. + +`sparse_dd` is a process forked by xapi that does the copying of the disk blocks. +`sparse_dd` supports a number of protocols, including nbd. In this case, `sparse_dd` +will initiate an https put request to the destination host, with a url of the form +`
/services/SM/nbdproxy//`. This https request then +gets handled by the https handler on the destination host B, which will then spawn +a handler thread. This handler will find the +"generic" nbd server[^2] of either tapdisk or qemu-dp, depending on the destination +SR type, and then start proxying data between the https connection socket and the +socket connected to the nbd server. + +[^2]: The server is generic because it does not accept fd passing, and I call those +"special" nbd server/fd receiver. + +![sxm new copy](sxm-new-copy-v1.svg) + +Once copying is done, the snapshot and mirrored VDI can be then composed into a +single VDI. + +#### Finish + +At this point the VDI is synchronised to the new host! Mirror is still working at this point +though because that will not be destroyed until the VM itself has been migrated +as well. Some cleanups are done at this point, such as deleting the snapshot +that is taken on the source, destroying the mirror datapath, etc. + +The end results look like the following. Note that VM2 is in dashed line as it +is not yet created yet. The next steps would be to migrate the VM1 itself to the +destination as well, but this is part of the VM migration process and will not +be covered here. + +![final](sxm-final-v1.svg) + + +## SMAPIv3 migration + +This section covers the mechanism of migrations *from* SRs using SMAPIv3 (to +SMAPIv1 or SMAPIv3). Although the core ideas are the same, SMAPIv3 has a rather +different mechanism for mirroring: 1. it does not require xapi to take snapshot +of the VDI anymore, since the mirror itself will take care of replicating the +existing data to the destination; 2. there is no fd passing for connection establishment anymore, and instead proxies are used for connection setup. + +### Preparation + +The preparation work for SMAPIv3 is greatly simplified by the fact that the mirror +at the storage layer will copy the existing data in the VDI to the destination. 
+ +This means that a snapshot of the source VDI is not required anymore. So we are left +with only one thing: + +1. Create a VDI used for mirroring the data of the source VDI + +For this reason, the implementation logic for SMAPIv3 preparation is also shorter, +as the complexity is now handled by the storage layer, which is where it is supposed +to be handled. + +### Establishing mirror + +The other significant difference is that the storage backend for SMAPIv3 `qemu-dp` +SRs no longer accepts fds, so xapi needs to proxy the data between an nbd client +and an nbd server. + +SMAPIv3 provides the `Data.mirror uri domain remote` which needs three parameters: +`uri` for accessing the local disk, `domain` for the domain slice on which mirroring +should happen, and most importantly for this design, a `remote` url which represents +the remote nbd server to which the blocks of data can be sent. + +This function itself, when called by xapi and forwarded to the storage layer's qemu-dp +nbd client, will initiate a nbd connection to the nbd server pointed to by `remote`. +This works fine when the storage migration happens entirely within a local host, +where qemu-dp's nbd client and nbd server can communicate over unix domain sockets. +However, it does not work for inter-host migrations as qemu-dp's nbd server is not +exposed publicly over the network (just as tapdisk's nbd server). Therefore a proxying +service on the source host is needed for forwarding the nbd connection from the +source host to the destination host. And it would be the responsibility of +xapi to manage this proxy service. + +The following diagram illustrates the mirroring process of a single VDI: + +![sxm mirror](sxm-mirror-v3.svg) + +The first step for xapi is then to set up a nbd proxy thread that will be listening +on a local unix domain socket with path `/var/run/nbdproxy/export/` where +domain is the `domain` parameter mentioned above in `Data.mirror`.
The nbd proxy +thread will accept nbd connections (or rather any connections, it does not +speak/care about nbd protocol at all) and sends an https put request +to the remote xapi. The proxy itself will then forward the data exactly as it is +to the remote side through the https connection. + +Once the proxy is set up, xapi will call `Data.mirror`, which +will be forwarded to the xapi-storage-script and is further forwarded to the qemu-dp. +This call contains, among other parameters, the destination NBD server url (`remote`) +to be connected. In this case the destination nbd server is exactly the domain +socket to which the proxy thread is listening. Therefore the `remote` parameter +will be of the form `nbd+unix:///?socket=` where the export is provided +by the destination nbd server that represents the VDI prepared on the destination +host, and the socket will be the path of the unix domain socket where the proxy +thread (which we just created) is listening at. + +When this connection is set up, the proxy process will talk to the remote xapi via +https requests, and on the remote side, an https handler will proxy this request to +the appropriate nbd server of either tapdisk or qemu-dp, using exactly the same +[import proxy](#copy-and-compose) as mentioned before. + +Note that this proxying service is tightly integrated with outbound SXM of SMAPIv3 +SRs. This is to make it simple to focus on the migration itself. + +Although there is no need to explicitly copy the VDI anymore, we still need to +transfer the data and wait for it finish. For this we use `Data.stat` call provided +by the storage backend to query the status of the mirror, and wait for it to finish +as needed. 
+ +#### Limitations + +This way of establishing the connection simplifies the implementation of the migration +for SMAPIv3, but it also has limitations: + +One proxy per live VDI migration is needed, which can potentially consume lots of resources in dom0, and we should measure the impact of this before we switch to using more resource-efficient ways such as wire guard that allows establishing a single connection between multiple hosts. + + +### Finish + +As there is no need to copy a VDI, there is also no need to compose or delete the +snapshot. The cleanup procedure would therefore just involve destroying the datapath +that was used for receiving writes for the mirrored VDI. + +## Error Handling + +Storage migration is a long-running process, and is prone to failures in each +step. Hence it is important to specify what errors could be raised at each step +and their significance. This is beneficial both for the user and for triaging. + +There are two general cleanup functions in SXM: `MIRROR.receive_cancel` and +`MIRROR.stop`. The former is for cleaning up whatever has been created by `MIRROR.receive_start` +on the destination host (such as VDIs for receiving mirrored data). The latter is +a more comprehensive function that attempts to "undo" all the side effects that +were done during the SXM, and also calls `receive_cancel` as part of its operations. + +Currently error handling was done by building up a list of cleanup functions in +the `on_fail` list ref as the function executes. For example, if the `receive_start` +has been completed successfully, add `receive_cancel` to the list of cleanup functions. +And whenever an exception is encountered, just execute whatever has been added +to the `on_fail` list ref. This is convenient, but does entangle all the error +handling logic with the core SXM logic itself, making the code rather hard +to understand and maintain.
+ +The idea to fix this is to introduce explicit "stages" during the SXM and define +explicitly what error handling should be done if it fails at a certain stage. This +helps separate the error handling logic into the `with` part of a `try with` block, +which is where they are supposed to be. Since we need to accommodate the existing +SMAPIv1 migration (which has more stages than SMAPIv3), the following stages are +introduced: preparation (v1,v3), snapshot(v1), mirror(v1, v3), copy(v1). Note that +each stage also roughly corresponds to a helper function that is called within `Storage_migrate.start`, +which is the wrapper function that initiates storage migration. And each helper +functions themselves would also have error handling logic within themselves as +needed (e.g. see `Storage_smapiv1_migrate.receive_start`) to deal with exceptions +that happen within each helper functions. + +### Preparation (SMAPIv1 and SMAPIv3) + +The preparation stage generally corresponds to what is done in `receive_start`, and +this function itself will handle exceptions when there are partial failures within +the function itself, such as an exception after the receiving VDI is created. +It will use the old-style `on_fail` function but only with a limited scope. + +There is nothing to be done at a higher level (i.e within `MIRROR.start` which +calls `receive_start`) if preparation has failed. + +### Snapshot and mirror failure (SMAPIv1) + +For SMAPIv1, the mirror is done in a bit cumbersome way. The end goal is to establish +connections between two tapdisk processes on the source and destination hosts. +To achieve this goal, xapi will do two main jobs: 1. create a connection between two +hosts and pass the connection to tapdisk; 2. create a snapshot as a starting point +of the mirroring process. + +Therefore handling of failures at these two stages are similar: clean up what was +done in the preparation stage by calling `receive_cancel`, and that is almost it. 
+ +Again, we will leave whatever is needed for partial failure handling within those +functions themselves and only clean up at a stage-level in `storage_migrate.ml` + +Note that `receive_cancel` is a multiplexed function for SMAPIv1 and SMAPIv3, which +means different clean up logic will be executed depending on what type of SR we +are migrating from. + +### Mirror failure (SMAPIv3) + +The `Data.stat` call in SMAPIv3 returns a data structure that includes the current +progress of the mirror job, whether it has completed syncing the existing data and +whether the mirror has failed. Similar to how it is done in SMAPIv1, we wait for +the sync to complete once we issue the `Data.mirror` call, by repeatedly polling +the status of the mirror using the `Data.stat` call. During this process, the status +of the mirror is also checked and if a failure is detected, a `Migration_mirror_failure` +will be raised and then gets handled by the code in `storage_migrate.ml` by calling +`Storage_smapiv3_migrate.receive_cancel2`, which will clean up the mirror datapath +and destroy the mirror VDI, similar to what is done in SMAPIv1. + + +### Copy failure (SMAPIv1) + +The final step of storage migration for SMAPIv1 is to copy the snapshot from the +source to the destination. At this stage, most of the side effectful work has been +done, so we do need to call `MIRROR.stop` to clean things up if we experience a +failure during copying. + + +## SMAPIv1 Migration implementation detail + +{{% notice info %}} +The following doc refers to a [version](https://github.com/xapi-project/xen-api/blob/v24.37.0/ocaml/xapi/storage_migrate.ml) +of xapi that is before 24.37, after which point this code structure has undergone +many changes as part of adding support for SMAPIv3 SXM. Therefore the following +tutorial might be less relevant in terms of the implementation detail. Although +the general principle should remain the same.
+{{% /notice %}} + +```mermaid sequenceDiagram participant local_tapdisk as local tapdisk participant local_smapiv2 as local SMAPIv2 @@ -129,7 +563,7 @@ opt post_detach_hook end Note over xapi: memory image migration by xenopsd Note over xapi: destroy the VM record -{{< /mermaid >}} +``` ### Receiving SXM @@ -162,7 +596,7 @@ the receiving end of storage motion: This is how xapi coordinates storage migration. We'll do it as a code walkthrough through the two layers: xapi and storage-in-xapi (SMAPIv2). -## Xapi code +### Xapi code The entry point is in [xapi_vm_migration.ml](https://github.com/xapi-project/xen-api/blob/f75d51e7a3eff89d952330ec1a739df85a2895e2/ocaml/xapi/xapi_vm_migrate.ml#L786) @@ -1056,7 +1490,7 @@ We also try to remove the VM record from the destination if we managed to send i Finally we check for mirror failure in the task - this is set by the events thread watching for events from the storage layer, in [storage_access.ml](https://github.com/xapi-project/xen-api/blob/f75d51e7a3eff89d952330ec1a739df85a2895e2/ocaml/xapi/storage_access.ml#L1169-L1207) -## Storage code +### Storage code The part of the code that is conceptually in the storage layer, but physically in xapi, is located in [storage_migrate.ml](https://github.com/xapi-project/xen-api/blob/f75d51e7a3eff89d952330ec1a739df85a2895e2/ocaml/xapi/storage_migrate.ml). There are logically a few separate parts to this file: @@ -1069,7 +1503,7 @@ The part of the code that is conceptually in the storage layer, but physically i Let's start by considering the way the storage APIs are intended to be used. -### Copying a VDI +#### Copying a VDI `DATA.copy` takes several parameters: @@ -1119,7 +1553,7 @@ The implementation uses the `url` parameter to make SMAPIv2 calls to the destina The implementation tries to minimize the amount of data copied by looking for related VDIs on the destination SR. See below for more details. 
-### Mirroring a VDI +#### Mirroring a VDI `DATA.MIRROR.start` takes a similar set of parameters to that of copy: @@ -1156,11 +1590,11 @@ Note that state is a list since the initial phase of the operation requires both Additionally the mirror can be cancelled using the `MIRROR.stop` API call. -### Code walkthrough +#### Code walkthrough let's go through the implementation of `copy`: -#### DATA.copy +##### DATA.copy ```ocaml let copy ~task ~dbg ~sr ~vdi ~dp ~url ~dest = @@ -1296,7 +1730,7 @@ Finally we snapshot the remote VDI to ensure we've got a VDI of type 'snapshot' The exception handler does nothing - so we leak remote VDIs if the exception happens after we've done our cloning :-( -#### DATA.copy_into +##### DATA.copy_into Let's now look at the data-copying part. This is common code shared between `VDI.copy`, `VDI.copy_into` and `MIRROR.start` and hence has some duplication of the calls made above. @@ -1467,7 +1901,7 @@ The last thing we do is to set the local and remote content_id. The local set_co Here we perform the list of cleanup operations. Theoretically. It seems we don't ever actually set this to anything, so this is dead code. -#### DATA.MIRROR.start +##### DATA.MIRROR.start ```ocaml let start' ~task ~dbg ~sr ~vdi ~dp ~url ~dest = @@ -1765,3 +2199,4 @@ let pre_deactivate_hook ~dbg ~dp ~sr ~vdi = s.failed <- true ) ``` + diff --git a/doc/content/xapi/storage/sxm/sxm-final-v1.svg b/doc/content/xapi/storage/sxm/sxm-final-v1.svg new file mode 100644 index 00000000000..7cdb2d540a3 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-final-v1.svg @@ -0,0 +1,4 @@ + + + +
VM1
Host1
VDI
Host2
VDI
VM2
SR1
Mirror
SR2
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-mirror-v1.svg b/doc/content/xapi/storage/sxm/sxm-mirror-v1.svg new file mode 100644 index 00000000000..4b6f61131c5 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-mirror-v1.svg @@ -0,0 +1,4 @@ + + + +
xapi
xapi
VDI
VDI
xapi
xapi
tapdisk
tapdisk
Host A
Host A
Host B
Host B
http connection
http connection
pass client socket of the http connection
via SCM_RIGHTS
pass client socket o...
tapdisk
tapdisk
http handler
http handler
pass server socket of the http connection
pass server socket o...
VDI
VDI
mirror
mirror
Text is not SVG - cannot display
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-mirror-v3.svg b/doc/content/xapi/storage/sxm/sxm-mirror-v3.svg new file mode 100644 index 00000000000..8ed03406acc --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-mirror-v3.svg @@ -0,0 +1,4 @@ + + + +
xapi
xapi
Source Host A
Destination Host B
tapdisk
qemu-dp
generic nbd server
generic nbd server
xapi-storage-script
Data.mirror 
qemu-dp 
nbd client
Data.mirror 
nbd exporting proxy
http handler
http request
nbd import proxy
Legend
belongs/spawns
talks to
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-new-copy-v1.svg b/doc/content/xapi/storage/sxm/sxm-new-copy-v1.svg new file mode 100644 index 00000000000..891913850d3 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-new-copy-v1.svg @@ -0,0 +1,4 @@ + + + +
xapi
xapi
Host A
Host B
tapdisk
http connection
qemu-dp
generic nbd server
generic nbd server
proxy
sparse_dd
http handler
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-overview-v1.svg b/doc/content/xapi/storage/sxm/sxm-overview-v1.svg new file mode 100644 index 00000000000..b6002382db2 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-overview-v1.svg @@ -0,0 +1,4 @@ + + + +
VM1
Host1
VDI
VDI snapshot
Host2
VDI
VDI snapshot
VM2
SR1
SR2
S2:Mirror
S1:Snapshot
S3: Copy
S4: Compose
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm-snapshot-v1.svg b/doc/content/xapi/storage/sxm/sxm-snapshot-v1.svg new file mode 100644 index 00000000000..5fe0f398c17 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm-snapshot-v1.svg @@ -0,0 +1,4 @@ + + + +
VDI
VDI snapshot
base
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm_mux_inbound.svg b/doc/content/xapi/storage/sxm/sxm_mux_inbound.svg new file mode 100644 index 00000000000..c38bc36ae5f --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm_mux_inbound.svg @@ -0,0 +1,4 @@ + + + +
Storage_migrate.start
Host A
Host B
v1
v3
storage_mux server
Remote.VDI.attach
storage_mux server
rpc
\ No newline at end of file diff --git a/doc/content/xapi/storage/sxm/sxm_mux_outbound.svg b/doc/content/xapi/storage/sxm/sxm_mux_outbound.svg new file mode 100644 index 00000000000..915cc7550e3 --- /dev/null +++ b/doc/content/xapi/storage/sxm/sxm_mux_outbound.svg @@ -0,0 +1,4 @@ + + + +
Storage_migrate.start
Host A
Host B
storage_mux server
Remote.VDI.attach
vdi dst_sr
MIRROR.send_start
src_sr dst_sr
storage_smapiv1_migrate.
send_start
storage_smapiv3_migrate.
send_start
RPC to host B
....
new multiplexing layer
mux based on src_sr
\ No newline at end of file diff --git a/dune-project b/dune-project index af6364148da..8b720b99442 100644 --- a/dune-project +++ b/dune-project @@ -1,764 +1,875 @@ (lang dune 3.15) -(formatting (enabled_for ocaml)) +(formatting + (enabled_for ocaml)) + (using menhir 2.0) + (using directory-targets 0.1) +(opam_file_location inside_opam_directory) + (cram enable) + (implicit_transitive_deps false) + (generate_opam_files true) (name "xapi") -(source (github xapi-project/xen-api)) -(license "LGPL-2.1-only WITH OCaml-LGPL-linking-exception") -(authors "xen-api@lists.xen.org") -(maintainers "Xapi project maintainers") -(homepage "https://xapi-project.github.io/") - -(package - (name zstd) -) - - -(package - (name clock) - (synopsis "Xapi's library for managing time") - (authors "Jonathan Ludlam" "Pau Ruiz Safont") - (depends - (ocaml (>= 4.12)) - (alcotest :with-test) - astring - fmt - mtime - ptime - (xapi-log (= :version)) - (qcheck-core :with-test) - (qcheck-alcotest :with-test) - ) -) - -(package - (name tgroup) - (depends - xapi-log - xapi-stdext-unix) -) -(package - (name xml-light2) -) - -(package - (name xapi-sdk) - (license "BSD-2-Clause") - (synopsis "Xen API SDK generation code") - (depends - (alcotest :with-test) - astring - (fmt :with-test) - mustache - (xapi-datamodel (= :version)) - (xapi-stdext-unix (and (= :version) :with-test)) - (xapi-test-utils :with-test) - ) - (allow_empty) -) -(package - (name xen-api-client-lwt) -) - - -(package - (name xen-api-client) - (synopsis "Xen-API client library for remotely-controlling a xapi host") - (authors "David Scott" "Anil Madhavapeddy" "Jerome Maloberti" "John Else" "Jon Ludlam" "Thomas Sanders" "Mike McClurg") - (depends - (alcotest :with-test) - astring - (cohttp (>= "0.22.0")) - re - rpclib - uri - (uuid (= :version)) - (xapi-client (= :version)) - (xapi-idl (= :version)) - (xapi-rrd (= :version)) - (xapi-types (= :version)) - xmlm - ) -) - -(package - (name xe) -) - -(package - (name xapi-types) -) - 
-(package - (name xapi-tracing) - (depends - ocaml - dune - (alcotest :with-test) - (fmt :with-test) - ppx_deriving_yojson - re - uri - (uuid :with-test) - (xapi-log (= :version)) - (xapi-stdext-threads (= :version)) - yojson - ) - (synopsis "Allows to instrument code to generate tracing information") - (description "This library provides modules to allow gathering runtime traces.") -) - -(package - (name xapi-tracing-export) - (depends - ocaml - cohttp-posix - dune - cohttp - ptime - result - rresult - rpclib - ppx_deriving_rpc - uri - (xapi-log (= :version)) - (xapi-open-uri (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-tracing (= :version)) - (zstd (= :version)) - ) - (synopsis "Export traces in multiple protocols and formats") - (description "This library export traces is able to push traces to http endpoints or generate compressed tarballs in the filesystem.") -) - -(package - (name xapi-storage-script) -) - -(package - (name xapi-storage-cli) -) +(source + (github xapi-project/xen-api)) -(package - (name xapi-storage) -) - -(package - (name xapi-schema) -) - -(package - (name rrdd-plugin) - (synopsis "A plugin library for the xapi performance monitoring daemon") - (description "This library allows one to expose a datasource which can then be sampled by the performance monitoring daemon.") - (depends - ocaml - astring - rpclib - (rrd-transport (= :version)) - (xapi-forkexecd (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-std (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-idl (= :version)) - xenstore - xenstore_transport - ) -) - -(package - (name xapi-open-uri) -) - -(package - (name xapi-nbd) -) - -(package - (name xapi-log) -) - -(package - (name xapi-idl) -) - -(package - (name xapi-forkexecd) - (synopsis "Sub-process control service for xapi") - (description "This daemon creates and manages sub-processes on behalf of xapi.") - (depends - 
astring - (forkexec (= :version)) - (uuid (= :version)) - (xapi-stdext-unix (= :version)) - ) -) - -(package - (name xapi-expiry-alerts) -) - -(package - (name xapi-datamodel) -) - -(package - (name xapi-consts) -) - -(package - (name xapi-compression) -) - -(package - (name xapi-client) -) - -(package - (name xapi-cli-protocol) -) +(license "LGPL-2.1-only WITH OCaml-LGPL-linking-exception") -(package - (name xapi-debug) - (synopsis "Debugging tools for XAPI") - (description "Tools installed into the non-standard /opt/xensource/debug location") - (depends - alcotest - angstrom - astring - base64 - cmdliner - cohttp - cstruct - ctypes - domain-name - fd-send-recv - fmt - hex - integers - ipaddr - logs - magic-mime - mirage-crypto - mirage-crypto-pk - mirage-crypto-rng - mtime - pci - polly - ppx_deriving - ppx_deriving_rpc - ppx_sexp_conv - psq - ptime - qcheck-alcotest - qcheck-core - re - result - rpclib - rresult - sexplib - sexplib0 - sha - tar - tar-unix - uri - uuidm - uutf - x509 - xapi-backtrace - xapi-log - xapi-types - xapi-stdext-pervasives - xapi-stdext-unix - xen-api-client - xen-api-client-lwt - xenctrl - xenstore_transport - xmlm - yojson - ) -) +(authors "xen-api@lists.xen.org") -(package - (name xapi-tools) - (synopsis "Various daemons and CLI applications required by XAPI") - (description "Includes message-switch, xenopsd, forkexecd, ...") - (depends - astring - base64 - cmdliner - cstruct-unix - fmt - logs - lwt - mtime - netlink - qmp - re - result - rpclib - rresult - uri - xenctrl - xmlm - yojson - ; can't use '= version' here yet, - ; 'xapi-tools' will have version ~dev, not 'master' like all the others - ; because it is not in xs-opam yet - rrd-transport - xapi-tracing-export - xen-api-client - (alcotest :with-test) - (ppx_deriving_rpc :with-test) - (qcheck-core :with-test) - (xapi-test-utils :with-test) - (xenstore_transport :with-test) - ) -) +(maintainers "Xapi project maintainers") -(package - (name xapi) - (synopsis "The toolstack daemon 
which implements the XenAPI") - (description "This daemon exposes the XenAPI and is used by clients such as 'xe' and 'XenCenter' to manage clusters of Xen-enabled hosts.") - (depends - (ocaml (>= 4.09)) - (alcotest :with-test) - angstrom - astring - base-threads - base64 - (bos :with-test) - cdrom - (clock (= :version)) - cmdliner - cohttp - conf-pam - (crowbar :with-test) - cstruct - ctypes - ctypes-foreign - domain-name - (ezxenstore (= :version)) - fmt - fd-send-recv - hex - (http-lib (and :with-test (= :version))) ; the public library is only used for testing - integers - ipaddr - logs - magic-mime - mirage-crypto - mirage-crypto-pk - (mirage-crypto-rng (>= "0.11.0")) - (message-switch-unix (= :version)) - mtime - opentelemetry-client-ocurl - pci - (pciutil (= :version)) - polly - ppx_deriving_rpc - ppx_sexp_conv - ppx_deriving - psq - ptime - qcheck-alcotest - qcheck-core - re - result - rpclib - (rrdd-plugin (= :version)) - rresult - sexpr - sexplib - sexplib0 - sha - (stunnel (= :version)) - tar - tar-unix - uri - tgroup - (uuid (= :version)) - uutf - uuidm - x509 - xapi-backtrace - (xapi-client (= :version)) - (xapi-cli-protocol (= :version)) - (xapi-consts (= :version)) - (xapi-datamodel (= :version)) - (xapi-expiry-alerts (= :version)) - (xapi-idl (= :version)) - (xapi-inventory (= :version)) - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-std (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-stdext-zerocheck (= :version)) - (xapi-test-utils :with-test) - (xapi-tracing (= :version)) - (xapi-tracing-export (= :version)) - (xapi-types (= :version)) - (xen-api-client-lwt (= :version)) - xenctrl ; for quicktest - xenstore_transport - xmlm - (xml-light2 (= :version)) - yojson - (zstd (= :version)) - ) -) +(homepage "https://xapi-project.github.io/") (package - (name vhd-tool) - (synopsis "Manipulate .vhd files") - (tags ("org.mirage" "org:xapi-project")) + (name zstd)) + +(package + 
(name clock) + (synopsis "Xapi's library for managing time") + (authors "Jonathan Ludlam" "Pau Ruiz Safont") + (depends + (ocaml + (>= 4.12)) + (alcotest :with-test) + astring + fmt + mtime + ptime + (xapi-log + (= :version)) + (qcheck-core :with-test) + (qcheck-alcotest :with-test))) + +(package + (name tgroup) + (depends xapi-log xapi-stdext-unix)) + +(package + (name xml-light2)) + +(package + (name xapi-sdk) + (license "BSD-2-Clause") + (synopsis "Xen API SDK generation code") + (depends + (alcotest :with-test) + astring + (fmt :with-test) + mustache + (xapi-datamodel + (= :version)) + (xapi-stdext-unix + (and + (= :version) + :with-test)) + (xapi-test-utils :with-test)) + (allow_empty)) + +(package + (name xen-api-client-lwt)) + +(package + (name xen-api-client) + (synopsis "Xen-API client library for remotely-controlling a xapi host") + (authors + "David Scott" + "Anil Madhavapeddy" + "Jerome Maloberti" + "John Else" + "Jon Ludlam" + "Thomas Sanders" + "Mike McClurg") + (depends + (alcotest :with-test) + astring + (cohttp + (>= "0.22.0")) + re + rpclib + uri + (uuid + (= :version)) + (xapi-client + (= :version)) + (xapi-idl + (= :version)) + (xapi-rrd + (= :version)) + (xapi-types + (= :version)) + xmlm)) + +(package + (name xe)) + +(package + (name xapi-types)) + +(package + (name xapi-tracing) + (depends + ocaml + dune + (alcotest :with-test) + (fmt :with-test) + ppx_deriving_yojson + re + uri + (uuid :with-test) + (xapi-log + (= :version)) + (xapi-stdext-threads + (= :version)) + yojson) + (synopsis "Allows to instrument code to generate tracing information") + (description + "This library provides modules to allow gathering runtime traces.")) + +(package + (name xapi-tracing-export) + (depends + ocaml + cohttp-posix + dune + cohttp + ptime + result + rresult + rpclib + ppx_deriving_rpc + uri + (xapi-log + (= :version)) + (xapi-open-uri + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-tracing + (= 
:version)) + (zstd + (= :version))) + (synopsis "Export traces in multiple protocols and formats") + (description + "This library export traces is able to push traces to http endpoints or generate compressed tarballs in the filesystem.")) + +(package + (name xapi-storage-script)) + +(package + (name xapi-storage-cli) + (depends + cmdliner + re + rpclib + ppx_deriving_rpc + (xapi-client + (= :version)) + (xapi-idl + (= :version)) + (xapi-types + (= :version))) + (synopsis "A CLI for xapi storage services") + (description + "The CLI allows you to directly manipulate virtual disk images, without them being attached to VMs.")) + +(package + (name xapi-storage)) + +(package + (name xapi-schema)) + +(package + (name rrdd-plugin) + (synopsis "A plugin library for the xapi performance monitoring daemon") + (description + "This library allows one to expose a datasource which can then be sampled by the performance monitoring daemon.") + (depends + ocaml + astring + rpclib + (rrd-transport + (= :version)) + (xapi-forkexecd + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-std + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-idl + (= :version)) + xenstore + xenstore_transport)) + +(package + (name xapi-open-uri)) + +(package + (name xapi-nbd)) + +(package + (name xapi-log) + (synopsis "A Logs library required by xapi") + (description + "This package is provided for backwards compatibility only. 
No new package should use it.") + (depends + astring + fmt + logs + mtime + xapi-backtrace + (xapi-stdext-pervasives (= :version)))) + +(package + (name xapi-idl)) + +(package + (name xapi-forkexecd) + (synopsis "Sub-process control service for xapi") + (description + "This daemon creates and manages sub-processes on behalf of xapi.") + (depends + astring + (forkexec + (= :version)) + (uuid + (= :version)) + (xapi-stdext-unix + (= :version)))) + +(package + (name xapi-expiry-alerts)) + +(package + (name xapi-datamodel)) + +(package + (name xapi-consts)) + +(package + (name xapi-compression)) + +(package + (name xapi-client)) + +(package + (name xapi-cli-protocol)) + +(package + (name xapi-debug) + (synopsis "Debugging tools for XAPI") + (description + "Tools installed into the non-standard /opt/xensource/debug location") + (depends + alcotest + angstrom + astring + base64 + cmdliner + cohttp + cstruct + ctypes + domain-name + fd-send-recv + fmt + hex + integers + ipaddr + logs + magic-mime + mirage-crypto + mirage-crypto-pk + mirage-crypto-rng + mtime + pci + polly + ppx_deriving + ppx_deriving_rpc + ppx_sexp_conv + psq + ptime + qcheck-alcotest + qcheck-core + re + result + rpclib + rrdd-plugin + rresult + sexplib + sexplib0 + sha + tar + tar-unix + uri + uuidm + uutf + x509 + xapi-backtrace + xapi-log + xapi-types + xapi-stdext-pervasives + xapi-stdext-unix + xen-api-client + xen-api-client-lwt + xenctrl + xenstore_transport + xmlm + yojson)) + +(package + (name xapi-tools) + (synopsis "Various daemons and CLI applications required by XAPI") + (description "Includes message-switch, xenopsd, forkexecd, ...") + (depends + astring + base64 + cmdliner + cstruct-unix + fmt + logs + lwt + mtime + netlink + qmp + re + result + rpclib + rresult + uri + tyre + xenctrl + xmlm + yojson + ; can't use '= version' here yet, + ; 'xapi-tools' will have version ~dev, not 'master' like all the others + ; because it is not in xs-opam yet + rrd-transport + rrdd-plugin + 
xapi-tracing-export + xen-api-client + (alcotest :with-test) + (ppx_deriving_rpc :with-test) + (qcheck-core :with-test) + (xapi-test-utils :with-test) + (xenstore_transport :with-test))) + +(package + (name xapi) + (synopsis "The toolstack daemon which implements the XenAPI") + (description + "This daemon exposes the XenAPI and is used by clients such as 'xe' and 'XenCenter' to manage clusters of Xen-enabled hosts.") + (depends + (ocaml + (>= 4.09)) + (alcotest :with-test) + angstrom + astring + base-threads + base64 + (bos :with-test) + cdrom + (clock + (= :version)) + cmdliner + cohttp + conf-pam + (crowbar :with-test) + cstruct + ctypes + ctypes-foreign + domain-name + (ezxenstore + (= :version)) + fmt + fd-send-recv + hex + (http-lib + (and + :with-test + (= :version))) ; the public library is only used for testing + integers + ipaddr + logs + magic-mime + mirage-crypto + mirage-crypto-pk + (mirage-crypto-rng + (>= "0.11.0")) + (message-switch-unix + (= :version)) + mtime + opentelemetry-client-ocurl + pci + (pciutil + (= :version)) + polly + ppx_deriving_rpc + ppx_sexp_conv + ppx_deriving + psq + ptime + qcheck-alcotest + qcheck-core + re + result + rpclib + (rrdd-plugin + (= :version)) + rresult + sexpr + sexplib + sexplib0 + sha + (stunnel + (= :version)) + tar + tar-unix + uri + tgroup + (uuid + (= :version)) + uutf + uuidm + x509 + xapi-backtrace + (xapi-client + (= :version)) + (xapi-cli-protocol + (= :version)) + (xapi-consts + (= :version)) + (xapi-datamodel + (= :version)) + (xapi-expiry-alerts + (= :version)) + (xapi-idl + (= :version)) + (xapi-inventory + (= :version)) + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-std + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-stdext-zerocheck + (= :version)) + (xapi-test-utils :with-test) + (xapi-tracing + (= :version)) + (xapi-tracing-export + (= :version)) + (xapi-types + (= :version)) + (xen-api-client-lwt + (= 
:version)) + xenctrl ; for quicktest + xenstore_transport + xmlm + (xml-light2 + (= :version)) + yojson + (zstd + (= :version)))) + +(package + (name vhd-tool) + (synopsis "Manipulate .vhd files") + (tags + ("org.mirage" "org:xapi-project")) + (depends + (alcotest-lwt :with-test) + astring + bigarray-compat + cmdliner + cohttp + cohttp-lwt + conf-libssl + (cstruct + (>= "3.0.0")) + (ezxenstore + (= :version)) + (forkexec + (= :version)) + io-page + lwt + lwt_ssl + nbd + nbd-unix + ppx_cstruct + ppx_deriving_rpc + re + result + rpclib + ssl + sha + tar + uri + (vhd-format + (= :version)) + (vhd-format-lwt + (= :version)) + (xapi-idl + (= :version)) + (xapi-log + (= :version)) + (xen-api-client-lwt + (= :version)) + xenstore + xenstore_transport)) + +(package + (name vhd-format)) + +(package + (name vhd-format-lwt) + (synopsis "Lwt interface to read/write VHD format data") + (description + "A pure OCaml library to read and write\n[vhd](http://en.wikipedia.org/wiki/VHD_(file_format)) format data, plus a\nsimple command-line tool which allows vhd files to be interrogated,\nmanipulated, format-converted and streamed to and from files and remote\nservers.\n\nThis package provides an Lwt compatible interface to the library.") + (authors "Jon Ludlam" "Dave Scott") + (maintainers "Dave Scott ") + (tags + ("org:mirage" "org:xapi-project")) + (homepage "https://github.com/mirage/ocaml-vhd") + (source + (github mirage/ocaml-vhd)) + (depends + (ocaml + (>= "4.10.0")) + (alcotest :with-test) + (alcotest-lwt + (and + :with-test + (>= "1.0.0"))) + (bigarray-compat + (>= "1.1.0")) + (cstruct + (>= "6.0.0")) + cstruct-lwt + (fmt :with-test) + (lwt + (>= "3.2.0")) + (mirage-block + (>= "3.0.0")) + (rresult + (>= "0.7.0")) + (vhd-format + (= :version)) + (io-page + (and + :with-test + (>= "2.4.0"))))) + +(package + (name qcow-stream-tool) + (synopsis "Minimal CLI wrapper for qcow-stream") (depends - (alcotest-lwt :with-test) - astring - bigarray-compat + qcow-stream cmdliner - cohttp 
- cohttp-lwt - conf-libssl - (cstruct (>= "3.0.0")) - (ezxenstore (= :version)) - (forkexec (= :version)) - io-page - lwt - lwt_ssl - nbd - nbd-unix - ppx_cstruct - ppx_deriving_rpc - re - result - rpclib - ssl - sha - tar - uri - (vhd-format (= :version)) - (vhd-format-lwt (= :version)) - (xapi-idl (= :version)) - (xapi-log (= :version)) - (xen-api-client-lwt (= :version)) - xenstore - xenstore_transport - ) -) - -(package - (name vhd-format) -) - -(package - (name vhd-format-lwt) - (synopsis "Lwt interface to read/write VHD format data") - (description "A pure OCaml library to read and write -[vhd](http://en.wikipedia.org/wiki/VHD_(file_format)) format data, plus a -simple command-line tool which allows vhd files to be interrogated, -manipulated, format-converted and streamed to and from files and remote -servers. - -This package provides an Lwt compatible interface to the library.") - (authors "Jon Ludlam" "Dave Scott") - (maintainers "Dave Scott ") - (tags ("org:mirage" "org:xapi-project")) - (homepage "https://github.com/mirage/ocaml-vhd") - (source (github mirage/ocaml-vhd)) - (depends - (ocaml (>= "4.10.0")) - (alcotest :with-test) - (alcotest-lwt (and :with-test (>= "1.0.0"))) - (bigarray-compat (>= "1.1.0")) - (cstruct (>= "6.0.0")) - cstruct-lwt - (fmt :with-test) - (lwt (>= "3.2.0")) - (mirage-block (>= "3.0.0")) - (rresult (>= "0.7.0")) - (vhd-format (= :version)) - (io-page (and :with-test (>= "2.4.0"))) - ) -) - -(package - (name varstored-guard) -) - -(package - (name uuid) -) - -(package - (name stunnel) - (synopsis "Library used by xapi to herd stunnel processes") - (description "This library allows xapi to configure, launch and terminate stunnel processes that act as clients and servers.") - (depends - astring - (forkexec (= :version)) - (safe-resources (= :version)) - (uuid (= :version)) - (xapi-consts (= :version)) - xapi-inventory - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-threads (= :version)) - 
(xapi-stdext-unix (= :version)) - (odoc :with-doc) - ) -) - -(package - (name sexpr) -) - -(package - (name safe-resources) -) - -(package - (name rrd-transport) - (synopsis "Shared-memory protocols for exposing system metrics") - (description "VMs running on a Xen host can use this library to expose performance counters which can be sampled by xapi's metric daemon.") - (authors "John Else") - (depends - (alcotest :with-test) - astring - bigarray-compat - cstruct - crc - (fmt :with-test) - rpclib - yojson - (xapi-idl (= :version)) - (xapi-rrd (= :version)) - (odoc :with-doc) - ) -) - -(package - (name pciutil) -) - -(package - (name message-switch-lwt) -) - -(package - (name message-switch-core) - (synopsis "A simple store-and-forward message switch") - (description "The switch stores messages in queues with well-known names. Clients use a simple HTTP protocol to enqueue and dequeue messages.") - (depends - astring - (cohttp (>= "0.21.1")) - ppx_deriving_rpc - ppx_sexp_conv - rpclib - sexplib - sexplib0 - uri - (xapi-log (= :version)) - (xapi-stdext-threads (= :version)) - (odoc :with-doc) - ) -) - -(package - (name message-switch-cli) -) - -(package - (name message-switch-unix) - (synopsis "A simple store-and-forward message switch") - (description "The switch stores messages in queues with well-known names. 
Clients use a simple HTTP protocol to enqueue and dequeue messages.") - (depends - base-threads - cohttp - (message-switch-core (= :version)) - ppx_deriving_rpc - rpclib - (xapi-stdext-threads (= :version)) - ) -) - -(package - (name message-switch) -) - -(package - (name http-lib) - (synopsis "An HTTP required used by xapi") - (description "This library allows xapi to perform varios activities related to the HTTP protocol.") - (depends - (alcotest :with-test) - astring - (base64 (>= "3.1.0")) - (clock (= :version)) - fmt - ipaddr - mtime - ppx_deriving_rpc - (qcheck-core :with-test) - rpclib - (safe-resources(= :version)) - sha - (stunnel (= :version)) - tgroup - uri - (uuid (= :version)) - xapi-backtrace - (xapi-idl (= :version)) - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-threads (= :version)) - (xapi-tracing (= :version)) - (xml-light2 (= :version)) - (odoc :with-doc) - ) -) - -(package - (name gzip) -) - -(package - (name forkexec) - (synopsis "Process-spawning library") - (description "Client and server library to spawn processes.") - (depends - astring - base-threads - (fd-send-recv (>= "2.0.0")) - ppx_deriving_rpc - rpclib - (uuid (= :version)) - xapi-backtrace - (xapi-log (= :version)) - (xapi-stdext-pervasives (= :version)) - (xapi-stdext-unix (= :version)) - (xapi-tracing (= :version)) - ) -) - -(package - (name ezxenstore) -) - -(package - (name cohttp-posix) -) - -(package - (name xapi-rrd) -) - -(package - (name xapi-inventory) -) - -(package - (name xapi-stdext-date) - (synopsis "Xapi's standard library extension, Dates") - (authors "Jonathan Ludlam") - (depends - (clock (= :version)) - ptime - ) -) - -(package - (name xapi-stdext-encodings) - (synopsis "Xapi's standard library extension, Encodings") - (authors "Jonathan Ludlam") - (depends - (ocaml (>= 4.13.0)) - (alcotest (and (>= 0.6.0) :with-test)) - (odoc :with-doc) - (bechamel :with-test) - (bechamel-notty :with-test) - (notty :with-test) ) ) (package - 
(name xapi-stdext-pervasives) - (synopsis "Xapi's standard library extension, Pervasives") - (authors "Jonathan Ludlam") - (depends - (ocaml (>= 4.08)) - logs - (odoc :with-doc) - xapi-backtrace - ) -) - -(package - (name xapi-stdext-std) - (synopsis "Xapi's standard library extension, Stdlib") - (depends - (ocaml (>= 4.08.0)) - (alcotest :with-test) - (odoc :with-doc) - ) -) - -(package - (name xapi-stdext-threads) - (synopsis "Xapi's standard library extension, Threads") - (authors "Jonathan Ludlam") - (depends - base-threads - base-unix - (alcotest :with-test) - (fmt :with-test) - (odoc :with-doc) - (xapi-stdext-pervasives (= :version)) - (mtime :with-test) - (xapi-stdext-unix (= :version)) - ) -) - -(package - (name xapi-stdext-unix) - (synopsis "Xapi's standard library extension, Unix") - (authors "Jonathan Ludlam") - (depends - (ocaml (>= 4.12.0)) - (alcotest :with-test) - astring - base-unix - (bisect_ppx :with-test) - (clock (and (= :version) :with-test)) - (fd-send-recv (>= 2.0.0)) - fmt - integers - (mtime (and (>= 2.0.0) :with-test)) - (logs :with-test) - (qcheck-core (and (>= 0.21.2) :with-test)) - (odoc :with-doc) - xapi-backtrace - unix-errno - (xapi-stdext-pervasives (= :version)) - polly - ) -) - -(package - (name xapi-stdext-zerocheck) - (synopsis "Xapi's standard library extension, Zerocheck") - (authors "Jonathan Ludlam") - (depends - (odoc :with-doc) - ) -) + (name varstored-guard)) + +(package + (name uuid)) + +(package + (name stunnel) + (synopsis "Library used by xapi to herd stunnel processes") + (description + "This library allows xapi to configure, launch and terminate stunnel processes that act as clients and servers.") + (depends + astring + (forkexec + (= :version)) + (safe-resources + (= :version)) + (uuid + (= :version)) + (xapi-consts + (= :version)) + xapi-inventory + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-stdext-unix + (= :version)) + (odoc :with-doc))) + 
+(package + (name sexpr)) + +(package + (name safe-resources)) + +(package + (name rrd-transport) + (synopsis "Shared-memory protocols for exposing system metrics") + (description + "VMs running on a Xen host can use this library to expose performance counters which can be sampled by xapi's metric daemon.") + (authors "John Else") + (depends + (alcotest :with-test) + astring + bigarray-compat + cstruct + crc + (fmt :with-test) + rpclib + yojson + (xapi-idl + (= :version)) + (xapi-rrd + (= :version)) + (odoc :with-doc))) + +(package + (name pciutil)) + +(package + (name message-switch-lwt)) + +(package + (name message-switch-core) + (synopsis "A simple store-and-forward message switch") + (description + "The switch stores messages in queues with well-known names. Clients use a simple HTTP protocol to enqueue and dequeue messages.") + (depends + astring + (cohttp + (>= "0.21.1")) + ppx_deriving_rpc + ppx_sexp_conv + rpclib + sexplib + sexplib0 + uri + (xapi-log + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-tracing (= :version)) + (odoc :with-doc))) + +(package + (name message-switch-cli)) + +(package + (name message-switch-unix) + (synopsis "A simple store-and-forward message switch") + (description + "The switch stores messages in queues with well-known names. 
Clients use a simple HTTP protocol to enqueue and dequeue messages.") + (depends + base-threads + cohttp + (message-switch-core + (= :version)) + ppx_deriving_rpc + rpclib + (xapi-stdext-threads + (= :version)) + (xapi-tracing (= :version)))) + +(package + (name message-switch)) + +(package + (name http-lib) + (synopsis "An HTTP required used by xapi") + (description + "This library allows xapi to perform varios activities related to the HTTP protocol.") + (depends + (alcotest :with-test) + astring + (base64 + (>= "3.1.0")) + (clock + (= :version)) + fmt + ipaddr + mtime + ppx_deriving_rpc + (qcheck-core :with-test) + rpclib + (safe-resources + (= :version)) + sha + (stunnel + (= :version)) + tgroup + uri + (uuid + (= :version)) + xapi-backtrace + (xapi-idl + (= :version)) + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-threads + (= :version)) + (xapi-tracing + (= :version)) + (xml-light2 + (= :version)) + (odoc :with-doc))) + +(package + (name gzip)) + +(package + (name forkexec) + (synopsis "Process-spawning library") + (description "Client and server library to spawn processes.") + (depends + astring + base-threads + (fd-send-recv + (>= "2.0.0")) + ppx_deriving_rpc + rpclib + (uuid + (= :version)) + xapi-backtrace + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-unix + (= :version)) + (xapi-tracing + (= :version)))) + +(package + (name ezxenstore)) + +(package + (name cohttp-posix)) + +(package + (name xapi-rrd)) + +(package + (name xapi-inventory)) + +(package + (name xapi-stdext-encodings) + (synopsis "Xapi's standard library extension, Encodings") + (authors "Jonathan Ludlam") + (depends + (ocaml + (>= 4.13.0)) + (alcotest + (and + (>= 0.6.0) + :with-test)) + (odoc :with-doc) + (bechamel :with-test) + (bechamel-notty :with-test) + (notty :with-test))) + +(package + (name xapi-stdext-pervasives) + (synopsis "Xapi's standard library extension, Pervasives") + (authors "Jonathan Ludlam") + 
(depends + (ocaml + (>= 4.08)) + logs + (odoc :with-doc) + xapi-backtrace)) + +(package + (name xapi-stdext-std) + (synopsis "Xapi's standard library extension, Stdlib") + (depends + (ocaml + (>= 4.08.0)) + (alcotest :with-test) + (odoc :with-doc))) + +(package + (name xapi-stdext-threads) + (synopsis "Xapi's standard library extension, Threads") + (authors "Jonathan Ludlam") + (depends + ambient-context + base-threads + base-unix + (alcotest :with-test) + (clock + (= :version)) + (fmt :with-test) + mtime + tgroup + (xapi-log + (= :version)) + (xapi-stdext-pervasives + (= :version)) + (xapi-stdext-unix + (= :version)))) + +(package + (name xapi-stdext-unix) + (synopsis "Xapi's standard library extension, Unix") + (authors "Jonathan Ludlam") + (depends + (ocaml + (>= 4.12.0)) + (alcotest :with-test) + astring + base-unix + (bisect_ppx :with-test) + (clock + (and + (= :version) + :with-test)) + (fd-send-recv + (>= 2.0.0)) + fmt + integers + (mtime + (and + (>= 2.0.0) + :with-test)) + (logs :with-test) + (qcheck-core + (and + (>= 0.21.2) + :with-test)) + (odoc :with-doc) + xapi-backtrace + unix-errno + (xapi-stdext-pervasives + (= :version)) + polly)) + +(package + (name xapi-stdext-zerocheck) + (synopsis "Xapi's standard library extension, Zerocheck") + (authors "Jonathan Ludlam") + (depends + (odoc :with-doc))) diff --git a/ocaml/database/block_device_io.mli b/ocaml/database/block_device_io.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/block_device_io.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/block_device_io_errors.mli b/ocaml/database/block_device_io_errors.mli new file mode 100644 index 00000000000..260c8b701ef --- /dev/null +++ b/ocaml/database/block_device_io_errors.mli @@ -0,0 +1,19 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser 
General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val timeout_error_msg : string + +val not_enough_space_error_msg : string + +val not_initialised_error_msg : string diff --git a/ocaml/database/database_server_main.ml b/ocaml/database/database_server_main.ml index e75539a5592..a4ebb21ab47 100644 --- a/ocaml/database/database_server_main.ml +++ b/ocaml/database/database_server_main.ml @@ -33,8 +33,9 @@ let remote_database_access_handler_v2 req bio = flush stdout ; raise e +open Xapi_database module Local_tests = - Xapi_database.Database_test.Tests (Xapi_database.Db_cache_impl) + Database_test.Tests (Db_interface_compat.OfCached (Db_cache_impl)) let schema = Test_schemas.schema diff --git a/ocaml/database/database_server_main.mli b/ocaml/database/database_server_main.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/database_server_main.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/database_test.ml b/ocaml/database/database_test.ml index dc176488f3b..b3e771e774c 100644 --- a/ocaml/database/database_test.ml +++ b/ocaml/database/database_test.ml @@ -205,11 +205,13 @@ functor let db = db |> add_row "bar" "bar:1" - (Row.add 0L Db_names.ref (String "bar:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "bar:1") (Row.add 0L "foos" (Set []) Row.empty) ) |> add_row "foo" "foo:1" - (Row.add 0L Db_names.ref (String "foo:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "foo:1") (Row.add 0L "bars" (Set []) Row.empty) ) |> set_field "foo" "foo:1" "bars" (add_to_set "bar:1" (Set [])) @@ -219,7 
+221,7 @@ functor Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith_fmt "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s" (Schema.Value.marshal bar_foos) ; @@ -235,13 +237,13 @@ functor failwith_fmt "check_many_to_many: bar(bar:1).foos expected () got %s" (Schema.Value.marshal bar_foos) ; (* add 'bar' to foo.bars *) - let db = set_field "foo" "foo:1" "bars" (Set ["bar:1"]) db in + let db = set_field "foo" "foo:1" "bars" (Schema.Value.set ["bar:1"]) db in (* check that 'bar.foos' includes 'foo' *) let bar_1 = Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith_fmt "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s - 2" (Schema.Value.marshal bar_foos) ; @@ -269,9 +271,9 @@ functor let row = Db_cache_types.Table.find r table in let s = Db_cache_types.Row.fold_over_recent g - (fun k _ v acc -> + (fun k _ cached acc -> Printf.sprintf "%s %s=%s" acc k - (Schema.Value.marshal v) + (Schema.CachedValue.string_of cached) ) row "" in diff --git a/ocaml/database/db_action_helper.ml b/ocaml/database/db_action_helper.ml index a553846e3d7..87ff4884933 100644 --- a/ocaml/database/db_action_helper.ml +++ b/ocaml/database/db_action_helper.ml @@ -20,16 +20,5 @@ let __callback : let events_register f = __callback := Some f -let events_unregister () = __callback := None - let events_notify ?snapshot ty op ref = match !__callback with None -> () | Some f -> f ?snapshot ty op ref - -(* -exception Db_set_or_map_parse_fail of string - -let parse_sexpr s : SExpr.t list = - match SExpr_TS.of_string s with - | SExpr.Node xs -> xs - | _ -> raise (Db_set_or_map_parse_fail s) -*) diff --git a/ocaml/database/db_action_helper.mli b/ocaml/database/db_action_helper.mli new file 
mode 100644 index 00000000000..81fb7eb480d --- /dev/null +++ b/ocaml/database/db_action_helper.mli @@ -0,0 +1,18 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val events_register : + (?snapshot:Rpc.t -> string -> string -> string -> unit) -> unit + +val events_notify : ?snapshot:Rpc.t -> string -> string -> string -> unit diff --git a/ocaml/database/db_backend.ml b/ocaml/database/db_backend.ml index 92954540c33..b92b021dadd 100644 --- a/ocaml/database/db_backend.ml +++ b/ocaml/database/db_backend.ml @@ -21,11 +21,11 @@ let db_FLUSH_TIMER = 2.0 (* --------------------- Util functions on db datastructures *) -let master_database = ref (Db_cache_types.Database.make Schema.empty) +let master_database = Atomic.make (Db_cache_types.Database.make Schema.empty) -let __test_set_master_database db = master_database := db +let __test_set_master_database db = Atomic.set master_database db -let make () = Db_ref.in_memory (ref master_database) +let make () = Db_ref.in_memory master_database (* !!! Right now this is called at cache population time. It would probably be preferable to call it on flush time instead, so we don't waste writes storing non-persistent field values on disk.. 
At the moment there's not much to worry about, since there are @@ -43,7 +43,10 @@ let blow_away_non_persistent_fields (schema : Schema.t) db = let col = Schema.Table.find name schema in let empty = col.Schema.Column.empty in let v', modified' = - if col.Schema.Column.persistent then (v, modified) else (empty, g) + if col.Schema.Column.persistent then + (Schema.CachedValue.value_of v, modified) + else + (empty, g) in ( Row.update modified' name empty (fun _ -> v') diff --git a/ocaml/database/db_cache.ml b/ocaml/database/db_cache.ml index eba091889ec..c6ec25d6130 100644 --- a/ocaml/database/db_cache.ml +++ b/ocaml/database/db_cache.ml @@ -19,30 +19,32 @@ module D = Debug.Make (struct let name = "db_cache" end) open D (** Masters will use this to modify the in-memory cache directly *) -module Local_db : DB_ACCESS = Db_cache_impl +module Local_db : DB_ACCESS2 = Db_cache_impl (** Slaves will use this to call the master by XMLRPC *) -module Remote_db : DB_ACCESS = Db_rpc_client_v1.Make (struct +module Remote_db : DB_ACCESS2 = +Db_interface_compat.OfCompat (Db_rpc_client_v1.Make (struct let initialise () = ignore (Master_connection.start_master_connection_watchdog ()) ; ignore (Master_connection.open_secure_connection ()) let rpc request = Master_connection.execute_remote_fn request -end) +end)) let get = function | Db_ref.In_memory _ -> - (module Local_db : DB_ACCESS) + (module Local_db : DB_ACCESS2) | Db_ref.Remote -> - (module Remote_db : DB_ACCESS) + (module Remote_db : DB_ACCESS2) let lifecycle_state_of ~obj fld = let open Datamodel in let {fld_states; _} = StringMap.find obj all_lifecycles in StringMap.find fld fld_states +module DB = Db_interface_compat.OfCached (Local_db) + let apply_delta_to_cache entry db_ref = - let module DB : DB_ACCESS = Local_db in match entry with | Redo_log.CreateRow (tblname, objref, kvs) -> debug "Redoing create_row %s (%s)" tblname objref ; diff --git a/ocaml/database/db_cache.mli b/ocaml/database/db_cache.mli new file mode 100644 index 
00000000000..ed1de2cd9ad --- /dev/null +++ b/ocaml/database/db_cache.mli @@ -0,0 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val get : Db_ref.t -> (module Db_interface.DB_ACCESS2) + +val apply_delta_to_cache : Redo_log.t -> Db_ref.t -> unit diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index 7bbf062bd02..97e1def4acb 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -19,6 +19,8 @@ functions have the suffix "_locked" to clearly identify them. 2. functions which only read must only call "get_database" once, to ensure they see a consistent snapshot. + With the exception of looking at the database schema, which is assumed to not change + concurrently. 
*) open Db_exn open Db_lock @@ -34,6 +36,10 @@ open Db_ref let fist_delay_read_records_where = ref false +type field_in = Schema.Value.t + +type field_out = Schema.maybe_cached_value + (* Only needed by the DB_ACCESS signature *) let initialise () = () @@ -47,14 +53,13 @@ let is_valid_ref t objref = let read_field_internal _ tblname fldname objref db = try - Row.find fldname + Row.find' fldname (Table.find objref (TableSet.find tblname (Database.tableset db))) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) (* Read field from cache *) let read_field t tblname fldname objref = - Schema.Value.marshal - (read_field_internal t tblname fldname objref (get_database t)) + read_field_internal t tblname fldname objref (get_database t) (** Finds the longest XML-compatible UTF-8 prefix of the given string, by truncating the string at the first incompatible @@ -62,36 +67,49 @@ let read_field t tblname fldname objref = occurs. *) let ensure_utf8_xml string = let length = String.length string in - let prefix = - Xapi_stdext_encodings.Encodings.UTF8_XML.longest_valid_prefix string - in + let prefix = Xapi_stdext_encodings.Utf8.XML.longest_valid_prefix string in if length > String.length prefix then warn "string truncated to: '%s'." 
prefix ; prefix +let ensure_utf8_xml_and_share string = string |> ensure_utf8_xml |> Share.merge + (* Write field in cache *) let write_field_locked t tblname objref fldname newval = let current_val = get_field tblname objref fldname (get_database t) in if current_val <> newval then ( - ( match newval with - | Schema.Value.String s -> - if not (Xapi_stdext_encodings.Encodings.UTF8_XML.is_valid s) then - raise Invalid_value - | _ -> - () - ) ; update_database t (set_field tblname objref fldname newval) ; Database.notify (WriteField (tblname, objref, fldname, current_val, newval)) (get_database t) ) +(** Ensure a value is conforming to UTF-8 with XML restrictions *) +let is_valid v = + let valid = Xapi_stdext_encodings.Utf8.XML.is_valid in + let valid_pair (x, y) = valid x && valid y in + match v with + | Schema.Value.String s -> + valid s + | Schema.Value.Set ss -> + List.for_all valid ss + | Schema.Value.Pairs pairs -> + List.for_all valid_pair pairs + +let share_string = function + | Schema.Value.String s -> + Schema.Value.String (Share.merge s) + | v -> + (* we assume strings in the tree have been shared already *) + v + let write_field t tblname objref fldname newval = - let db = get_database t in - let schema = Schema.table tblname (Database.schema db) in - let column = Schema.Table.find fldname schema in - let newval = Schema.Value.unmarshal column.Schema.Column.ty newval in - with_lock (fun () -> write_field_locked t tblname objref fldname newval) + if not @@ is_valid newval then + raise Invalid_value + else + with_lock (fun () -> + write_field_locked t tblname objref fldname (share_string newval) + ) let touch_row t tblname objref = update_database t (touch tblname objref) ; @@ -103,7 +121,7 @@ let touch_row t tblname objref = and iterates through set-refs [returning (fieldname, ref list) list; where fieldname is the name of the Set Ref field in tbl; and ref list is the list of foreign keys from related table with remote-fieldname=objref] *) -let 
read_record_internal db tblname objref = +let read_record_internal conv db tblname objref = try let tbl = TableSet.find tblname (Database.tableset db) in let row = Table.find objref tbl in @@ -116,84 +134,80 @@ let read_record_internal db tblname objref = else None in - let map_fvlist v = Schema.Value.marshal v in (* Unfortunately the interface distinguishes between Set(Ref _) types and ordinary fields *) Row.fold - (fun k _ d (accum_fvlist, accum_setref) -> + (fun k _ cached (accum_fvlist, accum_setref) -> let accum_setref = - match map_setref_opt k d with + match map_setref_opt k (Schema.CachedValue.value_of cached) with | Some v -> (k, v) :: accum_setref | None -> accum_setref in - let accum_fvlist = (k, map_fvlist d) :: accum_fvlist in + let accum_fvlist = (k, conv cached) :: accum_fvlist in (accum_fvlist, accum_setref) ) row ([], []) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) -let read_record t = read_record_internal (get_database t) +let read_record t = + read_record_internal Schema.CachedValue.open_present (get_database t) (* Delete row from tbl *) let delete_row_locked t tblname objref = try - W.debug "delete_row %s (%s)" tblname objref ; let tbl = TableSet.find tblname (Database.tableset (get_database t)) in let row = Table.find objref tbl in let db = get_database t in Database.notify (PreDelete (tblname, objref)) db ; update_database t (remove_row tblname objref) ; Database.notify - (Delete (tblname, objref, Row.fold (fun k _ v acc -> (k, v) :: acc) row []) + (Delete + ( tblname + , objref + , Row.fold + (fun k _ v acc -> (k, Schema.CachedValue.value_of v) :: acc) + row [] + ) ) (get_database t) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) let delete_row t tblname objref = + W.debug "delete_row %s (%s)" tblname objref ; with_lock (fun () -> delete_row_locked t tblname objref) (* Create new row in tbl containing specified k-v pairs *) let create_row_locked t tblname kvs' new_objref = let db = 
get_database t in - let schema = Schema.table tblname (Database.schema db) in - let kvs' = - List.map - (fun (key, value) -> - let value = ensure_utf8_xml value in - let column = Schema.Table.find key schema in - (key, Schema.Value.unmarshal column.Schema.Column.ty value) - ) - kvs' - in - (* we add the reference to the row itself so callers can use read_field_where to - return the reference: awkward if it is just the key *) - let kvs' = (Db_names.ref, Schema.Value.String new_objref) :: kvs' in - let g = Manifest.generation (Database.manifest (get_database t)) in + let g = Manifest.generation (Database.manifest db) in let row = - List.fold_left (fun row (k, v) -> Row.add g k v row) Row.empty kvs' + List.fold_left (fun row (k, v) -> Row.add' g k v row) Row.empty kvs' in - let schema = Schema.table tblname (Database.schema (get_database t)) in + let schema = Schema.table tblname (Database.schema db) in (* fill in default values if kv pairs for these are not supplied already *) let row = Row.add_defaults g schema row in - W.debug "create_row %s (%s) [%s]" tblname new_objref - (String.concat "," (List.map (fun (k, _) -> Printf.sprintf "(%s,v)" k) kvs')) ; update_database t (add_row tblname new_objref row) ; Database.notify (Create - (tblname, new_objref, Row.fold (fun k _ v acc -> (k, v) :: acc) row []) + ( tblname + , new_objref + , Row.fold + (fun k _ v acc -> (k, Schema.CachedValue.value_of v) :: acc) + row [] + ) ) (get_database t) let fld_check t tblname objref (fldname, value) = let v = - Schema.Value.marshal + Schema.CachedValue.string_of (read_field_internal t tblname fldname objref (get_database t)) in - (v = value, fldname, v) + (v = Schema.CachedValue.string_of value, fldname, v) -let create_row t tblname kvs' new_objref = +let create_row' t tblname kvs' new_objref = with_lock (fun () -> if is_valid_ref t new_objref then let uniq_check_list = List.map (fld_check t tblname new_objref) kvs' in @@ -206,26 +220,65 @@ let create_row t tblname kvs' new_objref = | 
_ -> () else + (* we add the reference to the row itself so callers can use read_field_where to + return the reference: awkward if it is just the key *) + let kvs' = + (Db_names.ref, Schema.Value.string new_objref |> Schema.CachedValue.v) + :: kvs' + in + W.debug "create_row %s (%s) [%s]" tblname new_objref + (String.concat "," + (List.map (fun (k, _) -> Printf.sprintf "(%s,v)" k) kvs') + ) ; create_row_locked t tblname kvs' new_objref ) +let create_row t tblname kvs' new_objref = + let kvs' = + List.map + (fun (key, value) -> + let value = + match value with + | Schema.Value.String x -> + Schema.Value.String (ensure_utf8_xml_and_share x) + | Schema.Value.Pairs ps -> + Schema.Value.Pairs + (List.map + (fun (x, y) -> + (ensure_utf8_xml_and_share x, ensure_utf8_xml_and_share y) + ) + ps + ) + | Schema.Value.Set xs -> + Schema.Value.Set (List.map ensure_utf8_xml_and_share xs) + in + (key, Schema.CachedValue.v value) + ) + kvs' + in + create_row' t tblname kvs' new_objref + (* Do linear scan to find field values which match where clause *) -let read_field_where t rcd = +let read_field_where' conv t rcd = let db = get_database t in let tbl = TableSet.find rcd.table (Database.tableset db) in Table.fold (fun _ _ row acc -> - let field = Schema.Value.marshal (Row.find rcd.where_field row) in + let field = + Schema.CachedValue.string_of (Row.find' rcd.where_field row) + in if field = rcd.where_value then - Schema.Value.marshal (Row.find rcd.return row) :: acc + conv (Row.find' rcd.return row) :: acc else acc ) tbl [] +let read_field_where t rcd = read_field_where' Fun.id t rcd + let db_get_by_uuid t tbl uuid_val = match - read_field_where t + read_field_where' Schema.CachedValue.string_of t { table= tbl ; return= Db_names.ref @@ -242,7 +295,7 @@ let db_get_by_uuid t tbl uuid_val = let db_get_by_uuid_opt t tbl uuid_val = match - read_field_where t + read_field_where' Schema.CachedValue.string_of t { table= tbl ; return= Db_names.ref @@ -257,7 +310,7 @@ let db_get_by_uuid_opt 
t tbl uuid_val = (** Return reference fields from tbl that matches specified name_label field *) let db_get_by_name_label t tbl label = - read_field_where t + read_field_where' Schema.CachedValue.string_of t { table= tbl ; return= Db_names.ref @@ -291,17 +344,17 @@ let find_refs_with_filter_internal db (tblname : Db_interface.table) let find_refs_with_filter t = find_refs_with_filter_internal (get_database t) -let read_records_where t tbl expr = +let read_records_where' conv t tbl expr = let db = get_database t in let reqd_refs = find_refs_with_filter_internal db tbl expr in if !fist_delay_read_records_where then Thread.delay 0.5 ; - List.map (fun ref -> (ref, read_record_internal db tbl ref)) reqd_refs + List.map (fun ref -> (ref, read_record_internal conv db tbl ref)) reqd_refs + +let read_records_where t tbl expr = + read_records_where' Schema.CachedValue.open_present t tbl expr let process_structured_field_locked t (key, value) tblname fld objref proc_fn_selector = - (* Ensure that both keys and values are valid for UTF-8-encoded XML. *) - let key = ensure_utf8_xml key in - let value = ensure_utf8_xml value in try let tbl = TableSet.find tblname (Database.tableset (get_database t)) in let row = Table.find objref tbl in @@ -338,6 +391,9 @@ let process_structured_field_locked t (key, value) tblname fld objref let process_structured_field t (key, value) tblname fld objref proc_fn_selector = + (* Ensure that both keys and values are valid for UTF-8-encoded XML. *) + let key = ensure_utf8_xml_and_share key in + let value = ensure_utf8_xml_and_share value in with_lock (fun () -> process_structured_field_locked t (key, value) tblname fld objref proc_fn_selector @@ -426,24 +482,24 @@ let spawn_db_flush_threads () = try Thread.delay Db_backend.db_FLUSH_TIMER ; (* If I have some writing capacity left in this write period then consider doing a write; or - if the connection is not write-limited then consider doing a write too. 
- We also have to consider doing a write if exit_on_next_flush is set: because when this is - set (by a signal handler) we want to do a flush whether or not our write limit has been - exceeded. + if the connection is not write-limited then consider doing a write too. + We also have to consider doing a write if exit_on_next_flush is set: because when this is + set (by a signal handler) we want to do a flush whether or not our write limit has been + exceeded. *) + (* always flush straight away; this request is urgent + otherwise, we only write if + (i) "coalesscing period has come to an end"; and + (ii) "write limiting requirements are met": *) ( if !Db_connections.exit_on_next_flush - (* always flush straight away; this request is urgent *) - || (* otherwise, we only write if (i) "coalesscing period has come to an end"; and (ii) "write limiting requirements are met": *) - (not (in_coallescing_period ())) - (* see (i) above *) - && (!my_writes_this_period - < dbconn.Parse_db_conf.write_limit_write_cycles - || dbconn.Parse_db_conf.mode - = Parse_db_conf.No_limit - (* (ii) above *) - ) - then (* debug "[%s] considering flush" db_path; *) + || (not (in_coallescing_period ())) + && (!my_writes_this_period + < dbconn.Parse_db_conf.write_limit_write_cycles + || dbconn.Parse_db_conf.mode + = Parse_db_conf.No_limit + ) + then let was_anything_flushed = Xapi_stdext_threads.Threadext.Mutex.execute Db_lock.global_flush_mutex (fun () -> @@ -453,7 +509,7 @@ let spawn_db_flush_threads () = if was_anything_flushed then ( my_writes_this_period := !my_writes_this_period + 1 ; (* when we do a write, reset the coallesce_period_start to now -- recall that this - variable tracks the time since last write *) + variable tracks the time since last write *) coallesce_period_start := Unix.gettimeofday () ) ) ; @@ -497,3 +553,41 @@ let stats t = ) (Database.tableset (get_database t)) [] + +module Compat = struct + type field_in = string + + type field_out = string + + let read_field_where t 
rcd = + read_field_where' Schema.CachedValue.string_of t rcd + + let read_field t tblname fldname objref = + read_field t tblname fldname objref |> Schema.CachedValue.string_of + + let write_field t tblname objref fldname newval = + let db = get_database t in + let schema = Schema.table tblname (Database.schema db) in + let column = Schema.Table.find fldname schema in + let newval = Schema.Value.unmarshal column.Schema.Column.ty newval in + write_field t tblname objref fldname newval + + let read_record t = + read_record_internal Schema.CachedValue.string_of (get_database t) + + let read_records_where t tbl expr = + read_records_where' Schema.CachedValue.string_of t tbl expr + + let create_row t tblname kvs' new_objref = + let db = get_database t in + let schema = Schema.table tblname (Database.schema db) in + let kvs' = + List.map + (fun (key, value) -> + let column = Schema.Table.find key schema in + (key, Schema.CachedValue.of_typed_string column.Schema.Column.ty value) + ) + kvs' + in + create_row' t tblname kvs' new_objref +end diff --git a/ocaml/database/db_cache_impl.mli b/ocaml/database/db_cache_impl.mli index b9b26cfc0ee..8dd161b0f8e 100644 --- a/ocaml/database/db_cache_impl.mli +++ b/ocaml/database/db_cache_impl.mli @@ -1,4 +1,4 @@ -include Db_interface.DB_ACCESS +include Db_interface.DB_ACCESS2 val make : Db_ref.t -> Parse_db_conf.db_connection list -> Schema.t -> unit (** [make t connections default_schema] initialises the in-memory cache *) diff --git a/ocaml/database/db_cache_test.ml b/ocaml/database/db_cache_test.ml index ed2a3296940..aa472419bfc 100644 --- a/ocaml/database/db_cache_test.ml +++ b/ocaml/database/db_cache_test.ml @@ -29,11 +29,13 @@ let check_many_to_many () = let db = db |> add_row "bar" "bar:1" - (Row.add 0L Db_names.ref (Schema.Value.String "bar:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "bar:1") (Row.add 0L "foos" (Schema.Value.Set []) Row.empty) ) |> add_row "foo" "foo:1" - (Row.add 0L Db_names.ref (Schema.Value.String 
"foo:1") + (Row.add 0L Db_names.ref + (Schema.Value.string "foo:1") (Row.add 0L "bars" (Schema.Value.Set []) Row.empty) ) |> set_field "foo" "foo:1" "bars" (add_to_set "bar:1" (Schema.Value.Set [])) @@ -41,7 +43,7 @@ let check_many_to_many () = (* check that 'bar.foos' includes 'foo' *) let bar_1 = Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Schema.Value.Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith (Printf.sprintf "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s" @@ -59,11 +61,11 @@ let check_many_to_many () = (Sexplib.Sexp.to_string (Schema.Value.sexp_of_t bar_foos)) ) ; (* add 'bar' to foo.bars *) - let db = set_field "foo" "foo:1" "bars" (Schema.Value.Set ["bar:1"]) db in + let db = set_field "foo" "foo:1" "bars" (Schema.Value.set ["bar:1"]) db in (* check that 'bar.foos' includes 'foo' *) let bar_1 = Table.find "bar:1" (TableSet.find "bar" (Database.tableset db)) in let bar_foos = Row.find "foos" bar_1 in - if bar_foos <> Schema.Value.Set ["foo:1"] then + if bar_foos <> Schema.Value.set ["foo:1"] then failwith (Printf.sprintf "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s - 2" diff --git a/ocaml/database/db_cache_test.mli b/ocaml/database/db_cache_test.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/db_cache_test.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/db_cache_types.ml b/ocaml/database/db_cache_types.ml index be73b91958f..63c91d14bb4 100644 --- a/ocaml/database/db_cache_types.ml +++ b/ocaml/database/db_cache_types.ml @@ -24,11 +24,33 @@ module HashedString = struct let hash = Hashtbl.hash end -module StringPool = Weak.Make (HashedString) - -let share = - let pool = StringPool.create 2048 in - StringPool.merge pool +module Share : sig + val merge : string -> string + (** [merge str] merges [str] into the 
stringpool. + It returns a string equal to [str]. + + This function is thread-safe, it skips adding the string to the pool + when called concurrently. + For best results call this while holding another lock. + *) +end = struct + module StringPool = Weak.Make (HashedString) + + let pool = StringPool.create 2048 + + let merge_running = Atomic.make 0 + + let merge str = + let str = + if Atomic.fetch_and_add merge_running 1 = 0 then + StringPool.merge pool str + else + (* no point in using a mutex here, just fall back to not sharing, + which is quicker. *) + str + in + Atomic.decr merge_running ; str +end module Stat = struct type t = {created: Time.t; modified: Time.t; deleted: Time.t} @@ -45,7 +67,7 @@ module StringMap = struct let compare = String.compare end) - let add key v t = add (share key) v t + let add key v t = add (Share.merge key) v t end module type VAL = sig @@ -136,27 +158,37 @@ functor end module Row = struct - include Make (Schema.Value) - - let add gen key v = - add gen key - @@ - match v with - | Schema.Value.String x -> - Schema.Value.String (share x) - | Schema.Value.Pairs ps -> - Schema.Value.Pairs (List.map (fun (x, y) -> (share x, share y)) ps) - | Schema.Value.Set xs -> - Schema.Value.Set (List.map share xs) + module CachedValue = struct + type t = Schema.cached_value + + let v = Schema.CachedValue.v + end + + include Make (CachedValue) + + let add' = add + + let add gen key v = add' gen key @@ CachedValue.v v type t = map_t type value = Schema.Value.t - let find key t = - try find key t + let iter f t = iter (fun k v -> f k (Schema.CachedValue.value_of v)) t + + let touch generation key default row = + touch generation key (CachedValue.v default) row + + let update gen key default f row = + let f v = v |> Schema.CachedValue.value_of |> f |> CachedValue.v in + update gen key (CachedValue.v default) f row + + let find' key t = + try find key t |> Schema.CachedValue.open_present with Not_found -> raise (DBCache_NotFound ("missing field", key, 
"")) + let find key t = find' key t |> Schema.CachedValue.value_of + let add_defaults g (schema : Schema.Table.t) t = let schema = Schema.Table.t'_of_t schema in List.fold_left @@ -518,9 +550,11 @@ let get_field tblname objref fldname db = (Table.find objref (TableSet.find tblname (Database.tableset db))) with Not_found -> raise (DBCache_NotFound ("missing row", tblname, objref)) +let empty = Schema.Value.string "" + let unsafe_set_field g tblname objref fldname newval = (fun _ -> newval) - |> Row.update g fldname (Schema.Value.String "") + |> Row.update g fldname empty |> Table.update g objref Row.empty |> TableSet.update g tblname Table.empty |> Database.update @@ -602,7 +636,7 @@ let set_field tblname objref fldname newval db = |> update_one_to_many g tblname objref remove_from_set |> Database.update ((fun _ -> newval) - |> Row.update g fldname (Schema.Value.String "") + |> Row.update g fldname empty |> Table.update g objref Row.empty |> TableSet.update g tblname Table.empty ) @@ -613,7 +647,7 @@ let set_field tblname objref fldname newval db = let g = Manifest.generation (Database.manifest db) in db |> ((fun _ -> newval) - |> Row.update g fldname (Schema.Value.String "") + |> Row.update g fldname empty |> Table.update g objref Row.empty |> TableSet.update g tblname Table.empty |> Database.update diff --git a/ocaml/database/db_cache_types.mli b/ocaml/database/db_cache_types.mli index 2ffe79c411b..f06af9a31c6 100644 --- a/ocaml/database/db_cache_types.mli +++ b/ocaml/database/db_cache_types.mli @@ -79,9 +79,39 @@ module type MAP = sig On exit there will be a binding of [key] whose modification time is [now] *) end +module Share : sig + val merge : string -> string + (** [merge str] merges [str] into the stringpool. + It returns a string equal to [str]. + + This function is thread-safe, it skips adding the string to the pool + when called concurrently. + For best results call this while holding another lock. 
+ *) +end + module Row : sig include MAP with type value = Schema.Value.t + val add' : Time.t -> string -> Schema.cached_value -> t -> t + (** [add now key value map] returns a new map with [key] associated with [value], + with creation time [now] *) + + val find' : string -> t -> [> Schema.present] Schema.CachedValue.t + (** [find key t] returns the value associated with [key] in [t] or raises + [DBCache_NotFound] *) + + val fold : + (string -> Stat.t -> Schema.cached_value -> 'b -> 'b) -> t -> 'b -> 'b + (** [fold f t initial] folds [f key stats value acc] over the items in [t] *) + + val fold_over_recent : + Time.t + -> (string -> Stat.t -> Schema.cached_value -> 'b -> 'b) + -> t + -> 'b + -> 'b + val add_defaults : Time.t -> Schema.Table.t -> t -> t (** [add_defaults now schema t]: returns a row which is [t] extended to contain all the columns specified in the schema, with default values set if not already diff --git a/ocaml/database/db_connections.ml b/ocaml/database/db_connections.ml index 9b390967fce..18152a18c4e 100644 --- a/ocaml/database/db_connections.ml +++ b/ocaml/database/db_connections.ml @@ -62,22 +62,12 @@ let preferred_write_db () = List.hd (Db_conn_store.read_db_connections ()) let exit_on_next_flush = ref false (* db flushing thread refcount: the last thread out of the door does the exit(0) when flush_on_exit is true *) -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute +let db_flush_thread_refcount = Atomic.make 0 -let db_flush_thread_refcount_m = Mutex.create () - -let db_flush_thread_refcount = ref 0 - -let inc_db_flush_thread_refcount () = - with_lock db_flush_thread_refcount_m (fun () -> - db_flush_thread_refcount := !db_flush_thread_refcount + 1 - ) +let inc_db_flush_thread_refcount () = Atomic.incr db_flush_thread_refcount let dec_and_read_db_flush_thread_refcount () = - with_lock db_flush_thread_refcount_m (fun () -> - db_flush_thread_refcount := !db_flush_thread_refcount - 1 ; - !db_flush_thread_refcount - ) + 
Atomic.fetch_and_add db_flush_thread_refcount (-1) let pre_exit_hook () = (* We're about to exit. Close the active redo logs. *) diff --git a/ocaml/database/db_connections.mli b/ocaml/database/db_connections.mli new file mode 100644 index 00000000000..81ec405a581 --- /dev/null +++ b/ocaml/database/db_connections.mli @@ -0,0 +1,29 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val get_dbs_and_gen_counts : unit -> (int64 * Parse_db_conf.db_connection) list + +val choose : + Parse_db_conf.db_connection list -> Parse_db_conf.db_connection option + +val preferred_write_db : unit -> Parse_db_conf.db_connection + +val exit_on_next_flush : bool ref + +val inc_db_flush_thread_refcount : unit -> unit + +val flush_dirty_and_maybe_exit : + Parse_db_conf.db_connection -> int option -> bool + +val flush : Parse_db_conf.db_connection -> Db_cache_types.Database.t -> unit diff --git a/ocaml/database/db_exn.mli b/ocaml/database/db_exn.mli new file mode 100644 index 00000000000..53b686e1f4c --- /dev/null +++ b/ocaml/database/db_exn.mli @@ -0,0 +1,39 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** class * field * uuid * key *) +exception Duplicate_key of string * string * string * string + +(** message * class * key *) +exception DBCache_NotFound of string * string * string + +(** class * field * key *) +exception Uniqueness_constraint_violation of string * string * string + +(** class * field * value *) +exception Integrity_violation of string * string * string + +(** class * _ * uuid *) +exception Read_missing_uuid of string * string * string + +(** class * _ * uuid *) +exception Too_many_values of string * string * string + +exception Remote_db_server_returned_unknown_exception + +exception Remote_db_server_returned_bad_message + +exception Empty_key_in_map + +exception Invalid_value diff --git a/ocaml/database/db_filter.ml b/ocaml/database/db_filter.ml index 25a171c8384..915162ae8db 100644 --- a/ocaml/database/db_filter.ml +++ b/ocaml/database/db_filter.ml @@ -18,33 +18,6 @@ open Db_filter_types -let string_of_val = function - | Field x -> - "Field " ^ x - | Literal x -> - "Literal " ^ x - -let rec string_of_expr = - let binexpr name a b = - Printf.sprintf "%s (%s, %s)" name (string_of_expr a) (string_of_expr b) - in - let binval name a b = - Printf.sprintf "%s (%s, %s)" name (string_of_val a) (string_of_val b) - in - function - | True -> - "True" - | False -> - "False" - | Not x -> - Printf.sprintf "Not ( %s )" (string_of_expr x) - | And (a, b) -> - binexpr "And" a b - | Or (a, b) -> - binexpr "Or" a b - | Eq (a, b) -> - binval "Eq" a b - exception XML_unmarshall_error let val_of_xml xml = diff --git a/ocaml/database/db_filter.mli b/ocaml/database/db_filter.mli new file mode 100644 index 00000000000..392974c470e --- /dev/null +++ b/ocaml/database/db_filter.mli @@ -0,0 +1,25 @@ +(* + * 
Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +exception XML_unmarshall_error + +exception Expression_error of (string * exn) + +val expr_of_xml : XMLRPC.xmlrpc -> Db_filter_types.expr + +val expr_of_string : string -> Db_filter_types.expr + +val xml_of_expr : Db_filter_types.expr -> XMLRPC.xmlrpc + +val eval_expr : (Db_filter_types._val -> string) -> Db_filter_types.expr -> bool diff --git a/ocaml/database/db_filter_lex.mli b/ocaml/database/db_filter_lex.mli new file mode 100644 index 00000000000..63834965084 --- /dev/null +++ b/ocaml/database/db_filter_lex.mli @@ -0,0 +1,15 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +val lexer : Lexing.lexbuf -> Db_filter_parse.token diff --git a/ocaml/database/db_filter_types.mli b/ocaml/database/db_filter_types.mli new file mode 100644 index 00000000000..1584d7b3497 --- /dev/null +++ b/ocaml/database/db_filter_types.mli @@ -0,0 +1,31 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type _val = Field of string | Literal of string + +val rpc_of__val : _val -> Rpc.t + +val _val_of_rpc : Rpc.t -> _val + +type expr = + | True + | False + | Not of expr + | Eq of _val * _val + | And of expr * expr + | Or of expr * expr + +val rpc_of_expr : expr -> Rpc.t + +val expr_of_rpc : Rpc.t -> expr diff --git a/ocaml/database/db_globs.mli b/ocaml/database/db_globs.mli new file mode 100644 index 00000000000..d51d569907d --- /dev/null +++ b/ocaml/database/db_globs.mli @@ -0,0 +1,67 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +val redo_log_block_device_io : string ref + +val redo_log_connect_delay : float ref + +val redo_log_max_block_time_empty : float ref + +val redo_log_max_block_time_read : float ref + +val redo_log_max_block_time_writedelta : float ref + +val redo_log_max_block_time_writedb : float ref + +val redo_log_initial_backoff_delay : int + +val redo_log_exponentiation_base : int + +val redo_log_maximum_backoff_delay : int + +val redo_log_max_dying_processes : int + +val redo_log_comms_socket_stem : string + +val redo_log_max_startup_time : float ref + +val redo_log_length_of_half : int + +val ha_metadata_db : string + +val gen_metadata_db : string + +val static_vdis_dir : string ref + +val http_limit_max_rpc_size : int + +val idempotent_map : bool ref + +val permanent_master_failure_retry_interval : float ref + +val master_connection_reset_timeout : float ref + +val master_connection_retry_timeout : float ref + +val master_connection_default_timeout : float ref + +val pool_secret : Db_secret_string.t ref + +val restart_fn : (unit -> unit) ref + +val https_port : int ref + +val snapshot_db : string + +val db_conf_path : string ref diff --git a/ocaml/database/db_interface.ml b/ocaml/database/db_interface.mli similarity index 78% rename from ocaml/database/db_interface.ml rename to ocaml/database/db_interface.mli index 9343ed87e8d..af1d4572909 100644 --- a/ocaml/database/db_interface.ml +++ b/ocaml/database/db_interface.mli @@ -33,15 +33,15 @@ type db_ref = string type uuid = string -type regular_fields = (field_name * field) list +type 'field regular_fields = (field_name * 'field) list type associated_fields = (field_name * db_ref list) list (** dictionary of regular fields x dictionary of associated set_ref values *) -type db_record = regular_fields * associated_fields +type 'field db_record = 'field regular_fields * associated_fields (** The client interface to the database *) -module type DB_ACCESS = sig +module type DB_ACCESS_COMMON = sig val initialise : unit -> 
unit (** [initialise ()] must be called before any other function in this interface *) @@ -61,11 +61,6 @@ module type DB_ACCESS = sig (** [find_refs_with_filter tbl expr] returns a list of all references to rows which match [expr] *) - val read_field_where : Db_ref.t -> Db_cache_types.where_record -> field list - (** [read_field_where {tbl,return,where_field,where_value}] returns a - list of the [return] fields in table [tbl] where the [where_field] - equals [where_value] *) - val db_get_by_uuid : Db_ref.t -> table -> uuid -> db_ref (** [db_get_by_uuid tbl uuid] returns the single object reference associated with [uuid] *) @@ -79,40 +74,76 @@ module type DB_ACCESS = sig (** [db_get_by_name_label tbl label] returns the list of object references associated with [label] *) - val create_row : Db_ref.t -> table -> regular_fields -> db_ref -> unit - (** [create_row tbl kvpairs ref] create a new row in [tbl] with - key [ref] and contents [kvpairs] *) - val delete_row : Db_ref.t -> db_ref -> table -> unit (** [delete_row context tbl ref] deletes row [ref] from table [tbl] *) - val write_field : Db_ref.t -> table -> db_ref -> field_name -> field -> unit + val process_structured_field : + Db_ref.t + -> field_name * string + -> table + -> field_name + -> db_ref + -> Db_cache_types.structured_op_t + -> unit + (** [process_structured_field context kv tbl fld ref op] modifies the + value of field [fld] in row [ref] in table [tbl] according to [op] + which may be one of AddSet RemoveSet AddMap RemoveMap with + arguments [kv] *) +end + +module type DB_ACCESS_FIELD = sig + type field_in + + type field_out + + val read_field_where : + Db_ref.t -> Db_cache_types.where_record -> field_out list + (** [read_field_where {tbl,return,where_field,where_value}] returns a + list of the [return] fields in table [tbl] where the [where_field] + equals [where_value] *) + + val create_row : + Db_ref.t -> table -> field_in regular_fields -> db_ref -> unit + (** [create_row tbl kvpairs ref] create a 
new row in [tbl] with + key [ref] and contents [kvpairs] *) + + val write_field : + Db_ref.t -> table -> db_ref -> field_name -> field_in -> unit (** [write_field context tbl ref fld val] changes field [fld] to [val] in row [ref] in table [tbl] *) - val read_field : Db_ref.t -> table -> field_name -> db_ref -> field + val read_field : Db_ref.t -> table -> field_name -> db_ref -> field_out (** [read_field context tbl fld ref] returns the value of field [fld] in row [ref] in table [tbl] *) - val read_record : Db_ref.t -> table -> db_ref -> db_record + val read_record : Db_ref.t -> table -> db_ref -> field_out db_record (** [read_record tbl ref] returns [ (field, value) ] * [ (set_ref fieldname * [ ref ]) ] *) val read_records_where : - Db_ref.t -> table -> Db_filter_types.expr -> (db_ref * db_record) list + Db_ref.t + -> table + -> Db_filter_types.expr + -> (db_ref * field_out db_record) list (** [read_records_where tbl expr] returns a list of the values returned by read_record that match the expression *) +end - val process_structured_field : - Db_ref.t - -> field_name * field - -> table - -> field_name - -> db_ref - -> Db_cache_types.structured_op_t - -> unit - (** [process_structured_field context kv tbl fld ref op] modifies the - value of field [fld] in row [ref] in table [tbl] according to [op] - which may be one of AddSet RemoveSet AddMap RemoveMap with - arguments [kv] *) +module type DB_ACCESS = sig + include DB_ACCESS_COMMON + + include + DB_ACCESS_FIELD with type field_in = string and type field_out = string +end + +module type DB_ACCESS2 = sig + include DB_ACCESS_COMMON + + include + DB_ACCESS_FIELD + with type field_in = Schema.Value.t + and type field_out = Schema.maybe_cached_value + + module Compat : + DB_ACCESS_FIELD with type field_in = string and type field_out = string end diff --git a/ocaml/database/db_interface_compat.ml b/ocaml/database/db_interface_compat.ml new file mode 100644 index 00000000000..a1c981a9e7e --- /dev/null +++ 
b/ocaml/database/db_interface_compat.ml @@ -0,0 +1,61 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Db_interface + +module OfCached (DB : DB_ACCESS2) : DB_ACCESS = struct + include DB include DB.Compat +end + +module OfCompat (DB : DB_ACCESS) : DB_ACCESS2 = struct + module Compat = DB + include Compat + + type field_in = Schema.Value.t + + type field_out = Schema.maybe_cached_value + + let field_of_compat = Schema.CachedValue.of_string + + let compat_of_field = Schema.Value.marshal + + let regular_field_of_compat (k, v) = (k, field_of_compat v) + + let regular_fields_of_compat l = List.map regular_field_of_compat l + + let compat_of_regular_field (k, v) = (k, compat_of_field v) + + let compat_of_regular_fields l = List.map compat_of_regular_field l + + let db_record_of_compat (regular, assoc) = + (regular_fields_of_compat regular, assoc) + + let db_record_entry_of_compat (ref, record) = (ref, db_record_of_compat record) + + let read_field_where t where = + read_field_where t where |> List.map field_of_compat + + let create_row t tbl fields ref = + create_row t tbl (compat_of_regular_fields fields) ref + + let write_field t tbl ref fld field = + write_field t tbl ref fld (compat_of_field field) + + let read_field t tbl fld ref = read_field t tbl fld ref |> field_of_compat + + let read_record t tbl ref = read_record t tbl ref |> db_record_of_compat + + let read_records_where t tbl expr = + read_records_where t tbl expr |> 
List.map db_record_entry_of_compat +end diff --git a/ocaml/database/db_interface_compat.mli b/ocaml/database/db_interface_compat.mli new file mode 100644 index 00000000000..a735cf122dc --- /dev/null +++ b/ocaml/database/db_interface_compat.mli @@ -0,0 +1,18 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +open Db_interface + +module OfCached : functor (_ : DB_ACCESS2) -> DB_ACCESS + +module OfCompat : functor (_ : DB_ACCESS) -> DB_ACCESS2 diff --git a/ocaml/database/db_lock.ml b/ocaml/database/db_lock.ml index e893050f58c..648ca94dc26 100644 --- a/ocaml/database/db_lock.ml +++ b/ocaml/database/db_lock.ml @@ -59,9 +59,7 @@ module ReentrantLock : REENTRANT_LOCK = struct type t = { holder: tid option Atomic.t (* The holder of the lock *) ; mutable holds: int (* How many holds the holder has on the lock *) - ; lock: Mutex.t (* Barrier to signal waiting threads *) - ; condition: Condition.t - (* Waiting threads are signalled via this condition to reattempt to acquire the lock *) + ; lock: Mutex.t (* Mutex held by the holder thread *) ; statistics: statistics (* Bookkeeping of time taken to acquire lock *) } @@ -73,7 +71,6 @@ module ReentrantLock : REENTRANT_LOCK = struct holder= Atomic.make None ; holds= 0 ; lock= Mutex.create () - ; condition= Condition.create () ; statistics= create_statistics () } @@ -94,9 +91,7 @@ module ReentrantLock : REENTRANT_LOCK = struct let intended = Some me in let counter = Mtime_clock.counter () in Mutex.lock 
l.lock ; - while not (Atomic.compare_and_set l.holder None intended) do - Condition.wait l.condition l.lock - done ; + Atomic.set l.holder intended ; lock_acquired () ; let stats = l.statistics in let delta = Clock.Timer.span_to_s (Mtime_clock.count counter) in @@ -104,7 +99,7 @@ module ReentrantLock : REENTRANT_LOCK = struct stats.min_time <- Float.min delta stats.min_time ; stats.max_time <- Float.max delta stats.max_time ; stats.acquires <- stats.acquires + 1 ; - Mutex.unlock l.lock ; + (* do not unlock, it will be done when holds reaches 0 instead *) l.holds <- 1 let unlock l = @@ -114,10 +109,8 @@ module ReentrantLock : REENTRANT_LOCK = struct l.holds <- l.holds - 1 ; if l.holds = 0 then ( let () = Atomic.set l.holder None in - Mutex.lock l.lock ; - Condition.signal l.condition ; - Mutex.unlock l.lock ; - lock_released () + (* the lock is held (acquired in [lock]), we only need to unlock *) + Mutex.unlock l.lock ; lock_released () ) | _ -> failwith diff --git a/ocaml/database/db_names.mli b/ocaml/database/db_names.mli new file mode 100644 index 00000000000..b1bb79d751c --- /dev/null +++ b/ocaml/database/db_names.mli @@ -0,0 +1,85 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +val uuid : string + +val ref : string + +val suspend_VDI : string + +val vm : string + +val console : string + +val name_label : string + +val power_state : string + +val allowed_operations : string + +val current_operations : string + +val memory_dynamic_max : string + +val memory_dynamic_min : string + +val memory_static_max : string + +val memory_static_min : string + +val memory_target : string + +val is_a_template : string + +val is_default_template : string + +val is_a_snapshot : string + +val is_control_domain : string + +val platform : string + +val other_config : string + +val metrics : string + +val guest_metrics : string + +val parent : string + +val snapshot_of : string + +val snapshot_time : string + +val transportable_snapshot_id : string + +val resident_on : string + +val scheduled_to_be_resident_on : string + +val domid : string + +val ha_always_run : string + +val host : string + +val pool : string + +val master : string + +val bios_strings : string + +val protection_policy : string + +val snapshot_schedule : string diff --git a/ocaml/database/db_ref.ml b/ocaml/database/db_ref.ml index c1819e5aa22..100fea3701c 100644 --- a/ocaml/database/db_ref.ml +++ b/ocaml/database/db_ref.ml @@ -12,15 +12,15 @@ * GNU Lesser General Public License for more details. 
*) -type t = In_memory of Db_cache_types.Database.t ref ref | Remote +type t = In_memory of Db_cache_types.Database.t Atomic.t | Remote exception Database_not_in_memory -let in_memory (rf : Db_cache_types.Database.t ref ref) = In_memory rf +let in_memory (rf : Db_cache_types.Database.t Atomic.t) = In_memory rf let get_database = function | In_memory x -> - !(!x) + Atomic.get x | Remote -> raise Database_not_in_memory @@ -28,6 +28,6 @@ let update_database t f = match t with | In_memory x -> let d : Db_cache_types.Database.t = f (get_database t) in - !x := d + Atomic.set x d | Remote -> raise Database_not_in_memory diff --git a/ocaml/database/db_ref.mli b/ocaml/database/db_ref.mli new file mode 100644 index 00000000000..93ab8655868 --- /dev/null +++ b/ocaml/database/db_ref.mli @@ -0,0 +1,24 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +type t = In_memory of Db_cache_types.Database.t Atomic.t | Remote + +exception Database_not_in_memory + +val in_memory : Db_cache_types.Database.t Atomic.t -> t + +val get_database : t -> Db_cache_types.Database.t + +val update_database : + t -> (Db_cache_types.Database.t -> Db_cache_types.Database.t) -> unit diff --git a/ocaml/database/db_remote_cache_access_v1.ml b/ocaml/database/db_remote_cache_access_v1.ml index 1499fa3fc13..6cb7af729c5 100644 --- a/ocaml/database/db_remote_cache_access_v1.ml +++ b/ocaml/database/db_remote_cache_access_v1.ml @@ -6,9 +6,7 @@ module DBCacheRemoteListener = struct exception DBCacheListenerUnknownMessageName of string - let ctr_mutex = Mutex.create () - - let calls_processed = ref 0 + let calls_processed = Atomic.make 0 let success xml = let resp = XMLRPC.To.array [XMLRPC.To.string "success"; xml] in @@ -28,14 +26,14 @@ module DBCacheRemoteListener = struct (* update_lengths xml resp; *) resp - module DBCache : Db_interface.DB_ACCESS = Db_cache_impl + module DBCache : Db_interface.DB_ACCESS = + Db_interface_compat.OfCached (Db_cache_impl) (** Unmarshals the request, calls the DBCache function and marshals the result. Note that, although the messages still contain the pool_secret for historical reasons, access has already been applied by the RBAC code in Xapi_http.add_handler. 
*) let process_xmlrpc xml = - let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute in - with_lock ctr_mutex (fun () -> calls_processed := !calls_processed + 1) ; + Atomic.incr calls_processed ; let fn_name, args = match XMLRPC.From.array (fun x -> x) xml with | [fn_name; _; args] -> diff --git a/ocaml/database/db_remote_cache_access_v2.ml b/ocaml/database/db_remote_cache_access_v2.ml index 754fd2fa340..51a1177cabd 100644 --- a/ocaml/database/db_remote_cache_access_v2.ml +++ b/ocaml/database/db_remote_cache_access_v2.ml @@ -19,7 +19,8 @@ open Db_exn (** Convert a marshalled Request Rpc.t into a marshalled Response Rpc.t *) let process_rpc (req : Rpc.t) = - let module DB : Db_interface.DB_ACCESS = Db_cache_impl in + let module DB : Db_interface.DB_ACCESS = + Db_interface_compat.OfCached (Db_cache_impl) in let t = Db_backend.make () in Response.rpc_of_t ( try diff --git a/ocaml/database/db_rpc_client_v1.ml b/ocaml/database/db_rpc_client_v1.ml index 7adbcd6bbed..9219779966b 100644 --- a/ocaml/database/db_rpc_client_v1.ml +++ b/ocaml/database/db_rpc_client_v1.ml @@ -22,6 +22,10 @@ functor struct exception Remote_db_server_returned_unknown_exception + type field_in = string + + type field_out = string + (* Process an exception returned from server, throwing local exception *) let process_exception_xml xml = match XMLRPC.From.array (fun x -> x) xml with diff --git a/ocaml/database/db_rpc_client_v2.ml b/ocaml/database/db_rpc_client_v2.ml index 2e03f069497..434677d3990 100644 --- a/ocaml/database/db_rpc_client_v2.ml +++ b/ocaml/database/db_rpc_client_v2.ml @@ -22,6 +22,10 @@ functor (RPC : Db_interface.RPC) -> struct + type field_in = string + + type field_out = string + let initialise = RPC.initialise let rpc x = RPC.rpc (Jsonrpc.to_string x) |> Jsonrpc.of_string diff --git a/ocaml/database/db_rpc_common_v1.mli b/ocaml/database/db_rpc_common_v1.mli new file mode 100644 index 00000000000..baba04f45d9 --- /dev/null +++ b/ocaml/database/db_rpc_common_v1.mli @@ -0,0 
+1,175 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +exception DB_remote_marshall_error + +val marshall_4strings : string * string * string * string -> XMLRPC.xmlrpc + +val unmarshall_4strings : XMLRPC.xmlrpc -> string * string * string * string + +val marshall_3strings : string * string * string -> XMLRPC.xmlrpc + +val unmarshall_3strings : XMLRPC.xmlrpc -> string * string * string + +val marshall_get_table_from_ref_args : string -> XMLRPC.xmlrpc + +val unmarshall_get_table_from_ref_args : XMLRPC.xmlrpc -> string + +val marshall_get_table_from_ref_response : string option -> XMLRPC.xmlrpc + +val unmarshall_get_table_from_ref_response : XMLRPC.xmlrpc -> string option + +val marshall_is_valid_ref_args : string -> XMLRPC.xmlrpc + +val unmarshall_is_valid_ref_args : XMLRPC.xmlrpc -> string + +val marshall_is_valid_ref_response : bool -> XMLRPC.xmlrpc + +val unmarshall_is_valid_ref_response : XMLRPC.xmlrpc -> bool + +val marshall_read_refs_args : string -> XMLRPC.xmlrpc + +val unmarshall_read_refs_args : XMLRPC.xmlrpc -> string + +val marshall_read_refs_response : string list -> XMLRPC.xmlrpc + +val unmarshall_read_refs_response : XMLRPC.xmlrpc -> string list + +val marshall_read_field_where_args : + Db_cache_types.where_record -> XMLRPC.xmlrpc + +val unmarshall_read_field_where_args : + XMLRPC.xmlrpc -> Db_cache_types.where_record + +val marshall_read_field_where_response : string list -> XMLRPC.xmlrpc + +val 
unmarshall_read_field_where_response : XMLRPC.xmlrpc -> string list + +val marshall_db_get_by_uuid_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_uuid_args : XMLRPC.xmlrpc -> string * string + +val marshall_db_get_by_uuid_response : string -> XMLRPC.xmlrpc + +val marshall_db_get_by_uuid_opt_response : string option -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_uuid_response : XMLRPC.xmlrpc -> string + +val unmarshall_db_get_by_uuid_opt_response : XMLRPC.xmlrpc -> string option + +val marshall_db_get_by_name_label_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_name_label_args : XMLRPC.xmlrpc -> string * string + +val marshall_db_get_by_name_label_response : string list -> XMLRPC.xmlrpc + +val unmarshall_db_get_by_name_label_response : XMLRPC.xmlrpc -> string list + +val marshall_create_row_args : + string * (string * string) list * string -> XMLRPC.xmlrpc + +val unmarshall_create_row_args : + XMLRPC.xmlrpc -> string * (string * string) list * string + +val marshall_create_row_response : unit -> XMLRPC.xmlrpc + +val unmarshall_create_row_response : XMLRPC.xmlrpc -> unit + +val marshall_delete_row_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_delete_row_args : XMLRPC.xmlrpc -> string * string + +val marshall_delete_row_response : unit -> XMLRPC.xmlrpc + +val unmarshall_delete_row_response : XMLRPC.xmlrpc -> unit + +val marshall_write_field_args : + string * string * string * string -> XMLRPC.xmlrpc + +val unmarshall_write_field_args : + XMLRPC.xmlrpc -> string * string * string * string + +val marshall_write_field_response : unit -> XMLRPC.xmlrpc + +val unmarshall_write_field_response : XMLRPC.xmlrpc -> unit + +val marshall_read_field_args : string * string * string -> XMLRPC.xmlrpc + +val unmarshall_read_field_args : XMLRPC.xmlrpc -> string * string * string + +val marshall_read_field_response : string -> XMLRPC.xmlrpc + +val unmarshall_read_field_response : XMLRPC.xmlrpc -> string + +val 
marshall_find_refs_with_filter_args : + string * Db_filter_types.expr -> XMLRPC.xmlrpc + +val unmarshall_find_refs_with_filter_args : + XMLRPC.xmlrpc -> string * Db_filter_types.expr + +val marshall_find_refs_with_filter_response : string list -> XMLRPC.xmlrpc + +val unmarshall_find_refs_with_filter_response : XMLRPC.xmlrpc -> string list + +val marshall_process_structured_field_args : + (string * string) + * string + * string + * string + * Db_cache_types.structured_op_t + -> XMLRPC.xmlrpc + +val unmarshall_process_structured_field_args : + XMLRPC.xmlrpc + -> (string * string) + * string + * string + * string + * Db_cache_types.structured_op_t + +val marshall_process_structured_field_response : unit -> XMLRPC.xmlrpc + +val unmarshall_process_structured_field_response : XMLRPC.xmlrpc -> unit + +val marshall_read_record_args : string * string -> XMLRPC.xmlrpc + +val unmarshall_read_record_args : XMLRPC.xmlrpc -> string * string + +val marshall_read_record_response : + (string * string) list * (string * string list) list -> XMLRPC.xmlrpc + +val unmarshall_read_record_response : + XMLRPC.xmlrpc -> (string * string) list * (string * string list) list + +val marshall_read_records_where_args : + string * Db_filter_types.expr -> XMLRPC.xmlrpc + +val unmarshall_read_records_where_args : + XMLRPC.xmlrpc -> string * Db_filter_types.expr + +val marshall_read_records_where_response : + (string * ((string * string) list * (string * string list) list)) list + -> XMLRPC.xmlrpc + +val unmarshall_read_records_where_response : + XMLRPC.xmlrpc + -> (string * ((string * string) list * (string * string list) list)) list + +val marshall_stringstringlist : (string * string) list -> Xml.xml + +val unmarshall_stringstringlist : Xml.xml -> (string * string) list + +val marshall_structured_op : Db_cache_types.structured_op_t -> Xml.xml + +val unmarshall_structured_op : Xml.xml -> Db_cache_types.structured_op_t diff --git a/ocaml/database/db_rpc_common_v2.mli 
b/ocaml/database/db_rpc_common_v2.mli new file mode 100644 index 00000000000..3555e696096 --- /dev/null +++ b/ocaml/database/db_rpc_common_v2.mli @@ -0,0 +1,70 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Request : sig + type t = + | Get_table_from_ref of string + | Is_valid_ref of string + | Read_refs of string + | Find_refs_with_filter of string * Db_filter_types.expr + | Read_field_where of Db_cache_types.where_record + | Db_get_by_uuid of string * string + | Db_get_by_uuid_opt of string * string + | Db_get_by_name_label of string * string + | Create_row of string * (string * string) list * string + | Delete_row of string * string + | Write_field of string * string * string * string + | Read_field of string * string * string + | Read_record of string * string + | Read_records_where of string * Db_filter_types.expr + | Process_structured_field of + (string * string) + * string + * string + * string + * Db_cache_types.structured_op_t + + val t_of_rpc : Rpc.t -> t + + val rpc_of_t : t -> Rpc.t +end + +module Response : sig + type t = + | Get_table_from_ref of string option + | Is_valid_ref of bool + | Read_refs of string list + | Find_refs_with_filter of string list + | Read_field_where of string list + | Db_get_by_uuid of string + | Db_get_by_uuid_opt of string option + | Db_get_by_name_label of string list + | Create_row of unit + | Delete_row of unit + | Write_field of unit + | Read_field of string + | Read_record of 
(string * string) list * (string * string list) list + | Read_records_where of + (string * ((string * string) list * (string * string list) list)) list + | Process_structured_field of unit + | Dbcache_notfound of string * string * string + | Duplicate_key_of of string * string * string * string + | Uniqueness_constraint_violation of string * string * string + | Read_missing_uuid of string * string * string + | Too_many_values of string * string * string + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t +end diff --git a/ocaml/database/db_secret_string.mli b/ocaml/database/db_secret_string.mli index f04812ebad3..e0587875a47 100644 --- a/ocaml/database/db_secret_string.mli +++ b/ocaml/database/db_secret_string.mli @@ -13,10 +13,10 @@ *) (* Prevent direct conversions to string to avoid accidental misuse. - * It is still possible to convert it to Rpc.t and recover it that way, - * it is not a protection against willfully recovering the protected string - * (we do need to send these as parameters in RPCs). - * *) + It is still possible to convert it to Rpc.t and recover it that way, + it is not a protection against willfully recovering the protected string + (we do need to send these as parameters in RPCs). +*) (** a type with no direct conversions to string *) type t diff --git a/ocaml/database/db_upgrade.mli b/ocaml/database/db_upgrade.mli new file mode 100644 index 00000000000..90eb5bf6912 --- /dev/null +++ b/ocaml/database/db_upgrade.mli @@ -0,0 +1,16 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val generic_database_upgrade : + Db_cache_types.Database.t -> Db_cache_types.Database.t diff --git a/ocaml/database/db_xml.ml b/ocaml/database/db_xml.ml index 1795cdef3bd..b9224f5ce5a 100644 --- a/ocaml/database/db_xml.ml +++ b/ocaml/database/db_xml.ml @@ -39,8 +39,6 @@ module To = struct Xmlm.output output `El_end (* Write out a string *) - let string (output : Xmlm.output) (key : string) (x : string) = - pair output key x (* Write out an int *) let int (output : Xmlm.output) (key : string) (x : int) = @@ -68,7 +66,8 @@ module To = struct (List.rev (Row.fold (fun k _ v acc -> - (k, Xml_spaces.protect (Schema.Value.marshal v)) :: acc + (k, Xml_spaces.protect (Schema.CachedValue.string_of v)) + :: acc ) row preamble ) diff --git a/ocaml/database/db_xml.mli b/ocaml/database/db_xml.mli new file mode 100644 index 00000000000..24a969c95cb --- /dev/null +++ b/ocaml/database/db_xml.mli @@ -0,0 +1,27 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +exception Unmarshall_error of string + +module To : sig + val fd : Unix.file_descr -> Db_cache_types.Database.t -> unit + + val file : string -> Db_cache_types.Database.t -> unit +end + +module From : sig + val file : Schema.t -> string -> Db_cache_types.Database.t + + val channel : Schema.t -> in_channel -> Db_cache_types.Database.t +end diff --git a/ocaml/database/dune b/ocaml/database/dune index 1b67e2146d9..7422d6dc900 100644 --- a/ocaml/database/dune +++ b/ocaml/database/dune @@ -1,134 +1,133 @@ (ocamllex db_filter_lex) -(menhir (modules db_filter_parse)) +(menhir + (modules db_filter_parse)) (library - (name xapi_schema) - (public_name xapi-schema) - (modules - db_names db_exn schema string_marshall_helper string_unmarshall_helper - test_schemas) - (libraries - sexpr - xapi-log - xapi-stdext-encodings - ) - (wrapped false) - (preprocess (per_module ((pps ppx_sexp_conv) Schema))) -) + (name xapi_schema) + (public_name xapi-schema) + (modules + db_names + db_exn + schema + string_marshall_helper + string_unmarshall_helper + test_schemas) + (libraries sexpr xapi-log xapi-stdext-encodings) + (wrapped false) + (preprocess + (per_module + ((pps ppx_sexp_conv) + Schema)))) (library - (name xapi_database) - (modes best) - (modules - (:standard \ database_server_main db_cache_test db_names db_exn - block_device_io string_marshall_helper string_unmarshall_helper schema - test_schemas unit_test_marshall unit_test_sql)) - (libraries - forkexec - gzip - mtime - mtime.clock.os - clock - rpclib.core - rpclib.json - safe-resources - stunnel - threads.posix - http_lib - httpsvr - uuid - xapi-backtrace - xapi-datamodel - xapi-log - (re_export xapi-schema) - xapi-idl.updates - xapi-stdext-encodings - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-unix - xapi_timeslice - xml-light2 - xmlm - ) - (preprocess - (per_module - ((pps ppx_deriving_rpc) - Db_cache_types Db_filter_types Db_rpc_common_v2 Db_secret_string))) -) + (name xapi_database) + 
(modes best) + (modules + (:standard + \ + database_server_main + db_cache_test + db_names + db_exn + block_device_io + string_marshall_helper + string_unmarshall_helper + schema + test_schemas + unit_test_marshall + unit_test_sql)) + (modules_without_implementation db_interface) + (libraries + forkexec + gzip + mtime + mtime.clock.os + clock + rpclib.core + rpclib.json + safe-resources + stunnel + threads.posix + http_lib + httpsvr + uuid + xapi-backtrace + xapi-datamodel + xapi-log + (re_export xapi-schema) + xapi-idl.updates + xapi-stdext-encodings + xapi-stdext-pervasives + xapi-stdext-std + xapi-stdext-threads + xapi-stdext-unix + xapi_timeslice + xml-light2 + xmlm) + (preprocess + (per_module + ((pps ppx_deriving_rpc) + Db_cache_types + Db_filter_types + Db_rpc_common_v2 + Db_secret_string)))) (executable - (modes exe) - (name block_device_io) - (modules block_device_io) - (libraries - - xapi_database - xapi-log - xapi-stdext-pervasives - xapi-stdext-unix - uuid - ) -) + (modes exe) + (name block_device_io) + (modules block_device_io) + (libraries + xapi_database + xapi-log + xapi-stdext-pervasives + xapi-stdext-unix + uuid)) (install - (package xapi) - (files (block_device_io.exe as block_device_io)) - (section libexec_root) -) + (package xapi) + (files + (block_device_io.exe as block_device_io)) + (section libexec_root)) (executable - (name database_server_main) - (modes exe) - (modules database_server_main) - (libraries - - http_lib - httpsvr - threads.posix - xapi_database - xapi-stdext-threads - xapi-stdext-unix - ) -) + (name database_server_main) + (modes exe) + (modules database_server_main) + (libraries + http_lib + httpsvr + threads.posix + xapi_database + xapi-stdext-threads + xapi-stdext-unix)) (tests - (names unit_test_marshall db_cache_test) - (modes exe) - (package xapi) - (modules db_cache_test unit_test_marshall) - (libraries - alcotest - http_lib - rpclib.xml - sexplib - sexplib0 - xapi_database - xml-light2 - ) -) + (names 
unit_test_marshall db_cache_test) + (modes exe) + (package xapi) + (modules db_cache_test unit_test_marshall) + (libraries + alcotest + http_lib + rpclib.xml + sexplib + sexplib0 + xapi_database + xml-light2)) (test - (name unit_test_sql) - (modes exe) - (package xapi) - (modules unit_test_sql) - (deps - sql_msg_example.txt - ) - (libraries - alcotest - xapi_database - xml-light2 - ) -) + (name unit_test_sql) + (modes exe) + (package xapi) + (modules unit_test_sql) + (deps sql_msg_example.txt) + (libraries alcotest xapi_database xml-light2)) (rule - (alias runtest) - (deps - (:x database_server_main.exe) - ) - (package xapi) - (action (run %{x} --master db.xml --test)) -) + (alias runtest) + (deps + (:x database_server_main.exe)) + (package xapi) + (action + (run %{x} --master db.xml --test))) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.ml b/ocaml/database/generation.mli similarity index 68% rename from ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.ml rename to ocaml/database/generation.mli index ef0f98ce13a..4a5dd6c90ed 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.ml +++ b/ocaml/database/generation.mli @@ -12,24 +12,14 @@ * GNU Lesser General Public License for more details. 
*) -include Clock.Date +type t = int64 -let never = epoch +val of_string : string -> t -let of_string = of_iso8601 +val to_string : int64 -> string -let to_string = to_rfc3339 +val add_int : int64 -> int -> int64 -let of_float = of_unix_time +val null_generation : int64 -let to_float = to_unix_time - -let rfc822_of_float = of_unix_time - -let rfc822_to_string = to_rfc822 - -let eq = equal - -type iso8601 = t - -type rfc822 = t +val suffix : string diff --git a/ocaml/database/master_connection.ml b/ocaml/database/master_connection.ml index d7faff1cd62..09fde7dceef 100644 --- a/ocaml/database/master_connection.ml +++ b/ocaml/database/master_connection.ml @@ -20,8 +20,6 @@ open Safe_resources -type db_record = (string * string) list * (string * string list) list - module D = Debug.Make (struct let name = "master_connection" end) open D diff --git a/ocaml/database/master_connection.mli b/ocaml/database/master_connection.mli new file mode 100644 index 00000000000..eca6c22d025 --- /dev/null +++ b/ocaml/database/master_connection.mli @@ -0,0 +1,43 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +val delay : Scheduler.PipeDelay.t + +exception Uninitialised + +val is_slave : (unit -> bool) ref + +val get_master_address : (unit -> string) ref + +val master_rpc_path : string ref + +exception Cannot_connect_to_master + +val force_connection_reset : unit -> unit + +val start_master_connection_watchdog : unit -> unit + +exception Goto_handler + +val on_database_connection_established : (unit -> unit) ref + +val open_secure_connection : unit -> unit + +val connection_timeout : float ref + +val restart_on_connection_timeout : bool ref + +exception Content_length_required + +val execute_remote_fn : string -> Db_interface.response diff --git a/ocaml/database/parse_db_conf.ml b/ocaml/database/parse_db_conf.ml index 8eb55ee2afe..67aa5c70d80 100644 --- a/ocaml/database/parse_db_conf.ml +++ b/ocaml/database/parse_db_conf.ml @@ -62,9 +62,6 @@ let generation_read dbconn = try Generation.of_string (Unixext.string_of_file gencount_fname) with _ -> 0L -(* The db conf used for bootstrap purposes, e.g. mounting the 'real' db on shared storage *) -let db_snapshot_dbconn = {dummy_conf with path= Db_globs.snapshot_db} - let from_mode v = match v with Write_limit -> "write_limit" | No_limit -> "no_limit" diff --git a/ocaml/database/parse_db_conf.mli b/ocaml/database/parse_db_conf.mli new file mode 100644 index 00000000000..95004fdb61f --- /dev/null +++ b/ocaml/database/parse_db_conf.mli @@ -0,0 +1,44 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +type db_connection_mode = Write_limit | No_limit + +type db_connection = { + path: string + ; mode: db_connection_mode + ; compress: bool + ; write_limit_period: int + ; write_limit_write_cycles: int + ; is_on_remote_storage: bool + ; other_parameters: (string * string) list + ; mutable last_generation_count: Generation.t +} + +val dummy_conf : db_connection + +val make : string -> db_connection + +val generation_filename : db_connection -> string + +val generation_read : db_connection -> Generation.t + +val write_db_conf : db_connection list -> unit + +exception Cannot_parse_database_config_file + +exception Cannot_have_multiple_dbs_in_sr + +val parse_db_conf : string -> db_connection list + +val get_db_conf : string -> db_connection list diff --git a/ocaml/database/redo_log.ml b/ocaml/database/redo_log.ml index 429646dcce7..8c2c95928d7 100644 --- a/ocaml/database/redo_log.ml +++ b/ocaml/database/redo_log.ml @@ -77,8 +77,7 @@ type redo_log_conf = { ; backoff_delay: int ref ; sock: Unix.file_descr option ref ; pid: (Forkhelpers.pidty * string * string) option ref - ; dying_processes_mutex: Mutex.t - ; num_dying_processes: int ref + ; num_dying_processes: int Atomic.t ; mutex: Mutex.t (** exclusive access to this configuration *) } @@ -585,14 +584,10 @@ let shutdown log = (Thread.create (fun () -> D.debug "Waiting for I/O process with pid %d to die..." 
ipid ; - with_lock log.dying_processes_mutex (fun () -> - log.num_dying_processes := !(log.num_dying_processes) + 1 - ) ; + Atomic.incr log.num_dying_processes ; ignore (Forkhelpers.waitpid p) ; D.debug "Finished waiting for process with pid %d" ipid ; - with_lock log.dying_processes_mutex (fun () -> - log.num_dying_processes := !(log.num_dying_processes) - 1 - ) + Atomic.decr log.num_dying_processes ) () ) ; @@ -633,13 +628,11 @@ let startup log = () (* We're already started *) | None -> ( (* Don't start if there are already some processes hanging around *) - with_lock log.dying_processes_mutex (fun () -> - if - !(log.num_dying_processes) - >= Db_globs.redo_log_max_dying_processes - then - raise TooManyProcesses - ) ; + if + Atomic.get log.num_dying_processes + >= Db_globs.redo_log_max_dying_processes + then + raise TooManyProcesses ; match !(log.device) with | None -> D.info "Could not find block device" ; @@ -793,8 +786,7 @@ let create ~name ~state_change_callback ~read_only = ; backoff_delay= ref Db_globs.redo_log_initial_backoff_delay ; sock= ref None ; pid= ref None - ; dying_processes_mutex= Mutex.create () - ; num_dying_processes= ref 0 + ; num_dying_processes= Atomic.make 0 ; mutex= Mutex.create () } in diff --git a/ocaml/database/schema.ml b/ocaml/database/schema.ml index 6577bc7cfc3..06a2dc391d4 100644 --- a/ocaml/database/schema.ml +++ b/ocaml/database/schema.ml @@ -40,6 +40,12 @@ module Value = struct | Pairs of (string * string) list [@@deriving sexp_of] + let string s = String s + + let set xs = Set xs + + let pairs xs = Pairs xs + let marshal = function | String x -> x @@ -84,6 +90,49 @@ module Value = struct end end +(** We have a Value.t *) +type present = [`Present of Value.t] + +(** We don't have a Value.t. For backwards compatibility with DB RPC protocols. 
*) +type absent = [`Absent] + +type maybe = [present | absent] + +module CachedValue = struct + type !+'a t = {v: 'a; marshalled: string} + + let v v = {v= `Present v; marshalled= Value.marshal v} + + let of_string marshalled = {v= `Absent; marshalled} + + let string_of t = t.marshalled + + let value_of {v= `Present v; _} = v + + let unmarshal ty t = + match t.v with + | `Present v -> + v + | `Absent -> + Value.unmarshal ty t.marshalled + + let of_typed_string ty marshalled = + let v = Value.unmarshal ty marshalled in + {v= `Present v; marshalled} + + let maybe_unmarshal ty = function + | {v= `Present _; _} as p -> + p + | {v= `Absent; marshalled} -> + of_typed_string ty marshalled + + let open_present ({v= `Present _; _} as t) = t +end + +type cached_value = present CachedValue.t + +type maybe_cached_value = maybe CachedValue.t + module Column = struct type t = { name: string diff --git a/ocaml/database/schema.mli b/ocaml/database/schema.mli new file mode 100644 index 00000000000..8a248d49953 --- /dev/null +++ b/ocaml/database/schema.mli @@ -0,0 +1,232 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +module Type : sig + type t = String | Set | Pairs [@@deriving sexp_of] + + exception Error of t * t +end + +module Value : sig + type t = + | String of string + | Set of string list + | Pairs of (string * string) list + [@@deriving sexp_of] + + val string : string -> t + + val set : string list -> t + + val pairs : (string * string) list -> t + + val marshal : t -> string + + val unmarshal : Type.t -> string -> t + + module Unsafe_cast : sig + val string : t -> string + + val set : t -> string list + + val pairs : t -> (string * string) list + end +end + +type present = [`Present of Value.t] + +type absent = [`Absent] + +type maybe = [`Absent | `Present of Value.t] + +(** Abstract type, ensuring marshalled form was created from a Value.t. + + For backwards compatibility this can also be created from a marshalled form, + but then retrieving the value requires its {Type.t} to be known. + + A polymorphic variant is used to decide at the type level when we are always guaranteed to have + a {type:Value.t} available, from the situations where we do not. + + When {type:Value.t} is not available at construction time then unmarshaling can incurr a performance + overhead every time it is called, because the value here is immutable, and caching only happens at construction time. + + No guarantee is made about the encoding of the values (in the future we could also cache whether we've already checked + for [utf8_xml] compatibility). + *) +module CachedValue : sig + type +!'a t + + val v : Value.t -> [> present] t + (** [v value] creates a cached value, storing the value and its serialized form. + + [O(1)] for strings, and [O(n)] for sets and maps, where [n] is the result size in marshalled form. + *) + + val of_string : string -> [> absent] t + (** [of_string marshalled] created a cached value from a marshalled form. + + This is provided for backwards compatibility, e.g. for DB RPC calls which only send the marshalled form without type information. 
+ [O(1)] operation, but {!val:unmarshal} can be [O(n)] for sets and maps. + *) + + val string_of : 'a t -> string + (** [string_of t] returns [t] in marshalled form. + + This works on any cached value types. + + [O(1)] operation, marshaling happens at construction time. + *) + + val of_typed_string : Type.t -> string -> [> present] t + (** [of_typed_string ty marshalled] creates a cached value, storing both the serialized form and the value. + + Same complexity as {!val:unmarshal} + *) + + val value_of : [< present] t -> Value.t + (** [value_of t] returns [t] in {!type:Value.t} form. + + This only works on cached values created by {!val:v}. + + [O(1)] operation, stored at construction time. + *) + + val unmarshal : Type.t -> [< maybe] t -> Value.t + (** [unmarshal ty t] returns [t] in Value.t form if known, or unmarshals it. + + This works on any cached value. + When the value was created by {!val:v} this is an [O(1)] operation. + When the value was created by {!val:of_string} this is an [O(1)] operation for strings, + and [O(n)] operation for sets and maps, as it requires unmarshaling. + The unmarshalled value is not cached, so each unmarshal call has the same cost. + *) + + val maybe_unmarshal : Type.t -> [< maybe] t -> present t + (** [maybe_unmarshal ty t] returns [t] with both a Value and its marshaled form. + + Called {!val:unmarshal} internally if [t] doesn't contain a {type:Value.t}. + + Same complexity as !{val:unmarshal}. + *) + + val open_present : [< present] t -> [> present] t + (** [open_present t] returns [t] as an open polymorphic variant, that can be merged with [absent]. 
*) +end + +type cached_value = present CachedValue.t + +type maybe_cached_value = maybe CachedValue.t + +module Column : sig + type t = { + name: string + ; persistent: bool + ; empty: Value.t + ; default: Value.t option + ; ty: Type.t + ; issetref: bool + } + [@@deriving sexp_of] + + val name_of : t -> string +end + +val tabulate : 'a list -> key_fn:('a -> 'b) -> ('b, 'a) Hashtbl.t + +val values_of_table : ('a, 'b) Hashtbl.t -> 'b list + +module Table : sig + type t' = {name: string; columns: Column.t list; persistent: bool} + [@@deriving sexp_of] + + val sexp_of_t' : t' -> Sexplib0.Sexp.t + + type t = { + name: string + ; columns: (string, Column.t) Hashtbl.t + ; persistent: bool + } + [@@deriving sexp_of] + + val t'_of_t : t -> t' + + val t_of_t' : t' -> t + + val find : string -> t -> Column.t + + val create : name:string -> columns:Column.t list -> persistent:bool -> t + + val name_of : t -> string +end + +type relationship = OneToMany of string * string * string * string + +val sexp_of_relationship : relationship -> Sexplib0.Sexp.t + +module Database : sig + type t' = {tables: Table.t list} + + val sexp_of_t' : t' -> Sexplib0.Sexp.t + + type t = {tables: (string, Table.t) Hashtbl.t} + + val t_of_t' : t' -> t + + val t'_of_t : t -> t' + + val sexp_of_t : t -> Sexplib0.Sexp.t + + val find : string -> t -> Table.t + + val of_tables : Table.t list -> t +end + +type foreign = (string * string * string) list + +val sexp_of_foreign : foreign -> Sexplib0.Sexp.t + +module ForeignMap : sig + include Map.S with type key = string + + type t' = (key * foreign) list + + val sexp_of_t' : t' -> Sexplib0.Sexp.t + + type m = foreign t [@@deriving sexp_of] +end + +type t = { + major_vsn: int + ; minor_vsn: int + ; database: Database.t + ; one_to_many: ForeignMap.m + ; many_to_many: ForeignMap.m +} +[@@deriving sexp_of] + +val database : t -> Database.t + +val table : string -> t -> Table.t + +val empty : t + +val is_table_persistent : t -> string -> bool + +val 
is_field_persistent : t -> string -> string -> bool + +val table_names : t -> string list + +val one_to_many : ForeignMap.key -> t -> foreign + +val many_to_many : ForeignMap.key -> t -> foreign diff --git a/ocaml/database/static_vdis_list.mli b/ocaml/database/static_vdis_list.mli new file mode 100644 index 00000000000..4e59f5b75c1 --- /dev/null +++ b/ocaml/database/static_vdis_list.mli @@ -0,0 +1,23 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type vdi = { + uuid: string + ; reason: string + ; delete_next_boot: bool + ; currently_attached: bool + ; path: string option +} + +val list : unit -> vdi list diff --git a/ocaml/database/string_marshall_helper.ml b/ocaml/database/string_marshall_helper.ml index ba003bee96a..1add3aef7b0 100644 --- a/ocaml/database/string_marshall_helper.ml +++ b/ocaml/database/string_marshall_helper.ml @@ -22,9 +22,7 @@ module D = Debug.Make (struct let name = __MODULE__ end) let ensure_utf8_xml string = let length = String.length string in - let prefix = - Xapi_stdext_encodings.Encodings.UTF8_XML.longest_valid_prefix string - in + let prefix = Xapi_stdext_encodings.Utf8.XML.longest_valid_prefix string in if length > String.length prefix then D.warn "Whilst doing 'set' of structured field, string truncated to: '%s'." 
prefix ; diff --git a/ocaml/database/string_marshall_helper.mli b/ocaml/database/string_marshall_helper.mli new file mode 100644 index 00000000000..2fc57ff97b7 --- /dev/null +++ b/ocaml/database/string_marshall_helper.mli @@ -0,0 +1,19 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val ensure_utf8_xml : string -> string + +val set : ('a -> string) -> 'a list -> string + +val map : ('a -> string) -> ('b -> string) -> ('a * 'b) list -> string diff --git a/ocaml/database/string_unmarshall_helper.mli b/ocaml/database/string_unmarshall_helper.mli new file mode 100644 index 00000000000..3362c9659f0 --- /dev/null +++ b/ocaml/database/string_unmarshall_helper.mli @@ -0,0 +1,19 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +exception Failure of string + +val set : (string -> 'a) -> string -> 'a list + +val map : (string -> 'a) -> (string -> 'b) -> string -> ('a * 'b) list diff --git a/ocaml/database/test_schemas.ml b/ocaml/database/test_schemas.ml index fa2519b5f61..57b92cce060 100644 --- a/ocaml/database/test_schemas.ml +++ b/ocaml/database/test_schemas.ml @@ -1,9 +1,11 @@ +let empty = Schema.Value.string "" + let schema = let _ref = { Schema.Column.name= Db_names.ref ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -13,7 +15,7 @@ let schema = { Schema.Column.name= Db_names.uuid ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -23,7 +25,7 @@ let schema = { Schema.Column.name= Db_names.name_label ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -33,7 +35,7 @@ let schema = { Schema.Column.name= "name__description" ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -43,7 +45,7 @@ let schema = { Schema.Column.name= "type" ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false @@ -73,8 +75,8 @@ let schema = { Schema.Column.name= "protection_policy" ; persistent= true - ; empty= Schema.Value.String "" - ; default= Some (Schema.Value.String "OpaqueRef:NULL") + ; empty + ; default= Some (Schema.Value.string "OpaqueRef:NULL") ; ty= Schema.Type.String ; issetref= false } @@ -93,7 +95,7 @@ let schema = { Schema.Column.name= "VM" ; persistent= true - ; empty= Schema.Value.String "" + ; empty ; default= None ; ty= Schema.Type.String ; issetref= false diff --git a/ocaml/database/test_schemas.mli b/ocaml/database/test_schemas.mli new file mode 100644 index 00000000000..fa4cb6ebac4 --- /dev/null +++ 
b/ocaml/database/test_schemas.mli @@ -0,0 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val schema : Schema.t + +val many_to_many : Schema.t diff --git a/ocaml/database/unit_test_marshall.mli b/ocaml/database/unit_test_marshall.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/unit_test_marshall.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/unit_test_sql.mli b/ocaml/database/unit_test_sql.mli new file mode 100644 index 00000000000..cabf42bbb8e --- /dev/null +++ b/ocaml/database/unit_test_sql.mli @@ -0,0 +1 @@ +(* this file is empty on purpose: this is an executable file *) diff --git a/ocaml/database/xml_spaces.mli b/ocaml/database/xml_spaces.mli new file mode 100644 index 00000000000..4ec7f9016d7 --- /dev/null +++ b/ocaml/database/xml_spaces.mli @@ -0,0 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + *) + +val protect : string -> string + +val unprotect : string -> string diff --git a/ocaml/forkexecd/.gitignore b/ocaml/forkexecd/.gitignore index d9b5b8ca4be..2c89ac5c34f 100644 --- a/ocaml/forkexecd/.gitignore +++ b/ocaml/forkexecd/.gitignore @@ -1,4 +1,7 @@ _build/ +helper/*.o +helper/*.o.d +helper/vfork_helper .merlin *.install diff --git a/ocaml/forkexecd/dune b/ocaml/forkexecd/dune new file mode 100644 index 00000000000..01b51f24ed5 --- /dev/null +++ b/ocaml/forkexecd/dune @@ -0,0 +1,20 @@ +(data_only_dirs helper) + +(rule + (deps (source_tree helper)) + (targets vfork_helper) + (action + (no-infer + (progn + (chdir helper (run make)) + (copy helper/vfork_helper vfork_helper) + ) + ) + ) +) + +(install + (package forkexec) + (section libexec_root) + (files (vfork_helper as xapi/vfork_helper)) +) diff --git a/ocaml/forkexecd/helper/Makefile b/ocaml/forkexecd/helper/Makefile new file mode 100644 index 00000000000..6c14a3aeb6c --- /dev/null +++ b/ocaml/forkexecd/helper/Makefile @@ -0,0 +1,33 @@ +## Set some macro but not override environment ones +CFLAGS ?= -O2 -g -Wall -Werror +LDFLAGS ?= + +all:: vfork_helper + +clean:: + rm -f vfork_helper *.o *.o.d + +%.o: %.c + $(CC) $(CFLAGS) -MMD -MP -MF $@.d -c -o $@ $< + +vfork_helper: vfork_helper.o close_from.o syslog.o + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ -pthread + +-include $(wildcard *.o.d) + +## Fuzzer uses AFL (American Fuzzy Lop). +## +## Use "make fuzz" to build and launch the fuzzer +## +## Use "make show" to look at the first failures (if found). 
+ +fuzz:: + afl-gcc $(CFLAGS) -Wall -Werror -o algo_fuzzer algo_fuzzer.c + rm -rf testcase_dir + mkdir testcase_dir + echo maomaoamaoaoao > testcase_dir/test1 + rm -rf findings_dir/ + afl-fuzz -i testcase_dir -o findings_dir -D -- ./algo_fuzzer + +show:: + cat "$$(ls -1 findings_dir/default/crashes/id* | head -1)" | ./algo_fuzzer diff --git a/ocaml/forkexecd/helper/algo_fuzzer.c b/ocaml/forkexecd/helper/algo_fuzzer.c new file mode 100644 index 00000000000..97eabd48d2c --- /dev/null +++ b/ocaml/forkexecd/helper/algo_fuzzer.c @@ -0,0 +1,246 @@ + +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#undef NDEBUG +#define DEBUG 1 + +#if DEBUG +#define log(fmt, ...) printf(fmt "\n", ##__VA_ARGS__) +#else +#define log(fmt, ...) 
do {} while(0) +#endif + +// include as first file to make sure header is self contained +#include "redirect_algo.h" + +#include +#include +#include +#include +#include +#include +#include + +static int fake_close(int fd); + +typedef struct { + bool open; + bool cloexec; + char *name; +} fd; + +#define NUM_FDS 4096 +static fd fds[NUM_FDS]; + +static bool +fake_close_fds_from(int fd_from) +{ + for (int fd = fd_from; fd < NUM_FDS; ++fd) + fake_close(fd); + + return true; +} + +#define O_WRONLY 1 +static int +fake_open(const char *fn, int dummy) +{ + for (int i = 0; i < NUM_FDS; ++i) + if (!fds[i].open) { + assert(fds[i].name == NULL); + fds[i].name = strdup(fn); + fds[i].open = true; + fds[i].cloexec = false; + return i; + } + assert(0); + return -1; +} + +static int +fake_close(int fd) +{ + assert(fd >= 0); + assert(fd < NUM_FDS); + if (!fds[fd].open) { + errno = EBADF; + return -1; + } + fds[fd].open = false; + free(fds[fd].name); + fds[fd].name = NULL; + return 0; +} + +static int +fake_dup2(int from, int to) +{ + assert(from >= 0 && from < NUM_FDS); + assert(to >= 0 && to < NUM_FDS); + assert(fds[from].open); + assert(from != to); + free(fds[to].name); + fds[to].open = true; + fds[to].name = strdup(fds[from].name); + fds[to].cloexec = false; + return 0; +} + +static int +fake_fcntl(int fd) +{ + assert(fd >= 0 && fd < NUM_FDS); + assert(fds[fd].open); + fds[fd].cloexec = false; + return 0; +} + +int main(int argc, char **argv) +{ + // Input where a given FD goes?? + // No, not enough, can be duplicated. + // Numbers >4096 in 2 bytes not file descriptor, + // (-1 for standard, skip for normal). + // We should add some random fds. 
+ enum { MAX_FILE_BUF = 2048 }; + uint16_t file_buf[MAX_FILE_BUF]; + size_t read = fread(file_buf, 2, MAX_FILE_BUF, stdin); + if (read < 3) + return 0; + + static const char standard_names[][8] = { + "stdin", "stdout", "stderr" + }; + int num_mappings = 0; + uint16_t *num = file_buf; + mapping mappings[MAX_FILE_BUF]; + int i = 0; + for (i = 0; i < 3; ++i) { + mapping *m = &mappings[num_mappings++]; + m->uuid = standard_names[i]; + uint16_t n = *num++; + m->current_fd = n < NUM_FDS ? n : -1; + m->wanted_fd = i; + } + for (; i < read; ++i) { + uint16_t n = *num++; + if (n >= NUM_FDS) + continue; + + mapping *m = &mappings[num_mappings++]; + m->current_fd = n; + m->wanted_fd = -1; + char buf[64]; + sprintf(buf, "file%d", i); + m->uuid = strdup(buf); + } + if (num_mappings > MAX_TOTAL_MAPPINGS) + return 0; + + for (unsigned n = 0; n < num_mappings; ++n) { + mapping *m = &mappings[n]; + int fd = m->current_fd; + if (fd < 0) + continue; + fake_close(fd); + fds[fd].open = true; + fds[fd].name = strdup(m->uuid); + fds[fd].cloexec = true; + } + + // Check in the final file mapping all valid mappings + // have an open file descriptor. + // There should be no duplicate numbers in current_fd. + // current_fd must be in a range. + // Only if wanted_fd >= 0 current_fd can be -1. + // There should be a correspondance between input and output names. + // If current_fd was -1 it will still be -1. + // If wanted_fd >= 0 current_fd should be the same. 
+ + fd_operation operations[MAX_OPERATIONS]; + int num_operations = + redirect_mappings(mappings, num_mappings, operations); + assert(num_operations > 0); + assert(num_operations <= MAX_OPERATIONS); + + for (int i = 0; i < num_operations; ++i) { + const fd_operation* op = &operations[i]; + log("op %d %d %d", op->fd_from, op->fd_to, op->operation); + switch (op->operation) { + case FD_OP_DUP: + if (op->fd_from == op->fd_to) + fake_fcntl(op->fd_from); + else + fake_dup2(op->fd_from, op->fd_to); + break; + case FD_OP_MOVE: + assert(op->fd_from != op->fd_to); + fake_dup2(op->fd_from, op->fd_to); + fake_close(op->fd_from); + break; + case FD_OP_DEVNULL: + // first close old, then create new one + fake_close(op->fd_to); + // TODO ideally we want read only for input for Ocaml did the same... + assert(fake_open("/dev/null", O_WRONLY) == op->fd_to); + break; + case FD_OP_CLOSE_FROM: + fake_close_fds_from(op->fd_from); + break; + default: + assert(0); + } + } + + // check files opened + for (int fd = 0; fd < NUM_FDS; ++fd) + assert(fds[fd].open == (fd < num_mappings)); + + for (int fd = 0; fd < num_mappings; ++fd) { + assert(fds[fd].cloexec == false); + log("file %d %s", fd, fds[fd].name); + } + + // Check in the final file mapping all valid mappings + // has an open file descriptor. + bool already_found[NUM_FDS] = { false, }; + for (unsigned n = 0; n < num_mappings; ++n) { + const int fd = mappings[n].current_fd; + const int wanted = mappings[n].wanted_fd; + if (fd >= 0) { + assert(fd < NUM_FDS); + assert(fds[fd].open); + + // There should be no duplicate numbers in current_fd. + assert(!already_found[fd]); + already_found[fd] = true; + } else { + // Only if wanted_fd >= 0 current_fd can be -1. + assert(mappings[n].wanted_fd >= 0); + assert(fd == -1); + } + + // If wanted_fd >= 0 current_fd should be the same. + if (wanted >= 0) + assert(wanted == fd || fd == -1); + + // current_fd must be in a range. 
+ assert(fd >= -1); + assert(fd < num_mappings); + } + + // There should be a correspondance between input and output names. + // If current_fd was -1 it will still be -1. +} diff --git a/ocaml/forkexecd/helper/close_from.c b/ocaml/forkexecd/helper/close_from.c new file mode 100644 index 00000000000..14e724109df --- /dev/null +++ b/ocaml/forkexecd/helper/close_from.c @@ -0,0 +1,86 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "close_from.h" + +#include +#include +#include +#include +#include +#include + +#ifdef __linux__ +#include +#endif + +// try to use close_range on Linux even if not defined by headers +#if defined(__linux__) && !defined(SYS_close_range) +# if defined(__alpha__) +# define SYS_close_range 546 +# elif defined(__amd64__) || defined(__x86_64__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__hppa__) || defined(__i386__) || \ + defined(__ia64__) || defined(__m68k__) || defined(__mips__) || \ + defined(__powerpc__) || defined(__powerpc64__) || defined(__sparc__) || \ + defined(__s390x__) +# define SYS_close_range 436 +# endif +#endif + +bool +close_fds_from(int fd_from) +{ + // first method, use close_range +#if (defined(__linux__) && defined(SYS_close_range)) \ + || (defined(__FreeBSD__) && defined(CLOSE_RANGE_CLOEXEC)) + static bool close_range_supported = true; + if (close_range_supported) { +#if defined(__linux__) + if (syscall(SYS_close_range, fd_from, ~0U, 0) == 0) +#else + if 
(close_range(fd_from, ~0U, 0) == 0) +#endif + return true; + + if (errno == ENOSYS) + close_range_supported = false; + } +#endif + + // second method, read fds list from /proc + DIR *dir = opendir("/proc/self/fd"); + if (dir) { + const int dir_fd = dirfd(dir); + struct dirent *ent; + while ((ent = readdir(dir)) != NULL) { + char *end = NULL; + unsigned long fd = strtoul(ent->d_name, &end, 10); + if (end == NULL || *end) + continue; + if (fd >= fd_from && fd != dir_fd) + close(fd); + } + closedir(dir); + return true; + } + + // third method, use just a loop + struct rlimit limit; + if (getrlimit(RLIMIT_NOFILE, &limit) < 0) + return false; + for (int fd = fd_from; fd < limit.rlim_cur; ++ fd) + close(fd); + + return true; +} diff --git a/ocaml/forkexecd/helper/close_from.h b/ocaml/forkexecd/helper/close_from.h new file mode 100644 index 00000000000..2d1ae77e527 --- /dev/null +++ b/ocaml/forkexecd/helper/close_from.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#pragma once + +#include + +bool close_fds_from(int fd); diff --git a/ocaml/forkexecd/helper/logs.c b/ocaml/forkexecd/helper/logs.c new file mode 100644 index 00000000000..2f7fab23c2f --- /dev/null +++ b/ocaml/forkexecd/helper/logs.c @@ -0,0 +1,149 @@ +/* + * Copyright (C) Cloud Software Group, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include "logs.h" + +#if FORKEXECD_DEBUG_LOGS + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#define FILE_SIZE (32 * 1024) + +struct priv_mapped_logs { + uint32_t size; + + // Flags, we use characters instead of binary so + // easily see them easily with different tools. + char flags[4]; + char filename[64]; + pid_t pid; + int num; +}; + +// flags order +enum { SUCCESS, FAILURE }; + +mapped_logs mapped_logs_open(void) +{ + static int last_num = 0; + + // create a mapped file with a given size, will write header as structure + // and update using memory + mkdir("/tmp/fe_repl", 0755); + + char tmpl[] = "/tmp/fe_repl/logXXXXXX"; + int fd = mkstemp(tmpl); + if (!fd) + caml_raise_out_of_memory(); + + if (ftruncate(fd, FILE_SIZE) < 0) { + close(fd); + caml_raise_out_of_memory(); + } + + priv_mapped_logs *l = mmap(NULL, FILE_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (l == MAP_FAILED) { + close(fd); + caml_raise_out_of_memory(); + } + close(fd); + + l->size = sizeof(*l); + memcpy(l->flags, "____", 4); + strncpy(l->filename, tmpl, sizeof(l->filename)); + l->pid = getpid(); + l->num = ++last_num; + + return (mapped_logs){l}; +} + +#define DEFINE_RANGE(start, end) \ + char *start = (char*) logs.priv + sizeof(priv_mapped_logs); \ + char *const end = (char*) logs.priv + FILE_SIZE + +void mapped_logs_close(mapped_logs logs) +{ + if (!logs.priv) + 
return; + DEFINE_RANGE(start, end); + bool written = false; + bool success = logs.priv->flags[FAILURE] == '_' && logs.priv->flags[SUCCESS] != '_'; + if (!success) { + FILE *f = fopen("/tmp/fe_repl/all_logs", "a"); + if (f) { + end[-1] = 0; + size_t len = strlen(start); + written = (fwrite(start, 1, len, f) == len); + fclose(f); + } + } + if (written || success) + unlink(logs.priv->filename); + munmap(logs.priv, FILE_SIZE); +} + +void mapped_logs_failure(mapped_logs logs) +{ + if (!logs.priv) + return; + logs.priv->flags[FAILURE] = 'F'; +} + +void mapped_logs_success(mapped_logs logs) +{ + if (!logs.priv) + return; + logs.priv->flags[SUCCESS] = 'S'; +} + +void mapped_logs_add(mapped_logs logs, const char *fmt, ...) +{ + if (!logs.priv) + return; + int save_errno = errno; + DEFINE_RANGE(start, end); + start += strlen(start); + if (start >= end -1) { + errno = save_errno; + return; // no more space + } + size_t len = end - start; + int l = snprintf(start, len, "%d:%d ", (int) logs.priv->pid, logs.priv->num); + if (l >= len) { + errno = save_errno; + return; + } + start += l; + len -= l; + va_list ap; + va_start(ap, fmt); + vsnprintf(start, len, fmt, ap); + va_end(ap); + + errno = save_errno; +} +#endif diff --git a/ocaml/forkexecd/helper/logs.h b/ocaml/forkexecd/helper/logs.h new file mode 100644 index 00000000000..b9f396d6c34 --- /dev/null +++ b/ocaml/forkexecd/helper/logs.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ */ + +// Definitions to write logs into memory mapped objects. +// We use a memory mapped object here because we close file descriptors +// so writing to file using them would cause logs to be lost. + +#pragma once + +#if !defined(FORKEXECD_DEBUG_LOGS) +#define FORKEXECD_DEBUG_LOGS 0 +#endif + +#if (FORKEXECD_DEBUG_LOGS) != 0 && (FORKEXECD_DEBUG_LOGS) != 1 +#error Expected FORKEXECD_DEBUG_LOGS to be defined either 0 or 1 +#endif + +typedef struct priv_mapped_logs priv_mapped_logs; +typedef struct mapped_logs mapped_logs; + +#if FORKEXECD_DEBUG_LOGS +struct mapped_logs { + priv_mapped_logs *priv; +}; +#define NULL_MAPPED_LOGS ((mapped_logs){0}) +mapped_logs mapped_logs_open(void); +void mapped_logs_close(mapped_logs logs); + +// Add a log entry, similar to printf. +void mapped_logs_add(mapped_logs logs, const char *fmt, ...); + +// Mark as failed, any failure will keep the log. +void mapped_logs_failure(mapped_logs logs); + +// Mark as successful, if successful and no failure during +// execution the log will be removed. +void mapped_logs_success(mapped_logs logs); +#else +// Use an empty structure, compiler will strip it passing +// it as a parameter without the needs to change the source +// code. +struct mapped_logs {}; +#define NULL_MAPPED_LOGS ((mapped_logs){}) +static inline mapped_logs mapped_logs_open(void) { + return (mapped_logs){}; +} + +static inline void mapped_logs_close(mapped_logs logs) { +} + +static inline void mapped_logs_failure(mapped_logs logs) { +} + +static inline void mapped_logs_success(mapped_logs logs) { +} + +#define mapped_logs_add(...) \ + do {} while(0) +#endif diff --git a/ocaml/forkexecd/helper/redirect_algo.h b/ocaml/forkexecd/helper/redirect_algo.h new file mode 100644 index 00000000000..d86978cd351 --- /dev/null +++ b/ocaml/forkexecd/helper/redirect_algo.h @@ -0,0 +1,210 @@ +/* Algorithm used to remap file handles before executing a process. 
+ * The algorithm is separated in a different file in order to reuse for + * fuzzing it. + */ + +#pragma once + +#if !defined(DEBUG) +#define DEBUG 0 +#endif + +#if (DEBUG) != 0 && (DEBUG) != 1 +#error Expected DEBUG to be defined either 0 or 1 +#endif + +#ifndef log +#error Expected log macro to be defined +#endif + +#include +#include +#include + +typedef struct { + const char *uuid; + int current_fd; + int wanted_fd; +} mapping; + +typedef struct { + // source file + int fd_from; + // destination file + short fd_to; + // see FD_OP_ constants + uint8_t operation; +} fd_operation; + +typedef enum { + // Duplicate from fd_from to fd_to. + // If fd_from is the same as fd_to remove FD_CLOEXEC flag. + FD_OP_DUP, + // Duplicate from fd_from to fd_to and close fd_from. + FD_OP_MOVE, + // Open /dev/null on fd_to. + FD_OP_DEVNULL, + // Close from fd_from to the sky! + FD_OP_CLOSE_FROM, +} FD_OP; + +#define MAX_OPERATIONS 1024 +#define MAX_TOTAL_MAPPINGS (MAX_OPERATIONS - 4) + +static uint16_t remap_fds(mapping *const mappings, unsigned num_mappings, int from, int to); + +// Given the passed mappings update them (current_fd) and returns the +// requested operations to do the job. +// First 3 mappings should refer to standard file descriptors (stdin, +// stdout, stderr). +// Returns the number of operations to perform or negative if error. 
+static int +redirect_mappings(mapping *const mappings, const unsigned num_mappings, fd_operation *operations) +{ + mapping *const end_mappings = mappings + num_mappings; + uint16_t used_fds[MAX_OPERATIONS] = {0,}; + fd_operation *ops = operations; + +#define DUMP_MAPPINGS do { \ + if (DEBUG) { \ + for (unsigned i = 0; i < num_mappings; ++i) { \ + const mapping *m __attribute__((unused)) = &mappings[i]; \ + log("mapping %s %d %d", m->uuid, m->current_fd, m->wanted_fd); \ + } \ + char lbuf[MAX_OPERATIONS* 16]; \ + lbuf[0] = 0; \ + for (int i = 0; i < MAX_OPERATIONS; ++i) { \ + if (used_fds[i]) \ + sprintf(strchr(lbuf, 0), "%d=%d,", i, used_fds[i]); \ + } \ + log("used %s", lbuf); \ + } \ +} while(0); + + log("handle"); + + // parse all mappings + standard fds, mark ones using range 0-MAX_OPERATIONS + for (mapping *m = mappings; m < end_mappings; ++m) { + if (m->current_fd < 0 || m->current_fd >= MAX_OPERATIONS) + continue; + used_fds[m->current_fd]++; + } + DUMP_MAPPINGS; + + // Move standard file descriptors out of the way. + // Maximum 3 operations. + log("move standard fds away"); + for (mapping *m = mappings; m < end_mappings; ++m) { + const int current_fd = m->current_fd; + if (current_fd < 0 || current_fd > 2) + continue; + // find first available fd to use + int fd = 3; + while (used_fds[fd]) + ++fd; + *ops++ = (fd_operation){ current_fd, fd, FD_OP_DUP }; + uint16_t changed = remap_fds(mappings, num_mappings, current_fd, fd); + log("changed %d from %d to %d", changed, current_fd, fd); + used_fds[current_fd] = 0; + used_fds[fd] = changed; + } + DUMP_MAPPINGS; + + // Move standard fds into proper positions + // Maximum 3 operations (standard fds to be moved). 
+ log("move standard fds correctly"); + for (mapping *m = mappings; m < end_mappings; ++m) { + const int current_fd = m->current_fd; + if (current_fd < 0 || m->wanted_fd < 0) + continue; + int fd = m->wanted_fd; + FD_OP op = FD_OP_DUP; + if (current_fd >= num_mappings) { + // move + op = FD_OP_MOVE; + uint16_t changed = remap_fds(mappings, num_mappings, current_fd, fd); + log("changed %d from %d to %d", changed, current_fd, fd); + used_fds[fd] = changed; + } else { + // duplicate + m->current_fd = fd; + if (--used_fds[current_fd] == 0) + op = FD_OP_MOVE; + used_fds[fd] = 1; + } + *ops++ = (fd_operation){ current_fd, fd, op }; + } + DUMP_MAPPINGS; + + // Remove cloexec on range [3, 3 + num mappings). + // Maximum no standard mappings operations. + log("remove cloexec flags"); + for (int fd = 3; fd < num_mappings; ++fd) { + if (!used_fds[fd]) + continue; + log("remove cloexec from %d", fd); + *ops++ = (fd_operation){ fd, fd, FD_OP_DUP }; + } + DUMP_MAPPINGS; + + // Move all fds left in range [3, 3 + num mappings). + // Maximum no standard mapping operations; then sum with + // the above is the no standard mapping operations. + log("move all fds left in range"); + int last_free = 3; + for (mapping *m = mappings; m < end_mappings; ++m) { + const int current_fd = m->current_fd; + if (m->wanted_fd >= 0) + continue; + if (current_fd < num_mappings && used_fds[current_fd] == 1) + continue; + while (used_fds[last_free]) + ++last_free; + int fd = last_free; + // TODO copied from above + FD_OP op = FD_OP_DUP; + if (current_fd >= num_mappings) { + // move + op = FD_OP_MOVE; + uint16_t changed = remap_fds(mappings, num_mappings, current_fd, fd); + log("changed %d from %d to %d", changed, current_fd, fd); + used_fds[fd] = changed; + } else { + // duplicate + m->current_fd = fd; + if (--used_fds[current_fd] == 0) + op = FD_OP_MOVE; + used_fds[fd] = 1; + } + *ops++ = (fd_operation){ current_fd, fd, op }; + } + DUMP_MAPPINGS; + + // Close extra fds. 
+ *ops++ = (fd_operation){ num_mappings, 0, FD_OP_CLOSE_FROM }; + + // Create missing standard fds. + // Maximum standard mapping operations, but not the above, + // so the sum with move the standard is 3. + for (int fd = 0; fd < 3; ++fd) { + if (used_fds[fd]) + continue; + *ops++ = (fd_operation){ fd, fd, FD_OP_DEVNULL }; + } + + return ops - operations; +} + +static uint16_t +remap_fds(mapping *const mappings, unsigned num_mappings, int from, int to) +{ + uint16_t res = 0; + for (unsigned i = 0; i < num_mappings; ++i) { + mapping *m = &mappings[i]; + if (m->current_fd == from) { + m->current_fd = to; + res++; + } + } + return res; +} diff --git a/ocaml/forkexecd/helper/syslog.c b/ocaml/forkexecd/helper/syslog.c new file mode 100644 index 00000000000..bf584370920 --- /dev/null +++ b/ocaml/forkexecd/helper/syslog.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include "syslog.h" + +#include +#include +#include + +static inline bool ocaml_isprint(const char c) +{ + return c >= ' ' && c < 0x7f; +} + +static inline size_t quoted_length(const char c) +{ + return c == '\\' ? 2 : + ocaml_isprint(c) ? 
1 : + 4; +} + +static const char hex[] = "0123456789ABCDEF"; + +static inline void write_quoted(char *const p, const char c) +{ + if (c == '\\') { + p[0] = p[1] = c; + } else if (ocaml_isprint(c)) { + p[0] = c; + } else { + p[0] = '\\'; + p[1] = 'x'; + p[2] = hex[(c>>4)&0xf]; + p[3] = hex[c&0xf]; + } +} + +static void syslog_line(const char *line, const char *key, int child_pid) +{ + syslog(LOG_DAEMON|LOG_INFO, "%s[%d]: %s", key, child_pid, line); +} + +// Quote and forward every line from "fd" to the syslog. +// "fd" will be closed. +bool forward_to_syslog(int fd, const char *key, int child_pid) +{ +#define syslog_line(line) syslog_line(line, key, child_pid) + FILE *f = fdopen(fd, "r"); + char quoted_buf[64000]; + char *dest = quoted_buf; + char *const dest_end = quoted_buf + sizeof(quoted_buf) - sizeof(" ...") - 1; + bool overflowed = false; + while (true) { + int ch = getc_unlocked(f); + + if (!overflowed && dest != quoted_buf && (ch == '\n' || ch == EOF)) { + *dest = 0; + syslog_line(quoted_buf); + } + + if (ch == EOF) { + bool res = !!feof(f); + fclose(f); + return res; + } + + if (ch == '\n') { + overflowed = false; + dest = quoted_buf; + continue; + } + + if (overflowed) + continue; + + const size_t quoted_len = quoted_length(ch); + if (dest + quoted_len >= dest_end) { + strcpy(dest, " ..."); + syslog_line(quoted_buf); + overflowed = true; + continue; + } + write_quoted(dest, ch); + dest += quoted_len; + } +} diff --git a/ocaml/forkexecd/helper/syslog.h b/ocaml/forkexecd/helper/syslog.h new file mode 100644 index 00000000000..5ce466bc418 --- /dev/null +++ b/ocaml/forkexecd/helper/syslog.h @@ -0,0 +1,21 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#pragma once + +#include + +// Quote and forward every line from "fd" to the syslog. +// "fd" will be closed. +bool forward_to_syslog(int fd, const char *key, int child_pid); diff --git a/ocaml/forkexecd/helper/vfork_helper.c b/ocaml/forkexecd/helper/vfork_helper.c new file mode 100644 index 00000000000..0afd285e094 --- /dev/null +++ b/ocaml/forkexecd/helper/vfork_helper.c @@ -0,0 +1,481 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "close_from.h" +#include "syslog.h" +#include "logs.h" +#include "vfork_helper.h" + +#define log(...) 
do {} while(0) +#include "redirect_algo.h" + +typedef struct { + char **args; + mapping *mappings; + fd_operation operations[MAX_OPERATIONS]; + int err; + const char *err_func; +} exec_info; + +static void adjust_args(char **args, mapping *const mappings, unsigned num_mappings); +static void reset_signal_handlers(void); +static void clear_cgroup(void); +static const char *get_arg(int *argc, char ***argv); +static int get_fd(int *argc, char ***argv); +static void error(int err, const char *msg, ...); +static void init_syslog(const char *key, bool redirect_stderr_to_stdout); + +static int error_fd = -1; + +int main(int argc, char **argv) +{ + unsigned num_mappings = 3; + bool redirect_stderr_to_stdout = false; + const char *key = NULL; + struct rlimit nofile_limit; + mapping mappings_buf[MAX_TOTAL_MAPPINGS]; + exec_info info[1] = { NULL, }; + const char *directory = "/"; + + mapped_logs logs = mapped_logs_open(); +#undef log +#define log(fmt, ...) mapped_logs_add(logs, fmt "\n", ## __VA_ARGS__) +#define log_fail(fmt, ...) do {\ + mapped_logs_failure(logs); \ + mapped_logs_add(logs, fmt "\n", ## __VA_ARGS__); \ +} while(0) + + log("starting"); + + info->mappings = mappings_buf; + for (int i = 0; i < 3; ++i) { + mapping *const m = &info->mappings[i]; + m->uuid = NULL; + m->current_fd = -1; + m->wanted_fd = i; + } + + // Scan all arguments, check them and collect some information. 
+ ++argv; + --argc; + for (;;) { + // we must have an argument left + const char *arg = get_arg(&argc, &argv); + + // next must be a single letter option + if (arg[0] != '-' || arg[1] == 0 || arg[2] != 0) { + log_fail("invalid option %s", arg); + mapped_logs_close(logs); + error(EINVAL, "Invalid option %s", arg); + } + + // final "--" + if (arg[1] == '-') + break; + + switch (arg[1]) { + case 'I': // stdin + info->mappings[0].current_fd = get_fd(&argc, &argv); + break; + case 'O': // stdout + info->mappings[1].current_fd = get_fd(&argc, &argv); + break; + case 'E': // stderr + info->mappings[2].current_fd = get_fd(&argc, &argv); + break; + case 'm': { // mapping + if (num_mappings >= MAX_TOTAL_MAPPINGS) { + log_fail("too many mappings"); + mapped_logs_close(logs); + error(EINVAL, "Too many mappings"); + } + const char *uuid = get_arg(&argc, &argv); + if (strlen(uuid) != 36) { + log_fail("invalid mapping"); + mapped_logs_close(logs); + error(EINVAL, "Invalid mapping UUID"); + } + const int fd = get_fd(&argc, &argv); + mapping* const m = &info->mappings[num_mappings++]; + m->uuid = uuid; + m->current_fd = fd; + m->wanted_fd = -1; + } + break; + case 's': // syslog (with key) + key = get_arg(&argc, &argv); + break; + case 'S': // syslog stderr to stdout + redirect_stderr_to_stdout = true; + break; + case 'd': + directory = get_arg(&argc, &argv); + break; + case 'e': { // error file descriptor + error_fd = get_fd(&argc, &argv); + if (num_mappings >= MAX_TOTAL_MAPPINGS) { + log_fail("too many mappings"); + mapped_logs_close(logs); + error(EINVAL, "Too many mappings"); + } + mapping* const m = &info->mappings[num_mappings++]; + m->uuid = NULL; + m->current_fd = error_fd; + m->wanted_fd = -1; + } + break; + default: + log_fail("invalid option %s", arg); + mapped_logs_close(logs); + error(EINVAL, "Invalid option %s", arg); + } + } + + if (argc < 1) { + log_fail("no args"); + mapped_logs_close(logs); + error(EINVAL, "No command arguments"); + } + + info->args = argv; + + if 
(getrlimit(RLIMIT_NOFILE, &nofile_limit) < 0) { + int err = errno; + log_fail("getrlimit error"); + mapped_logs_close(logs); + error(err, "getrlimit"); + } + + sigset_t sigset; + + // Compute the file operations we need to do for the file mappings + int num_operations = + redirect_mappings(info->mappings, num_mappings, info->operations); + + if (FORKEXECD_DEBUG_LOGS) { + for (size_t n = 0; info->args[n]; ++n) + log("arg %zd %s", n, info->args[n]); + } + + // Rename all command line. + adjust_args(info->args, info->mappings, num_mappings); + + if (FORKEXECD_DEBUG_LOGS) { + for (size_t n = 0; info->args[n]; ++n) + log("arg %zd %s", n, info->args[n]); + } + + reset_signal_handlers(); + + // "." is a no-op, don't call chdir + if (strcmp(directory, ".") != 0 && chdir(directory) < 0) { + int err = errno; + log_fail("chdir %d", err); + mapped_logs_close(logs); + error(err, "chdir"); + } + + // Clear cgroup otherwise systemd will shutdown processes if + // toolstack is restarted. + clear_cgroup(); + + if (setsid() < 0) { + int err = errno; + log_fail("setsid %d", errno); + mapped_logs_close(logs); + error(err, "setsid"); + } + + // Redirect file descriptors. + int err = 0; + const char *err_func = NULL; + for (int i = 0; i < num_operations && err == 0; ++i) { + const fd_operation* const op = &info->operations[i]; + log("op %d %d %d", op->fd_from, op->fd_to, op->operation); + switch (op->operation) { + case FD_OP_DUP: + if (op->fd_from == op->fd_to) { + // These file descriptors came from another process, + // so surely they have the CLOEXEC flag set, nothing + // to do. + break; + } else { + err_func = "dup2"; + if (dup2(op->fd_from, op->fd_to) < 0) + err = errno; + // Track last file descriptor. + // File descriptors are usually duplicated in order to be + // replaced later. 
+ if (op->fd_from == error_fd) + error_fd = op->fd_to; + } + break; + case FD_OP_MOVE: + err_func = "dup2"; + if (dup2(op->fd_from, op->fd_to) < 0) + err = errno; + if (op->fd_from == error_fd) + error_fd = op->fd_to; + close(op->fd_from); + break; + case FD_OP_DEVNULL: + // first close old, then create new one + close(op->fd_to); + // TODO ideally we want read only for input for Ocaml did the same... + err_func = "open"; + errno = 0; + if (open("/dev/null", O_WRONLY) != op->fd_to) + err = errno ? errno : EBADF; + break; + case FD_OP_CLOSE_FROM: + close_fds_from(op->fd_from); + break; + default: + err_func = "safe_exec"; + err = EINVAL; + } + } + if (err != 0) { + info->err = err; + info->err_func = err_func; + log_fail("redirect error %d in %s", err, err_func); + mapped_logs_close(logs); + error(err, "%s", err_func); + } + + if (key) + init_syslog(key, redirect_stderr_to_stdout); + + // Limit number of files limits to standard limit to avoid + // creating bugs with old programs. + if (nofile_limit.rlim_cur > 1024) { + nofile_limit.rlim_cur = 1024; + setrlimit(RLIMIT_NOFILE, &nofile_limit); + } + + // Reset signal mask, inherited by the process we are going to execute + sigemptyset(&sigset); + pthread_sigmask(SIG_SETMASK, &sigset, NULL); + + log("execv..."); + mapped_logs_success(logs); + if (error_fd >= 0) + close(error_fd); + execv(info->args[0], info->args); + log_fail("execve failed %d", errno); + // Here we could set err and err_func but we kept compatibility + // with forkexecd daemon. + exit(errno == ENOENT ? 
127 : 126); +} + +static void +adjust_args(char **args, mapping *const mappings, unsigned num_mappings) +{ + for (; *args; ++args) { + char *arg = *args; + size_t len = strlen(arg); + if (len < 36) + continue; + + // replace uuid with file descriptor + char *uuid = arg + len - 36; + for (unsigned i = 0; i < num_mappings; ++i) { + const mapping *m = &mappings[i]; + if (m->uuid == NULL || strcmp(m->uuid, uuid) != 0) + continue; + sprintf(uuid, "%d", m->current_fd); + } + } +} + +static void +reset_signal_handlers(void) +{ + for (int sig = 1; sig < NSIG; ++sig) { + // these signals can't be overridden + if (sig == SIGKILL || sig == SIGSTOP) + continue; + + // Set signal dispositions. + // This avoids inherit unwanted overrides. + // Also prevent handling unwanted signal handler, especially using vfork(). + // Use ignore SIGPIPE for compatibility with forkexecd. + signal(sig, sig == SIGPIPE ? SIG_IGN : SIG_DFL); + } +} + +static void +clear_cgroup(void) +{ + // list of files to try, terminated by NULL + static const char *const cgroup_files[] = { + "/sys/fs/cgroup/systemd/cgroup.procs", + "/sys/fs/cgroup/cgroup.procs", + NULL + }; + + char string_pid[32]; + int last_error = 0; + const char *last_error_operation = NULL; + const char *last_fn = NULL; + + snprintf(string_pid, sizeof(string_pid), "%ld\n", (long int) getpid()); + + for (const char *const *fn = cgroup_files; *fn != NULL; ++fn) { + last_fn = *fn; + int fd = open(*fn, O_WRONLY|O_CLOEXEC); + if (fd < 0) { + last_error = errno; + last_error_operation = "opening"; + continue; + } + + // Here we are writing to a virtual file system, partial write is + // not possible. + ssize_t written = write(fd, string_pid, strlen(string_pid)); + if (written < 0) { + last_error = errno; + last_error_operation = "writing"; + } + // Error ignored, we are using a virtual file system, only potential + // errors would be if we have a race and the file was replaced or a + // memory error in the kernel. 
+ close(fd); + if (written >= 0) + return; + } + + // If we reach this point something went wrong. + // Report error and exit, unless we are not root user, we should be + // root so probably we are testing. + if (last_error_operation && geteuid() == 0) + error(last_error, "Error %s file %s", last_error_operation, last_fn); +} + +static const char * +get_arg(int *argc, char ***argv) +{ + if (*argc < 0) + error(EINVAL, "Expected one more argument"); + + const char *arg = **argv; + --(*argc); + ++(*argv); + return arg; +} + +static int +get_fd(int *argc, char ***argv) +{ + const char *arg = get_arg(argc, argv); + unsigned long fd = strtoul(arg, NULL, 0); + if (fd < 0 || fd > INT_MAX) + error(EINVAL, "Expected valid file descriptor number"); + return (int) fd; +} + +static void +error(int err, const char *format, ...) +{ + if (error_fd >= 0) { + msg_t msg = { err }; + va_list ap; + va_start(ap, format); + int ignored __attribute__((unused)); + vsnprintf(msg.msg_buf, sizeof(msg.msg_buf), format, ap); + msg.msg_buf[sizeof(msg.msg_buf) - 1] = 0; + va_end(ap); + ignored = write(error_fd, &msg, offsetof(msg_t, msg_buf) + strlen(msg.msg_buf) + 1); + } + exit(125); +} + +static void +init_syslog(const char *key, bool redirect_stderr_to_stdout) +{ + int fds[2]; + if (pipe(fds) < 0) + error(errno, "pipe"); + dup2(fds[1], 1); + if (redirect_stderr_to_stdout) + dup2(fds[1], 2); + close(fds[1]); + + const int child_pid = (int) getpid(); + + pid_t pid = fork(); + if (pid < 0) + error(errno, "fork"); + + if (pid == 0) { + // child + close(0); + close(1); + if (open("/dev/null", O_RDONLY) != 0 + || open("/dev/null", O_WRONLY) != 1) + error(errno, "open"); + dup2(1, 2); + if (fds[0] != 3) { + dup2(fds[0], 3); + fds[0] = 3; + } + close_fds_from(4); + + pid = fork(); + if (pid < 0) + error(errno, "fork"); + if (pid > 0) + // parent + exit(0); + + openlog("forkexecd", 0, LOG_DAEMON); + forward_to_syslog(fds[0], key, child_pid); + exit(0); + } + + close(fds[0]); + + // parent + int 
status; + wait(&status); + if (!WIFEXITED(status)) + error(EPIPE, "syslogger killed by signal"); + + switch (WEXITSTATUS(status)) { + case 0: + // success + return; + case 125: + // forward error, a proper message will be forwarded + exit(125); + } + error(EPIPE, "syslogger"); +} diff --git a/ocaml/forkexecd/helper/vfork_helper.h b/ocaml/forkexecd/helper/vfork_helper.h new file mode 100644 index 00000000000..400c7d45041 --- /dev/null +++ b/ocaml/forkexecd/helper/vfork_helper.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#pragma once + +// Common structure to pass errors from helper to library +typedef struct { + // numeric C error + int err; + // message + char msg_buf[1000]; +} msg_t; diff --git a/ocaml/forkexecd/lib/dune b/ocaml/forkexecd/lib/dune index 59d313a8b24..213ee173e1a 100644 --- a/ocaml/forkexecd/lib/dune +++ b/ocaml/forkexecd/lib/dune @@ -17,4 +17,10 @@ xapi-stdext-unix xapi-tracing ) - (preprocess (per_module ((pps ppx_deriving_rpc) Fe)))) + (preprocess (per_module ((pps ppx_deriving_rpc) Fe))) + (foreign_stubs + (language c) + (names fe_stubs) + (include_dirs ../helper) + (flags :standard -Wall -Werror) + )) diff --git a/ocaml/forkexecd/lib/fe_stubs.c b/ocaml/forkexecd/lib/fe_stubs.c new file mode 100644 index 00000000000..89e14101f13 --- /dev/null +++ b/ocaml/forkexecd/lib/fe_stubs.c @@ -0,0 +1,416 @@ +/* + * Copyright (C) Cloud Software Group, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "../helper/vfork_helper.h" + +#define FOREACH_LIST(name, list) \ + for(value name = (list); name != Val_emptylist; name = Field(name, 1)) + +// Create thread reducing stack usage to a minimum to reduce memory usage. +// Returns error number (like pthread_create). +static int create_thread_minstack(pthread_t *th, void *(*proc)(void *), void *arg); + +static inline void +reap_pid(pid_t pid) +{ + int status; + while (waitpid(pid, &status, 0) < 0 && errno == EINTR) + continue; +} + +static void * +thread_proc_reap(void *arg) +{ + pid_t pid = (pid_t) (intptr_t) arg; + + reap_pid(pid); + + return NULL; +} + +// Appends a string to *p_dest buffer. +// It updates *p_dest to point after copied string. +// Returns copied string. 
+static char * +append_string(char **p_dest, const char *s) +{ + char *const dest = *p_dest; + size_t const size = strlen(s) + 1; + memcpy(dest, s, size); + *p_dest = dest + size; + return dest; +} + +static char** +copy_string_list(value list) +{ + size_t strings_size = 0; + size_t list_size = 0; + char **res, **ptrs; + char *strings; + + FOREACH_LIST(item, list) { + strings_size += strlen(String_val(Field(item, 0))) + 1; + ++list_size; + } + + res = (char **) malloc(sizeof(char*) * (list_size + 1) + strings_size); + if (!res) + return NULL; + + ptrs = res; + strings = (char *) (res + (list_size + 1)); + FOREACH_LIST(item, list) + *ptrs++ = append_string(&strings, String_val(Field(item, 0))); + *ptrs = NULL; + + return res; +} + +static void +close_fd(int *const p_fd) +{ + const int fd = *p_fd; + if (fd >= 0) { + *p_fd = -1; + close(fd); + } +} + +typedef struct { + const char *err_msg; + pid_t pid; + msg_t msg; +} safe_exec_result; + +static int +safe_exec_with_helper(safe_exec_result *res, char **args, char **envs) +{ + int err = EINVAL; + char fd_string[48]; + int pipe_fds[2] = { -1, -1 }; + + res->err_msg = "safe_exec"; + + if (!args[0] || !args[1] || !args[2]) + return EINVAL; + + if (strcmp(args[1], "-e") == 0) { + if (pipe(pipe_fds) < 0) { + res->err_msg = "pipe"; + return errno; + } + sprintf(fd_string, "%d", pipe_fds[1]); + args[2] = fd_string; + } + + sigset_t sigset, old_sigset; + int cancellation_state; + + // Disable cancellation to avoid some signals. + // Glibc use some signals to handle thread cancellation. + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancellation_state); + + // Block all possible signals to avoid receiving some in the child. + // Signal mask is inherited to new process/thread will start with + // all signals disabled and we can safely change them. 
+ sigfillset(&sigset); + pthread_sigmask(SIG_BLOCK, &sigset, &old_sigset); + + // fork + err = 0; + res->pid = vfork(); + if (res->pid < 0) { + err = errno; + } else if (res->pid == 0) { + // child + if (pipe_fds[0] >= 0) + close(pipe_fds[0]); + execve(args[0], args, envs); + // keep compatibility with forkexecd daemon. + _exit(errno == ENOENT ? 127 : 126); + } + + // Restore thread state + pthread_sigmask(SIG_SETMASK, &old_sigset, NULL); + pthread_setcancelstate(cancellation_state, NULL); + + // We don't need writing pipe anymore and we need to detect + // if closed so we can't keep it open + close_fd(&pipe_fds[1]); + + if (err != 0) { + close_fd(&pipe_fds[0]); + res->err_msg = "vfork"; + return err; + } + + // Handle errors from helper + if (pipe_fds[0] >= 0) { + int readed; + // Note that buffer is small and written atomically by + // the helper, no reason for the kernel to split it. + while ((readed = read(pipe_fds[0], &res->msg, sizeof(res->msg))) < 0 + && errno == EINTR) + continue; + close_fd(&pipe_fds[0]); + if (readed != 0 && readed < offsetof(msg_t, msg_buf) + 1) { + // This should never happen !!! + // At this point the process is created and we have a pid so + // we cannot just return an error. + // We could try to wait the process but it should fail, let + // returns success and let caller read process status result. 
+ return 0; + } + res->msg.msg_buf[sizeof(res->msg.msg_buf) - 1] = 0; + if (readed > 0) { + // Wait the process otherwise we'll have a zombie + reap_pid(res->pid); + + res->err_msg = res->msg.msg_buf; + return res->msg.err; + } + } + return 0; +} + +CAMLprim value +caml_safe_exec_with_helper(value args, value environment) +{ + CAMLparam2(args, environment); + + // Copy parameters to C + char **c_args = copy_string_list(args); + char **c_envs = copy_string_list(environment); + if (!c_envs || !c_args) { + free(c_envs); + free(c_args); + caml_raise_out_of_memory(); + } + + // potentially slow section, release Ocaml engine + caml_enter_blocking_section(); + + safe_exec_result res; + int err = safe_exec_with_helper(&res, c_args, c_envs); + + free(c_envs); + free(c_args); + + caml_leave_blocking_section(); + + // error, notify with an exception + if (err != 0) + unix_error(err, res.err_msg, Nothing); + + CAMLreturn(Val_int(res.pid)); +} + +CAMLprim value +caml_pidwaiter_dontwait(value pid_val) +{ + CAMLparam1(pid_val); + pid_t pid = Int_val(pid_val); + + // reap the pid to avoid zombies + pthread_t th; + if (create_thread_minstack(&th, thread_proc_reap, (void *) (intptr_t) pid) == 0) + pthread_detach(th); + + CAMLreturn(Val_unit); +} + +typedef struct { + pid_t pid; + bool timed_out; + bool stop; + struct timespec deadline; + pthread_mutex_t mtx; + pthread_cond_t cond; +} timeout_kill; + +static void * +thread_proc_timeout_kill(void *arg) +{ + timeout_kill *tm = (timeout_kill *) arg; + int res; + + do { + pthread_mutex_lock(&tm->mtx); + res = tm->stop ? 
0: + pthread_cond_timedwait(&tm->cond, &tm->mtx, &tm->deadline); + pthread_mutex_unlock(&tm->mtx); + + if (res == ETIMEDOUT) { + kill(tm->pid, SIGKILL); + tm->timed_out = true; + break; + } + // handle spurious wakeups + } while (!tm->stop && res == 0); + return NULL; +} + +static int +create_thread_minstack(pthread_t *th, void *(*proc)(void *), void *arg) +{ + int res; + + // disable any possible signal handler so we can safely use a small stack + // for the thread + sigset_t sigset, old_sigset; + sigfillset(&sigset); + pthread_sigmask(SIG_BLOCK, &sigset, &old_sigset); + + pthread_attr_t th_attr; + res = pthread_attr_init(&th_attr); + if (!res) { + pthread_attr_setstacksize(&th_attr, PTHREAD_STACK_MIN); + + res = pthread_create(th, &th_attr, proc, arg); + + pthread_attr_destroy(&th_attr); + } + pthread_sigmask(SIG_SETMASK, &old_sigset, NULL); + + return res; +} + +/* + * Wait a process with a given timeout. + * At the end of timeout (if trigger) kill the process. + * To avoid race we need to wait a specific process, but this is blocking + * and we use a timeout to implement the wait. Timer functions are per + * process, not per thread. + * Returns <0 if error, 0 if not timed out, >0 if timedout. 
+ */ +static int +wait_process_timeout(pid_t pid, double timeout) +{ + int err; + + // compute deadline + timeout_kill tm = { pid, false, false }; + if (clock_gettime(CLOCK_MONOTONIC, &tm.deadline) < 0) + return -errno; + + double f = floor(timeout); + tm.deadline.tv_sec += f; + tm.deadline.tv_nsec += (timeout - f) * 1000000000.; + if (tm.deadline.tv_nsec >= 1000000000) { + tm.deadline.tv_nsec -= 1000000000; + tm.deadline.tv_sec += 1; + } + + pthread_condattr_t attr; + err = pthread_condattr_init(&attr); + if (err) + return -err; + err = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC); + if (!err) + err = pthread_cond_init(&tm.cond, &attr); + pthread_condattr_destroy(&attr); + if (err) + return -err; + + err = pthread_mutex_init(&tm.mtx, NULL); + if (err) { + pthread_cond_destroy(&tm.cond); + return -err; + } + + // Create timeout thread + pthread_t th; + err = create_thread_minstack(&th, thread_proc_timeout_kill, &tm); + if (err) { + pthread_cond_destroy(&tm.cond); + pthread_mutex_destroy(&tm.mtx); + return -err; + } + + // Wait the process, we avoid to reap the other process to avoid + // race conditions. Consider: + // - process exit; + // - we reap the thread; + // - OS reuse the pid; + // - timeout thread terminate the pid, now reused. + // Avoiding reaping the process will create a zombie process so + // the KILL would be directed to that. + siginfo_t info; + err = 0; + while (waitid(P_PID, pid, &info, WEXITED|WNOWAIT) == -1) { + if (errno != EINTR) { + err = -errno; + break; + } + } + + // Close the timeout thread + pthread_mutex_lock(&tm.mtx); + // We use also a variable to avoid races like + // - we create the thread; + // - we start waiting the process which was already exited; + // - we came here trying to close the thread; + // - thread waits for signal. 
+ tm.stop = true; + pthread_cond_broadcast(&tm.cond); + pthread_mutex_unlock(&tm.mtx); + pthread_join(th, NULL); + + // Cleanup + pthread_cond_destroy(&tm.cond); + pthread_mutex_destroy(&tm.mtx); + + return err ? err : (tm.timed_out ? 1 : 0); +} + +CAMLprim value +caml_pidwaiter_waitpid(value timeout_value, value pid_value) +{ + CAMLparam0(); + double timeout = timeout_value == Val_none ? 0 : Double_val(Some_val(timeout_value)); + pid_t pid = Int_val(pid_value); + + caml_enter_blocking_section(); + + bool timed_out = false; + int err = 0; + if (timeout > 0) { + int res = wait_process_timeout(pid, timeout); + if (res < 0) + err = -res; + else if (res != 0) + timed_out = true; + } + + caml_leave_blocking_section(); + + if (err) + unix_error(err, "waitpid", Nothing); + + CAMLreturn(timed_out ? Val_true: Val_false); +} diff --git a/ocaml/forkexecd/lib/fe_systemctl.ml b/ocaml/forkexecd/lib/fe_systemctl.ml index ec1e238d387..00b4371445d 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.ml +++ b/ocaml/forkexecd/lib/fe_systemctl.ml @@ -107,10 +107,10 @@ let show ~service = let stop ~service = action ~service "stop" ; (* Stopping shouldn't fail because it should fall back to SIGKILL which should almost always work, - * unless there is a kernel bug that keeps a process stuck. - * In the unlikely scenario that this does fail we leave the transient service file behind - * so that the failure can be investigated. - * *) + unless there is a kernel bug that keeps a process stuck. + In the unlikely scenario that this does fail we leave the transient service file behind + so that the failure can be investigated. + *) let status = show ~service in (* allow systemd to garbage-collect the status and the unit, preventing leaks. * See CollectMode in systemd.unit(5) for details. 
*) @@ -121,15 +121,19 @@ let stop ~service = Xapi_stdext_unix.Unixext.unlink_safe destination ; status -let is_active ~service = +let status ~command ~service = let status = Forkhelpers.safe_close_and_exec None None None [] systemctl - ["is-active"; "--quiet"; service] + [command; "--quiet"; service] |> Forkhelpers.waitpid |> snd in Unix.WEXITED 0 = status +let is_active ~service = status ~command:"is-active" ~service + +let is_enabled ~service = status ~command:"is-enabled" ~service + (** path to service file *) let path service = Filename.concat run_path (service ^ ".service") @@ -158,7 +162,7 @@ let start_transient ?env ?properties ?(exec_ty = Type.Simple) ~service cmd args (* If start failed we do not know what state the service is in: * try to stop it and clean up. * Stopping could fail as well, in which case report the original exception. - * *) + *) ( try let (_ : status) = stop ~service in () diff --git a/ocaml/forkexecd/lib/fe_systemctl.mli b/ocaml/forkexecd/lib/fe_systemctl.mli index 5ba44c4e290..908987f8432 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.mli +++ b/ocaml/forkexecd/lib/fe_systemctl.mli @@ -45,6 +45,9 @@ val start_transient : val is_active : service:string -> bool (** [is_active ~service] checks whether the [service] is still running *) +val is_enabled : service:string -> bool +(** [is_enabled ~service] checks whether the [service] is enabled *) + val show : service:string -> status (** [shows ~service] retrieves the exitcodes and PIDs of the specified [service] *) diff --git a/ocaml/forkexecd/lib/forkhelpers.ml b/ocaml/forkexecd/lib/forkhelpers.ml index 2407b86b924..f731eb3e5b9 100644 --- a/ocaml/forkexecd/lib/forkhelpers.ml +++ b/ocaml/forkexecd/lib/forkhelpers.ml @@ -40,12 +40,33 @@ let with_tracing ~tracing ~name f = Tracing.with_tracing ~parent:tracing ~name f let finally = Xapi_stdext_pervasives.Pervasiveext.finally -type pidty = Unix.file_descr * int +(* Use forkexecd daemon instead of vfork implementation if file is present *) +let 
use_daemon = Sys.file_exists "/etc/xensource/forkexec-uses-daemon" + +module FEStubs = struct + external safe_exec_with_helper : string list -> string list -> int + = "caml_safe_exec_with_helper" + + (* timeout <= 0 wait infinite *) + external pidwaiter_waitpid : ?timeout:float -> int -> bool + = "caml_pidwaiter_waitpid" + + (* do not wait for a process, release it, it won't generate a zombie process *) + external pidwaiter_dontwait : int -> unit = "caml_pidwaiter_dontwait" +end + +type waiter = Pidwaiter | Sock of Unix.file_descr + +type pidty = waiter * int (* The forking executioner has been used, therefore we need to tell *it* to waitpid *) -let string_of_pidty (fd, pid) = - Printf.sprintf "(FEFork (%d,%d))" (Fd_send_recv.int_of_fd fd) pid +let string_of_pidty (waiter, pid) = + match waiter with + | Pidwaiter -> + Printf.sprintf "(FEFork (%d))" pid + | Sock fd -> + Printf.sprintf "(FEFork (%d,%d))" (Fd_send_recv.int_of_fd fd) pid exception Subprocess_failed of int @@ -53,7 +74,7 @@ exception Subprocess_killed of int exception Subprocess_timeout -let waitpid (sock, pid) = +let waitpid_daemon sock pid = let status = Fecomms.read_raw_rpc sock in Unix.close sock ; match status with @@ -79,7 +100,7 @@ let waitpid (sock, pid) = (* [waitpid_nohang] reports the status of a socket to a process. The intention is to make this non-blocking. If the process is finished, the socket is closed and not otherwise. *) -let waitpid_nohang (sock, pid) = +let waitpid_nohang_daemon sock pid = let verbose = false in if verbose then D.debug "%s pid=%d" __FUNCTION__ pid ; let fail fmt = Printf.ksprintf failwith fmt in @@ -118,7 +139,7 @@ let waitpid_nohang (sock, pid) = fail "%s: error happened when trying to read the status. %s" __FUNCTION__ (Printexc.to_string exn) -let dontwaitpid (sock, _pid) = +let dontwaitpid_daemon sock _pid = ( try (* Try to tell the child fe that we're not going to wait for it. 
If the other end of the pipe has been closed then this doesn't matter, as this @@ -128,6 +149,27 @@ let dontwaitpid (sock, _pid) = ) ; Unix.close sock +let waitpid (waiter, pid) = + match waiter with + | Pidwaiter -> + Unix.waitpid [] pid + | Sock sock -> + waitpid_daemon sock pid + +let waitpid_nohang (waiter, pid) = + match waiter with + | Pidwaiter -> + Unix.waitpid [Unix.WNOHANG] pid + | Sock sock -> + waitpid_nohang_daemon sock pid + +let dontwaitpid (waiter, pid) = + match waiter with + | Pidwaiter -> + FEStubs.pidwaiter_dontwait pid + | Sock sock -> + dontwaitpid_daemon sock pid + let waitpid_fail_if_bad_exit ty = let _, status = waitpid ty in match status with @@ -140,7 +182,7 @@ let waitpid_fail_if_bad_exit ty = | Unix.WSTOPPED n -> raise (Subprocess_killed n) -let getpid (_sock, pid) = pid +let getpid (_waiter, pid) = pid type 'a result = Success of string * 'a | Failure of string * exn @@ -179,12 +221,9 @@ type syslog_stdout = | Syslog_DefaultKey | Syslog_WithKey of string -(** Safe function which forks a command, closing all fds except a whitelist and - having performed some fd operations in the child *) -let safe_close_and_exec ?tracing ?env stdin stdout stderr +let safe_close_and_exec_daemon ?tracing env stdin stdout stderr (fds : (string * Unix.file_descr) list) ?(syslog_stdout = NoSyslogging) - ?(redirect_stderr_to_stdout = false) (cmd : string) (args : string list) = - with_tracing ~tracing ~name:__FUNCTION__ @@ fun tracing -> + ?(redirect_stderr_to_stdout = false) args = let sock = Fecomms.open_unix_domain_sock_client ?tracing (Filename.concat runtime_path "/xapi/forker/main") @@ -228,7 +267,6 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr List.fold_left maybe_add_id_to_fd_map dest_named_fds predefined_fds in - let env = Option.value ~default:default_path_env_pair env in let syslog_stdout = match syslog_stdout with | NoSyslogging -> @@ -241,7 +279,7 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr Fecomms.write_raw_rpc 
?tracing sock (Fe.Setup { - Fe.cmdargs= cmd :: args + Fe.cmdargs= args ; env= Array.to_list env ; id_to_fd_map ; syslog_stdout @@ -295,7 +333,7 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr match Fecomms.read_raw_rpc ?tracing sock with | Ok (Fe.Execed pid) -> remove_fd_from_close_list sock ; - (sock, pid) + (Sock sock, pid) | Ok status -> let msg = Printf.sprintf @@ -314,6 +352,64 @@ let safe_close_and_exec ?tracing ?env stdin stdout stderr ) close_fds +let safe_close_and_exec_vfork ?tracing env stdin stdout stderr + (fds : (string * Unix.file_descr) list) ?(syslog_stdout = NoSyslogging) + ?(redirect_stderr_to_stdout = false) cmd args = + let string_of_fd (fd : Unix.file_descr) = string_of_int (Obj.magic fd) in + let args = "--" :: args in + let args = if redirect_stderr_to_stdout then "-S" :: args else args in + let args = + match syslog_stdout with + | NoSyslogging -> + args + | Syslog_DefaultKey -> + "-s" :: Filename.basename cmd :: args + | Syslog_WithKey key -> + "-s" :: key :: args + in + let args = + List.fold_right + (fun (uuid, fd) args -> + Unix.clear_close_on_exec fd ; + "-m" :: uuid :: string_of_fd fd :: args + ) + fds args + in + let add_std args arg fd = + match fd with + | Some fd -> + Unix.clear_close_on_exec fd ; + arg :: string_of_fd fd :: args + | None -> + args + in + let args = add_std args "-E" stderr in + let args = add_std args "-O" stdout in + let args = add_std args "-I" stdin in + let args = "/usr/libexec/xapi/vfork_helper" :: "-e" :: "DUMMY" :: args in + (* Convert environment and add tracing variables. 
*) + let env = + List.append (Tracing.EnvHelpers.of_span tracing) (Array.to_list env) + in + let pid = FEStubs.safe_exec_with_helper args env in + (Pidwaiter, pid) + +(** Safe function which forks a command, closing all fds except a whitelist and + having performed some fd operations in the child *) +let safe_close_and_exec ?tracing ?env stdin stdout stderr + (fds : (string * Unix.file_descr) list) ?(syslog_stdout = NoSyslogging) + ?(redirect_stderr_to_stdout = false) (cmd : string) (args : string list) = + with_tracing ~tracing ~name:__FUNCTION__ @@ fun tracing -> + let args = cmd :: args in + let env = Option.value ~default:default_path_env_pair env in + + if not use_daemon then (* Build a list of arguments as helper wants. *) + safe_close_and_exec_vfork ?tracing env stdin stdout stderr fds + ~syslog_stdout ~redirect_stderr_to_stdout cmd args + else + safe_close_and_exec_daemon ?tracing env stdin stdout stderr fds + ~syslog_stdout ~redirect_stderr_to_stdout args + let execute_command_get_output_inner ?tracing ?env ?stdin ?(syslog_stdout = NoSyslogging) ?(redirect_stderr_to_stdout = false) timeout cmd args = @@ -342,7 +438,7 @@ let execute_command_get_output_inner ?tracing ?env ?stdin with_tracing ~tracing ~name:"Forkhelpers.with_logfile_err_fd" @@ fun tracing -> with_logfile_fd "execute_command_get_err" (fun err_fd -> - let sock, pid = + let waiter, pid = safe_close_and_exec ?tracing ?env (Option.map (fun (_, fd, _) -> fd) stdinandpipes) (Some out_fd) (Some err_fd) [] ~syslog_stdout @@ -354,19 +450,38 @@ let execute_command_get_output_inner ?tracing ?env ?stdin close wr ) stdinandpipes ; - ( match timeout with - | Some span -> - let timeout = Clock.Timer.span_to_s span in - Unix.setsockopt_float sock Unix.SO_RCVTIMEO timeout - | None -> - () - ) ; - with_tracing ~tracing ~name:"Forkhelpers.waitpid" @@ fun _ -> - try waitpid (sock, pid) - with Unix.(Unix_error ((EAGAIN | EWOULDBLOCK), _, _)) -> - Unix.kill pid Sys.sigkill ; - ignore (waitpid (sock, pid)) ; - raise 
Subprocess_timeout + match waiter with + | Pidwaiter -> + with_tracing ~tracing ~name:"Forkhelpers.waitpid" + @@ fun _ -> + let timeout = + match timeout with + | Some span -> + Clock.Timer.span_to_s span + | None -> + 0. + in + let timedout = FEStubs.pidwaiter_waitpid ~timeout pid in + let res = Unix.waitpid [] pid in + + if timedout then raise Subprocess_timeout ; + res + | Sock sock -> ( + ( match timeout with + | Some span -> + let timeout = Clock.Timer.span_to_s span in + Unix.setsockopt_float sock Unix.SO_RCVTIMEO timeout + | None -> + () + ) ; + with_tracing ~tracing ~name:"Forkhelpers.waitpid" + @@ fun _ -> + try waitpid_daemon sock pid + with Unix.(Unix_error ((EAGAIN | EWOULDBLOCK), _, _)) -> + Unix.kill pid Sys.sigkill ; + ignore (waitpid_daemon sock pid) ; + raise Subprocess_timeout + ) ) ) with diff --git a/ocaml/forkexecd/src/child.ml b/ocaml/forkexecd/src/child.ml index 5f79f2fb6c9..76a1611ee86 100644 --- a/ocaml/forkexecd/src/child.ml +++ b/ocaml/forkexecd/src/child.ml @@ -274,7 +274,7 @@ let run state comms_sock fd_sock fd_sock_path = let (_ : int list) = Unix.sigprocmask Unix.SIG_BLOCK [Sys.sigchld] in (* First test whether the child has exited - if it has then report this - * via the socket and exit. *) + * via the socket and exit. 
*) match Unix.waitpid [Unix.WNOHANG] result with | pid, status when pid = result -> report_child_exit comms_sock args result status ; diff --git a/ocaml/forkexecd/test/dune b/ocaml/forkexecd/test/dune index 689c972ca5a..bba6499fd1e 100644 --- a/ocaml/forkexecd/test/dune +++ b/ocaml/forkexecd/test/dune @@ -13,6 +13,6 @@ (rule (alias runtest) (package xapi-forkexecd) - (deps fe_test.sh fe_test.exe ../src/fe_main.exe syslog.so) + (deps fe_test.sh fe_test.exe ../src/fe_main.exe syslog.so ../vfork_helper) (action (run ./fe_test.sh))) diff --git a/ocaml/forkexecd/test/fe_test.sh b/ocaml/forkexecd/test/fe_test.sh index fe454e89802..24ee9c21791 100755 --- a/ocaml/forkexecd/test/fe_test.sh +++ b/ocaml/forkexecd/test/fe_test.sh @@ -8,7 +8,9 @@ export FE_TEST=1 SOCKET=${XDG_RUNTIME_DIR}/xapi/forker/main rm -f "$SOCKET" -LD_PRELOAD="$PWD/syslog.so" ../src/fe_main.exe & +LD_PRELOAD="$PWD/syslog.so" \ +TEST_VFORK_HELPER="$PWD/../vfork_helper" \ +../src/fe_main.exe & MAIN=$! cleanup () { kill $MAIN @@ -17,4 +19,6 @@ trap cleanup EXIT INT for _ in $(seq 1 10); do test -S ${SOCKET} || sleep 1 done -echo "" | LD_PRELOAD="$PWD/syslog.so" ./fe_test.exe 16 +echo "" | LD_PRELOAD="$PWD/syslog.so" \ +TEST_VFORK_HELPER="$PWD/../vfork_helper" \ +./fe_test.exe 16 diff --git a/ocaml/forkexecd/test/syslog.c b/ocaml/forkexecd/test/syslog.c index 2316e84a25e..10e3dc3c79f 100644 --- a/ocaml/forkexecd/test/syslog.c +++ b/ocaml/forkexecd/test/syslog.c @@ -18,6 +18,23 @@ if (!old_func) \ old_func = (typeof(name) *) dlsym(RTLD_NEXT, #name); +#define strlcpy _strlcpy +static size_t +strlcpy(char *dest, const char *src, size_t len) +{ + size_t l = strlen(src); + + if (len) { + --len; + if (l <= len) + len = l; + + memcpy(dest, src, len); + dest[len] = 0; + } + return l; +} + int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen) { static const char dev_log[] = "/dev/log"; @@ -119,3 +136,50 @@ void __vsyslog_chk(int priority, int flags, const char *format, va_list ap) { 
vsyslog_internal(priority, format, ap); } + +static char vfork_helper[256] = "/usr/libexec/xapi/vfork_helper"; +static char ld_preload[512]; + +static const char ld_prefix[] = "LD_PRELOAD="; +enum { len_prefix = sizeof(ld_prefix) - 1 }; + +__attribute__((constructor)) +static void initialize(void) +{ + const char *env; + env = getenv("TEST_VFORK_HELPER"); + if (env) + strlcpy(vfork_helper, env, sizeof(vfork_helper)); + env = getenv("LD_PRELOAD"); + if (env) { + strcpy(ld_preload, ld_prefix); + strlcpy(ld_preload + len_prefix, env, sizeof(ld_preload) - len_prefix); + } +} + +int execve(const char *pathname, char *const argv[], char *const envp[]) +{ + START(execve); + + if (strcmp(pathname, "/usr/libexec/xapi/vfork_helper") == 0) + pathname = vfork_helper; + + if (envp && ld_preload[0]) { + bool ok = false; + size_t num_env = 0; + for (char * const *e = envp; *e; ++e) { + ++num_env; + if (strncmp(*e, ld_prefix, len_prefix) == 0) + ok = true; + } + if (!ok) { + // allocate on stack, we could be inside a vfork() created process + char **new_envs = alloca(sizeof(char*) * (num_env + 2)); + *new_envs = ld_preload; + memcpy(new_envs + 1, envp, sizeof(char*) * (num_env + 1)); + envp = new_envs; + } + } + + return old_func(pathname, argv, envp); +} diff --git a/ocaml/gencert/dune b/ocaml/gencert/dune index cbd5cd73ae2..608b274963f 100644 --- a/ocaml/gencert/dune +++ b/ocaml/gencert/dune @@ -6,7 +6,7 @@ (libraries angstrom astring - cstruct + digestif forkexec mirage-crypto mirage-crypto-pk @@ -52,7 +52,7 @@ (modules test_lib test_pem) (libraries alcotest - cstruct + digestif fmt gencertlib mirage-crypto @@ -64,6 +64,7 @@ rresult x509 xapi-consts + xapi-datamodel xapi-stdext-unix ) (deps diff --git a/ocaml/gencert/gencert.ml b/ocaml/gencert/gencert.ml index 0d3284379ff..f507879b2ca 100644 --- a/ocaml/gencert/gencert.ml +++ b/ocaml/gencert/gencert.ml @@ -47,22 +47,20 @@ let main ~dbg ~path ~cert_gid ~sni () = init_inventory () ; let generator path = match sni with - | 
SNI.Default -> - let name, ip = - match Networking_info.get_management_ip_addr ~dbg with - | None -> - D.error "gencert.ml: cannot get management ip address!" ; - exit 1 - | Some x -> - x - in - let dns_names = Networking_info.dns_names () in - let ips = [ip] in - let (_ : X509.Certificate.t) = - Gencertlib.Selfcert.host ~name ~dns_names ~ips ~valid_for_days path - cert_gid - in - () + | SNI.Default -> ( + match Networking_info.get_host_certificate_subjects ~dbg with + | Error cause -> + let msg = Networking_info.management_ip_error_to_string cause in + D.error + "gencert.ml: failed to generate certificate subjects because %s" msg ; + exit 1 + | Ok (name, dns_names, ips) -> + let _ : X509.Certificate.t = + Gencertlib.Selfcert.host ~name ~dns_names ~ips ~valid_for_days path + cert_gid + in + () + ) | SNI.Xapi_pool -> let uuid = Inventory.lookup Inventory._installation_uuid in let (_ : X509.Certificate.t) = diff --git a/ocaml/gencert/lib.ml b/ocaml/gencert/lib.ml index cd964276e65..b25f4db2633 100644 --- a/ocaml/gencert/lib.ml +++ b/ocaml/gencert/lib.ml @@ -34,8 +34,7 @@ let validate_private_key pkcs8_private_key = let key_type = X509.(Key_type.to_string (Private_key.key_type key)) in Error (`Msg (server_certificate_key_algorithm_not_supported, [key_type])) in - let raw_pem = Cstruct.of_string pkcs8_private_key in - X509.Private_key.decode_pem raw_pem + X509.Private_key.decode_pem pkcs8_private_key |> R.reword_error (fun (`Msg err_msg) -> let unknown_algorithm = "Unknown algorithm " in if Astring.String.is_prefix ~affix:"multi-prime RSA" err_msg then @@ -56,9 +55,8 @@ let validate_private_key pkcs8_private_key = ) >>= ensure_rsa_key_length -let pem_of_string x ~error_invalid = - let raw_pem = Cstruct.of_string x in - X509.Certificate.decode_pem raw_pem +let decode_cert pem ~error_invalid = + X509.Certificate.decode_pem pem |> R.reword_error (fun (`Msg err_msg) -> D.info {|Failed to validate certificate because "%s"|} err_msg ; `Msg (error_invalid, []) @@ -76,7 +74,7 
@@ let assert_not_expired ~now certificate ~error_not_yet ~error_expired = let _validate_not_expired ~now (blob : string) ~error_invalid ~error_not_yet ~error_expired = - pem_of_string blob ~error_invalid >>= fun cert -> + decode_cert blob ~error_invalid >>= fun cert -> assert_not_expired ~now cert ~error_not_yet ~error_expired let validate_not_expired x ~error_not_yet ~error_expired ~error_invalid = @@ -93,16 +91,15 @@ let validate_pem_chain ~pem_leaf ~pem_chain now private_key = | _ -> Error (`Msg (server_certificate_key_mismatch, [])) in - let ensure_sha256_signature_algorithm certificate = + let ensure_signature_algorithm certificate = match X509.Certificate.signature_algorithm certificate with - | Some (_, `SHA256) -> + | Some (_, (`SHA256 | `SHA512)) -> Ok certificate | _ -> Error (`Msg (server_certificate_signature_not_supported, [])) in let validate_chain pem_chain = - let raw_pem = Cstruct.of_string pem_chain in - X509.Certificate.decode_pem_multiple raw_pem |> function + X509.Certificate.decode_pem_multiple pem_chain |> function | Ok (_ :: _ as certs) -> Ok certs | Ok [] -> @@ -116,7 +113,7 @@ let validate_pem_chain ~pem_leaf ~pem_chain now private_key = ~error_not_yet:server_certificate_not_valid_yet ~error_expired:server_certificate_expired >>= ensure_keys_match private_key - >>= ensure_sha256_signature_algorithm + >>= ensure_signature_algorithm >>= fun cert -> match Option.map validate_chain pem_chain with | None -> @@ -135,17 +132,13 @@ let install_server_certificate ~pem_chain ~pem_leaf ~pkcs8_private_key ~server_cert_path ~cert_gid = let now = Ptime_clock.now () in validate_private_key pkcs8_private_key >>= fun priv -> - let pkcs8_private_key = - X509.Private_key.encode_pem priv |> Cstruct.to_string - in + let pkcs8_private_key = X509.Private_key.encode_pem priv in validate_pem_chain ~pem_leaf ~pem_chain now priv >>= fun (cert, chain) -> - let pem_leaf = X509.Certificate.encode_pem cert |> Cstruct.to_string in + let pem_leaf = 
X509.Certificate.encode_pem cert in Option.fold ~none:(Ok [pkcs8_private_key; pem_leaf]) ~some:(fun chain -> - let pem_chain = - X509.Certificate.encode_pem_multiple chain |> Cstruct.to_string - in + let pem_chain = X509.Certificate.encode_pem_multiple chain in Ok [pkcs8_private_key; pem_leaf; pem_chain] ) chain diff --git a/ocaml/gencert/selfcert.ml b/ocaml/gencert/selfcert.ml index 3b022bcb19f..68ff2125dea 100644 --- a/ocaml/gencert/selfcert.ml +++ b/ocaml/gencert/selfcert.ml @@ -43,7 +43,7 @@ let valid_from' date = (* Needed to initialize the rng to create random serial codes when signing certificates *) -let () = Mirage_crypto_rng_unix.initialize (module Mirage_crypto_rng.Fortuna) +let () = Mirage_crypto_rng_unix.use_default () (** [write_cert] writes a PKCS12 file to [path]. The typical file extension would be ".pem". It attempts to do that atomically by @@ -109,6 +109,7 @@ let generate_pub_priv_key length = let stdout, _stderr = call_openssl args in Ok stdout with e -> + Backtrace.is_important e ; let msg = "generating RSA key failed" in D.error "selfcert.ml: %s" msg ; Debug.log_backtrace e (Backtrace.get e) ; @@ -116,7 +117,6 @@ let generate_pub_priv_key length = in let* privkey = rsa_string - |> Cstruct.of_string |> X509.Private_key.decode_pem |> R.reword_error (fun _ -> R.msg "decoding private key failed") in @@ -131,9 +131,7 @@ let selfsign' issuer extensions key_length expiration = let* cert = sign expiration privkey pubkey issuer req extensions in let key_pem = X509.Private_key.encode_pem privkey in let cert_pem = X509.Certificate.encode_pem cert in - let pkcs12 = - String.concat "\n\n" [Cstruct.to_string key_pem; Cstruct.to_string cert_pem] - in + let pkcs12 = String.concat "\n\n" [key_pem; cert_pem] in Ok (cert, pkcs12) let selfsign issuer extensions key_length expiration certfile cert_gid = diff --git a/ocaml/gencert/selfcert.mli b/ocaml/gencert/selfcert.mli index 2e073725e02..d8ce652f8a5 100644 --- a/ocaml/gencert/selfcert.mli +++ 
b/ocaml/gencert/selfcert.mli @@ -23,7 +23,7 @@ val write_certs : string -> int -> string -> (unit, [> Rresult.R.msg]) result val host : name:string -> dns_names:string list - -> ips:Cstruct.t list + -> ips:string list -> ?valid_from:Ptime.t (* default: now *) -> valid_for_days:int -> string diff --git a/ocaml/gencert/test_lib.ml b/ocaml/gencert/test_lib.ml index 379eb35f2e3..e2a71225d90 100644 --- a/ocaml/gencert/test_lib.ml +++ b/ocaml/gencert/test_lib.ml @@ -8,7 +8,7 @@ open Rresult.R.Infix let ( let* ) = Rresult.R.bind (* Initialize RNG for testing certificates *) -let () = Mirage_crypto_rng_unix.initialize (module Mirage_crypto_rng.Fortuna) +let () = Mirage_crypto_rng_unix.use_default () let time_of_rfc3339 date = match Ptime.of_rfc3339 date with @@ -50,6 +50,11 @@ let valid_leaf_certificates = , "2020-02-01T00:00:00Z" , `SHA256 ) + ; ( "Valid, SHA512, matches key" + , "pkey_rsa_2048" + , "2020-02-01T00:00:00Z" + , `SHA512 + ) ] (* ( description, leaf_private_key, expected_private_key, time_of_validation, @@ -80,6 +85,14 @@ let invalid_leaf_certificates = , server_certificate_key_mismatch , [] ) + ; ( "Valid, SHA512, keys do not match" + , "pkey_rsa_2048" + , "pkey_rsa_4096" + , "2020-02-01T00:00:00Z" + , `SHA512 + , server_certificate_key_mismatch + , [] + ) ; ( "Valid, SHA1, matching keys" , "pkey_rsa_2048" , "pkey_rsa_2048" @@ -166,11 +179,20 @@ let test_valid_leaf_cert pem_leaf time pkey () = match validate_pem_chain ~pem_leaf ~pem_chain:None time pkey with | Ok _ -> () - | Error (`Msg (_, msg)) -> + | Error (`Msg err) -> + let err_to_str (name, params) = + let Datamodel_types.{err_doc; err_params; _} = + Hashtbl.find Datamodel_errors.errors name + in + let args = List.combine err_params params in + Format.asprintf "%s %a" err_doc + Fmt.(Dump.list (pair ~sep:(Fmt.any ":@ ") string string)) + args + in Alcotest.fail (Format.asprintf "Valid certificate could not be validated: %a" - Fmt.(Dump.list string) - msg + (Fmt.of_to_string err_to_str) + err ) let 
test_invalid_cert pem_leaf time pkey error reason = @@ -182,7 +204,7 @@ let test_invalid_cert pem_leaf time pkey error reason = "Error must match" (error, reason) msg let load_pkcs8 name = - X509.Private_key.decode_pem (Cstruct.of_string (load_test_data name)) + X509.Private_key.decode_pem (load_test_data name) |> Rresult.R.reword_error (fun (`Msg msg) -> `Msg (Printf.sprintf "Could not load private key with name '%s': %s" name @@ -200,7 +222,6 @@ let sign_leaf_cert host_name digest pkey_leaf = load_pkcs8 "pkey_rsa_4096" >>= fun pkey_sign -> sign_cert host_name ~pkey_sign digest pkey_leaf >>| X509.Certificate.encode_pem - >>| Cstruct.to_string let valid_leaf_cert_tests = List.map @@ -278,8 +299,7 @@ let valid_chain_cert_tests = (pkey_root, Ok []) key_chain in sign_leaf_cert host_name `SHA256 pkey_leaf >>= fun pem_leaf -> - chain >>| X509.Certificate.encode_pem_multiple >>| Cstruct.to_string - >>| fun pem_chain -> + chain >>| X509.Certificate.encode_pem_multiple >>| fun pem_chain -> test_valid_cert_chain ~pem_leaf ~pem_chain time pkey_leaf in [("Validation of a supported certificate chain", `Quick, test_cert)] diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index b8d12bdecf4..4372877b995 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -3866,7 +3866,9 @@ module VIF = struct , "order in which VIF backends are created by xapi" ) ] - "device" "order in which VIF backends are created by xapi" + "device" + "order in which VIF backends are created by xapi. Guaranteed to \ + be an unsigned decimal integer." ; field ~qualifier:StaticRO ~ty:(Ref _network) ~lifecycle: [ @@ -7621,6 +7623,10 @@ module VM_guest_metrics = struct ~ty:Bool ~default_value:(Some (VBool false)) "PV_drivers_detected" "At least one of the guest's devices has successfully connected to \ the backend." + ; field ~qualifier:DynamicRO ~lifecycle:[] + ~ty:(Map (String, String)) + ~default_value:(Some (VMap [])) "services" + "The guest's services data." 
] () end @@ -8972,7 +8978,7 @@ module Message = struct let get_all_records_where = call ~name:"get_all_records_where" ~lifecycle:[(Published, rel_orlando, "")] - ~params:[(String, "expr", "The expression to match (not currently used)")] + ~params:[(String, "expr", "The expression to match")] ~flags:[`Session] ~result:(Map (Ref _message, Record _message), "The messages") ~allowed_roles:_R_READ_ONLY () @@ -10539,12 +10545,12 @@ let all_system = (** These are the pairs of (object, field) which are bound together in the database schema. - + It is assumed that, for any entry (p, p'), neither p nor p' appears in any other entry. It may be the case that p = p', which is the only instance where some object-field pair may appear more than once. - + This is implicitly assumed by other code which treats this list - and its symmetric closure - as an association list without duplicate keys. *) diff --git a/ocaml/idl/datamodel_cluster.ml b/ocaml/idl/datamodel_cluster.ml index dba9b76c73b..c3556fee726 100644 --- a/ocaml/idl/datamodel_cluster.ml +++ b/ocaml/idl/datamodel_cluster.ml @@ -204,7 +204,7 @@ let t = (Some (VString Constants.default_smapiv3_cluster_stack)) "Simply the string 'corosync'. No other cluster stacks are \ currently supported" - ; field ~qualifier:StaticRO ~lifecycle ~ty:Int "cluster_stack_version" + ; field ~qualifier:StaticRO ~lifecycle:[] ~ty:Int "cluster_stack_version" ~default_value:(Some (VInt 2L)) "Version of cluster stack, not writable via the API. 
Defaulting to \ 2 for backwards compatibility when upgrading from a cluster \ @@ -219,6 +219,9 @@ let t = ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int "live_hosts" ~default_value:(Some (VInt 0L)) "Current number of live hosts, according to the cluster stack" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int "expected_hosts" + ~default_value:(Some (VInt 0L)) + "Total number of hosts expected by the cluster stack" ] @ allowed_and_current_operations cluster_operation @ [ diff --git a/ocaml/idl/datamodel_common.ml b/ocaml/idl/datamodel_common.ml index 50bc585b7ac..c5b3376624d 100644 --- a/ocaml/idl/datamodel_common.ml +++ b/ocaml/idl/datamodel_common.ml @@ -10,7 +10,7 @@ open Datamodel_roles to leave a gap for potential hotfixes needing to increment the schema version.*) let schema_major_vsn = 5 -let schema_minor_vsn = 786 +let schema_minor_vsn = 790 (* Historical schema versions just in case this is useful later *) let rio_schema_major_vsn = 5 @@ -578,8 +578,8 @@ let get_deprecated lifecycle = with Not_found -> None let call ~name ?(doc = "") ?(in_oss_since = Some "3.0.3") ?result - ?(flags = [`Session; `Async]) ?(effect = true) ?(tag = Custom) ?(errs = []) - ?(custom_marshaller = false) ?(db_only = false) + ?(flags = [`Session; `Async]) ?(has_effect = true) ?(tag = Custom) + ?(errs = []) ?(custom_marshaller = false) ?(db_only = false) ?(no_current_operations = false) ?(secret = false) ?(hide_from_docs = false) ?(pool_internal = false) ~allowed_roles ?(map_keys_roles = []) ?(params = []) ?versioned_params ?lifecycle ?(doc_tags = []) ?forward_to () @@ -633,7 +633,7 @@ let call ~name ?(doc = "") ?(in_oss_since = Some "3.0.3") ?result ; msg_db_only= db_only ; msg_release= call_release ; msg_lifecycle= Lifecycle.from lifecycle - ; msg_has_effect= effect + ; msg_has_effect= has_effect ; msg_tag= tag ; msg_obj_name= "" ; msg_force_custom= None @@ -659,8 +659,8 @@ let operation_enum x = (** Make an object field record *) let field ?(in_oss_since = Some "3.0.3") 
?(internal_only = false) ?(ignore_foreign_key = false) ?(writer_roles = None) ?(reader_roles = None) - ?(qualifier = RW) ?(ty = String) ?(effect = false) ?(default_value = None) - ?(persist = true) ?(map_keys_roles = []) + ?(qualifier = RW) ?(ty = String) ?(has_effect = false) + ?(default_value = None) ?(persist = true) ?(map_keys_roles = []) ?(* list of (key_name,(writer_roles)) for a map field *) lifecycle ?(doc_tags = []) name desc = let lifecycle = @@ -695,7 +695,7 @@ let field ?(in_oss_since = Some "3.0.3") ?(internal_only = false) ; full_name= [name] ; field_description= desc ; field_persist= persist - ; field_has_effect= effect + ; field_has_effect= has_effect ; field_ignore_foreign_key= ignore_foreign_key ; field_setter_roles= writer_roles ; field_getter_roles= reader_roles diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index b1ac98268c4..62cf8d8452a 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -68,7 +68,7 @@ let _ = "The license-server connection details (address or port) were missing or \ incomplete." () ; - error Api_errors.license_checkout_error ["reason"] + error Api_errors.license_checkout_error ["code"; "message"] ~doc:"The license for the edition you requested is not available." () ; error Api_errors.license_file_deprecated [] ~doc: @@ -532,31 +532,14 @@ let _ = "You attempted an operation on a VM which requires a more recent version \ of the PV drivers. Please upgrade your PV drivers." () ; - error Api_errors.vm_lacks_feature_shutdown ["vm"] - ~doc: - "You attempted an operation which needs the cooperative shutdown feature \ - on a VM which lacks it." - () ; - error Api_errors.vm_lacks_feature_vcpu_hotplug ["vm"] - ~doc: - "You attempted an operation which needs the VM hotplug-vcpu feature on a \ - VM which lacks it." 
- () ; - error Api_errors.vm_lacks_feature_suspend ["vm"] - ~doc: - "You attempted an operation which needs the VM cooperative suspend \ - feature on a VM which lacks it." - () ; - error Api_errors.vm_lacks_feature_static_ip_setting ["vm"] - ~doc: - "You attempted an operation which needs the VM static-ip-setting feature \ - on a VM which lacks it." - () ; error Api_errors.vm_lacks_feature ["vm"] ~doc:"You attempted an operation on a VM which lacks the feature." () ; + error Api_errors.vm_non_suspendable ["vm"; "reason"] + ~doc:"You attempted an operation on a VM which is not suspendable." () ; error Api_errors.vm_is_template ["vm"] ~doc:"The operation attempted is not valid for a template VM" () ; - error Api_errors.other_operation_in_progress ["class"; "object"] + error Api_errors.other_operation_in_progress + ["class"; "object"; "operation_type"; "operation_ref"] ~doc:"Another operation involving the object is currently in progress" () ; error Api_errors.vbd_not_removable_media ["vbd"] ~doc:"Media could not be ejected because it is not removable" () ; @@ -665,6 +648,11 @@ let _ = "The specified server is disabled and cannot be re-enabled until after \ it has rebooted." () ; + error Api_errors.host_disabled_indefinitely ["host"] + ~doc: + "The specified server is disabled and can only be re-enabled manually \ + with Host.enable." + () ; error Api_errors.no_hosts_available [] ~doc:"There were no servers available to complete the specified operation." () ; @@ -897,6 +885,14 @@ let _ = the pool coordinator. Make sure the sm are of the same versions and try \ again." 
() ; + error Api_errors.pool_joining_pool_cannot_enable_clustering_on_vlan_network + ["vlan"] ~doc:"The remote pool cannot enable clustering on vlan network" () ; + error Api_errors.pool_joining_host_must_have_only_one_IP_on_clustering_network + [] + ~doc: + "The host joining the pool must have one and only one IP on the \ + clustering network" + () ; (* External directory service *) error Api_errors.subject_cannot_be_resolved [] @@ -1700,8 +1696,8 @@ let _ = ~doc:"The provided certificate has expired." () ; error Api_errors.server_certificate_signature_not_supported [] ~doc: - "The provided certificate is not using the SHA256 (SHA2) signature \ - algorithm." + "The provided certificate is not using one of the following SHA2 \ + signature algorithms: SHA256, SHA512." () ; error Api_errors.server_certificate_chain_invalid [] @@ -1913,6 +1909,11 @@ let _ = () ; error Api_errors.invalid_base_url ["url"] ~doc:"The base url in the repository is invalid." () ; + error Api_errors.blocked_repo_url ["url"] + ~doc: + "Cannot create the repository as the url is blocked, please check your \ + settings." + () ; error Api_errors.invalid_gpgkey_path ["gpgkey_path"] ~doc:"The GPG public key file name in the repository is invalid." () ; error Api_errors.repository_already_exists ["ref"] @@ -2028,6 +2029,27 @@ let _ = error Api_errors.too_many_groups [] ~doc:"VM can only belong to one group." () ; + error Api_errors.enable_ssh_failed ["host"] + ~doc:"Failed to enable SSH access." () ; + + error Api_errors.disable_ssh_failed ["host"] + ~doc:"Failed to disable SSH access." () ; + + error Api_errors.enable_ssh_partially_failed ["hosts"] + ~doc:"Some of hosts failed to enable SSH access." () ; + + error Api_errors.disable_ssh_partially_failed ["hosts"] + ~doc:"Some of hosts failed to disable SSH access." () ; + + error Api_errors.set_ssh_timeout_partially_failed ["hosts"] + ~doc:"Some hosts failed to set SSH timeout." 
() ; + + error Api_errors.set_console_timeout_partially_failed ["hosts"] + ~doc:"Some hosts failed to set console timeout." () ; + + error Api_errors.set_ssh_auto_mode_partially_failed ["hosts"] + ~doc:"Some hosts failed to set SSH auto mode." () ; + error Api_errors.host_driver_no_hardware ["driver variant"] ~doc:"No hardware present for this host driver variant" () ; @@ -2037,6 +2059,9 @@ let _ = enable it in XC or run xe pool-enable-tls-verification instead." () ; + error Api_errors.sysprep ["vm"; "message"] + ~doc:"VM.sysprep error with details in the message" () ; + message (fst Api_messages.ha_pool_overcommitted) ~doc: diff --git a/ocaml/idl/datamodel_host.ml b/ocaml/idl/datamodel_host.ml index aecb4a9de0a..ed2aef4bfa8 100644 --- a/ocaml/idl/datamodel_host.ml +++ b/ocaml/idl/datamodel_host.ml @@ -625,12 +625,37 @@ let disable = , "Puts the host into a state in which no new VMs can be started. \ Currently active VMs on the host continue to execute." ) + ; ( Changed + , "25.31.0" + , "Added auto_enable option to allow persisting the state across \ + toolstack restarts and host reboots." + ) ] ~name:"disable" ~doc: "Puts the host into a state in which no new VMs can be started. \ Currently active VMs on the host continue to execute." - ~params:[(Ref _host, "host", "The Host to disable")] + ~versioned_params: + [ + { + param_type= Ref _host + ; param_name= "host" + ; param_doc= "The Host to disable" + ; param_release= rio_release + ; param_default= None + } + ; { + param_type= Bool + ; param_name= "auto_enable" + ; param_doc= + "If true (default), the host will be re-enabled after a toolstack \ + restart automatically. If false, the host will be disabled \ + indefinitely, across toolstack restarts and host reboots, until \ + re-enabled explicitly with Host.enable." 
+ ; param_release= numbered_release "25.31.0" + ; param_default= Some (VBool true) + } + ] ~allowed_roles:(_R_POOL_OP ++ _R_CLIENT_CERT) () @@ -1297,14 +1322,70 @@ let create_params = ; param_doc= "The SHA256 checksum of updateinfo of the most recently applied update \ on the host" - ; param_release= numbered_release "24.39.0-next" + ; param_release= numbered_release "24.40.0" ; param_default= Some (VString "") } + ; { + param_type= Bool + ; param_name= "ssh_enabled" + ; param_doc= "True if SSH access is enabled for the host" + ; param_release= numbered_release "25.21.0" + ; param_default= Some (VBool Constants.default_ssh_enabled) + } + ; { + param_type= Int + ; param_name= "ssh_enabled_timeout" + ; param_doc= + "The timeout in seconds after which SSH access will be automatically \ + disabled (0 means never), this setting will be applied every time the \ + SSH is enabled by XAPI" + ; param_release= numbered_release "25.21.0" + ; param_default= Some (VInt Constants.default_ssh_enabled_timeout) + } + ; { + param_type= DateTime + ; param_name= "ssh_expiry" + ; param_doc= + "The time in UTC after which the SSH access will be automatically \ + disabled" + ; param_release= numbered_release "25.21.0" + ; param_default= Some (VDateTime Date.epoch) + } + ; { + param_type= Int + ; param_name= "console_idle_timeout" + ; param_doc= + "The timeout in seconds after which idle console will be automatically \ + terminated (0 means never)" + ; param_release= numbered_release "25.21.0" + ; param_default= Some (VInt Constants.default_console_idle_timeout) + } + ; { + param_type= Bool + ; param_name= "ssh_auto_mode" + ; param_doc= "True if SSH auto mode is enabled for the host" + ; param_release= numbered_release "25.27.0" + ; param_default= Some (VBool Constants.default_ssh_auto_mode) + } ] let create = call ~name:"create" ~in_oss_since:None - ~lifecycle:[(Published, rel_rio, "Create a new host record")] + ~lifecycle: + [ + (Published, rel_rio, "Create a new host record") + ; ( 
Changed + , "24.40.0" + , "Added --last_update_hash option to allow last_update_hash to be \ + kept for host joined a pool" + ) + ; ( Changed + , "25.21.0" + , "Added --ssh_enabled --ssh_enabled_timeout --ssh_expiry \ + --console_idle_timeout --ssh_auto_mode options to allow them to be \ + configured for new host" + ) + ] ~versioned_params:create_params ~doc:"Create a new host record" ~result:(Ref _host, "Reference to the newly created host object.") ~hide_from_docs:true ~allowed_roles:_R_POOL_OP () @@ -2346,6 +2427,66 @@ let emergency_clear_mandatory_guidance = ~doc:"Clear the pending mandatory guidance on this host" ~allowed_roles:_R_LOCAL_ROOT_ONLY () +let enable_ssh = + call ~name:"enable_ssh" + ~doc: + "Enable SSH access on the host. It will start the service sshd only if \ + it is not running. It will also enable the service sshd only if it is \ + not enabled. A newly joined host in the pool or an ejected host from \ + the pool would keep the original status." + ~lifecycle:[] + ~params:[(Ref _host, "self", "The host")] + ~allowed_roles:_R_POOL_ADMIN () + +let disable_ssh = + call ~name:"disable_ssh" + ~doc: + "Disable SSH access on the host. It will stop the service sshd only if \ + it is running. It will also disable the service sshd only if it is \ + enabled. A newly joined host in the pool or an ejected host from the \ + pool would keep the original status." 
+ ~lifecycle:[] + ~params:[(Ref _host, "self", "The host")] + ~allowed_roles:_R_POOL_ADMIN () + +let set_ssh_enabled_timeout = + call ~name:"set_ssh_enabled_timeout" ~lifecycle:[] + ~doc:"Set the SSH service enabled timeout for the host" + ~params: + [ + (Ref _host, "self", "The host") + ; ( Int + , "value" + , "The SSH enabled timeout in seconds (0 means no timeout, max 2 days)" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + +let set_console_idle_timeout = + call ~name:"set_console_idle_timeout" ~lifecycle:[] + ~doc:"Set the console idle timeout for the host" + ~params: + [ + (Ref _host, "self", "The host") + ; (Int, "value", "The console idle timeout in seconds") + ] + ~allowed_roles:_R_POOL_ADMIN () + +let set_ssh_auto_mode = + call ~name:"set_ssh_auto_mode" ~lifecycle:[] + ~doc:"Set the SSH auto mode for the host" + ~params: + [ + (Ref _host, "self", "The host") + ; ( Bool + , "value" + , "The SSH auto mode for the host,when set to true, SSH to normally be \ + disabled and SSH to be enabled only in case of emergency e.g., xapi \ + is down" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + let latest_synced_updates_applied_state = Enum ( "latest_synced_updates_applied_state" @@ -2503,6 +2644,11 @@ let t = ; set_https_only ; apply_recommended_guidances ; emergency_clear_mandatory_guidance + ; enable_ssh + ; disable_ssh + ; set_ssh_enabled_timeout + ; set_console_idle_timeout + ; set_ssh_auto_mode ] ~contents: ([ @@ -2940,6 +3086,28 @@ let t = ~default_value:(Some (VString "")) "last_update_hash" "The SHA256 checksum of updateinfo of the most recently applied \ update on the host" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Bool + ~default_value:(Some (VBool Constants.default_ssh_enabled)) + "ssh_enabled" "True if SSH access is enabled for the host" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int + ~default_value:(Some (VInt Constants.default_ssh_enabled_timeout)) + "ssh_enabled_timeout" + "The timeout in seconds after which SSH access will be \ + automatically 
disabled (0 means never), this setting will be \ + applied every time the SSH is enabled by XAPI" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:DateTime + ~default_value:(Some (VDateTime Date.epoch)) "ssh_expiry" + "The time in UTC after which the SSH access will be automatically \ + disabled" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Int + ~default_value:(Some (VInt Constants.default_console_idle_timeout)) + "console_idle_timeout" + "The timeout in seconds after which idle console will be \ + automatically terminated (0 means never)" + ; field ~qualifier:DynamicRO ~lifecycle:[] ~ty:Bool + ~default_value:(Some (VBool Constants.default_ssh_auto_mode)) + "ssh_auto_mode" + "Reflects whether SSH auto mode is enabled for the host" ] ) () diff --git a/ocaml/idl/datamodel_lifecycle.ml b/ocaml/idl/datamodel_lifecycle.ml index 38ed231cd7e..3a644fba8cd 100644 --- a/ocaml/idl/datamodel_lifecycle.ml +++ b/ocaml/idl/datamodel_lifecycle.ml @@ -77,12 +77,16 @@ let prototyped_of_field = function Some "24.3.0" | "Cluster_host", "live" -> Some "24.3.0" + | "Cluster", "expected_hosts" -> + Some "25.17.0" | "Cluster", "live_hosts" -> Some "24.3.0" | "Cluster", "quorum" -> Some "24.3.0" | "Cluster", "is_quorate" -> Some "24.3.0" + | "Cluster", "cluster_stack_version" -> + Some "24.15.0" | "VTPM", "contents" -> Some "22.26.0" | "VTPM", "is_protected" -> @@ -93,6 +97,16 @@ let prototyped_of_field = function Some "22.26.0" | "SM", "host_pending_features" -> Some "24.37.0" + | "host", "ssh_auto_mode" -> + Some "25.27.0" + | "host", "console_idle_timeout" -> + Some "25.21.0" + | "host", "ssh_expiry" -> + Some "25.21.0" + | "host", "ssh_enabled_timeout" -> + Some "25.21.0" + | "host", "ssh_enabled" -> + Some "25.21.0" | "host", "last_update_hash" -> Some "24.10.0" | "host", "pending_guidances_full" -> @@ -109,6 +123,8 @@ let prototyped_of_field = function Some "22.27.0" | "host", "last_software_update" -> Some "22.20.0" + | "VM_guest_metrics", "services" -> + Some "25.15.0" | 
"VM_guest_metrics", "netbios_name" -> Some "24.28.0" | "VM", "groups" -> @@ -121,6 +137,8 @@ let prototyped_of_field = function Some "23.18.0" | "VM", "actions__after_softreboot" -> Some "23.1.0" + | "pool", "ha_reboot_vm_on_internal_shutdown" -> + Some "25.16.0" | "pool", "license_server" -> Some "25.6.0" | "pool", "recommendations" -> @@ -205,6 +223,16 @@ let prototyped_of_message = function Some "22.26.0" | "VTPM", "create" -> Some "22.26.0" + | "host", "set_ssh_auto_mode" -> + Some "25.27.0" + | "host", "set_console_idle_timeout" -> + Some "25.21.0" + | "host", "set_ssh_enabled_timeout" -> + Some "25.21.0" + | "host", "disable_ssh" -> + Some "25.13.0" + | "host", "enable_ssh" -> + Some "25.13.0" | "host", "emergency_clear_mandatory_guidance" -> Some "24.10.0" | "host", "apply_recommended_guidances" -> @@ -215,14 +243,28 @@ let prototyped_of_message = function Some "25.2.0" | "host", "set_numa_affinity_policy" -> Some "24.0.0" + | "VM", "sysprep" -> + Some "25.24.0" | "VM", "get_secureboot_readiness" -> Some "24.17.0" | "VM", "set_uefi_mode" -> Some "24.17.0" | "VM", "restart_device_models" -> Some "23.30.0" + | "VM", "call_host_plugin" -> + Some "25.22.0" | "VM", "set_groups" -> Some "24.19.1" + | "pool", "set_ssh_auto_mode" -> + Some "25.27.0" + | "pool", "set_console_idle_timeout" -> + Some "25.21.0" + | "pool", "set_ssh_enabled_timeout" -> + Some "25.21.0" + | "pool", "disable_ssh" -> + Some "25.13.0" + | "pool", "enable_ssh" -> + Some "25.13.0" | "pool", "get_guest_secureboot_readiness" -> Some "24.17.0" | "pool", "set_ext_auth_cache_expiry" -> diff --git a/ocaml/idl/datamodel_pool.ml b/ocaml/idl/datamodel_pool.ml index b71e82d1763..1874512c14d 100644 --- a/ocaml/idl/datamodel_pool.ml +++ b/ocaml/idl/datamodel_pool.ml @@ -1249,7 +1249,15 @@ let remove_repository = let sync_updates = call ~name:"sync_updates" - ~lifecycle:[(Published, "1.329.0", "")] + ~lifecycle: + [ + (Published, "1.329.0", "") + ; ( Changed + , "25.7.0" + , "Added --username --password 
options to allow syncing updates from a \ + remote_pool type repository" + ) + ] ~doc:"Sync with the enabled repository" ~versioned_params: [ @@ -1286,14 +1294,14 @@ let sync_updates = param_type= String ; param_name= "username" ; param_doc= "The username of the remote pool" - ; param_release= numbered_release "25.6.0-next" + ; param_release= numbered_release "25.7.0" ; param_default= Some (VString "") } ; { param_type= String ; param_name= "password" ; param_doc= "The password of the remote pool" - ; param_release= numbered_release "25.6.0-next" + ; param_release= numbered_release "25.7.0" ; param_default= Some (VString "") } ] @@ -1553,6 +1561,66 @@ let get_guest_secureboot_readiness = ~result:(pool_guest_secureboot_readiness, "The readiness of the pool") ~allowed_roles:_R_POOL_OP () +let enable_ssh = + call ~name:"enable_ssh" + ~doc: + "Enable SSH access on all hosts in the pool. It's a helper which calls \ + host.enable_ssh for all the hosts in the pool." + ~lifecycle:[] + ~params:[(Ref _pool, "self", "The pool")] + ~allowed_roles:_R_POOL_ADMIN () + +let disable_ssh = + call ~name:"disable_ssh" + ~doc: + "Disable SSH access on all hosts in the pool. It's a helper which calls \ + host.disable_ssh for all the hosts in the pool." + ~lifecycle:[] + ~params:[(Ref _pool, "self", "The pool")] + ~allowed_roles:_R_POOL_ADMIN () + +let set_ssh_enabled_timeout = + call ~name:"set_ssh_enabled_timeout" ~lifecycle:[] + ~doc:"Set the SSH enabled timeout for all hosts in the pool" + ~params: + [ + (Ref _pool, "self", "The pool") + ; ( Int + , "value" + , "The SSH enabled timeout in seconds. (0 means no timeout, max 2 days)" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + +let set_console_idle_timeout = + call ~name:"set_console_idle_timeout" ~lifecycle:[] + ~doc:"Set the console idle timeout for all hosts in the pool" + ~params: + [ + (Ref _pool, "self", "The pool") + ; ( Int + , "value" + , "The idle SSH/VNC session timeout in seconds. A value of 0 means no \ + timeout." 
+ ) + ] + ~allowed_roles:_R_POOL_ADMIN () + +let set_ssh_auto_mode = + call ~name:"set_ssh_auto_mode" ~lifecycle:[] + ~doc:"Set the SSH auto mode for all hosts in the pool" + ~params: + [ + (Ref _pool, "self", "The pool") + ; ( Bool + , "value" + , "The SSH auto mode for all hosts in the pool,when set to true, SSH \ + to normally be disabled and SSH to be enabled only in case of \ + emergency e.g., xapi is down" + ) + ] + ~allowed_roles:_R_POOL_ADMIN () + (** A pool class *) let t = create_obj ~in_db:true @@ -1647,6 +1715,11 @@ let t = ; set_ext_auth_cache_size ; set_ext_auth_cache_expiry ; get_guest_secureboot_readiness + ; enable_ssh + ; disable_ssh + ; set_ssh_enabled_timeout + ; set_console_idle_timeout + ; set_ssh_auto_mode ] ~contents: ([ @@ -2171,6 +2244,12 @@ let t = ~ty:(Map (String, String)) ~default_value:(Some (VMap [])) "license_server" "Licensing data shared within the whole pool" + ; field ~writer_roles:_R_POOL_OP ~qualifier:RW ~lifecycle:[] ~ty:Bool + ~default_value:(Some (VBool true)) + "ha_reboot_vm_on_internal_shutdown" + "Indicates whether an HA-protected VM that is shut down from \ + inside (not through the API) should be automatically rebooted \ + when HA is enabled" ] ) () diff --git a/ocaml/idl/datamodel_schema.ml b/ocaml/idl/datamodel_schema.ml index 10f20662496..6c295ef00f4 100644 --- a/ocaml/idl/datamodel_schema.ml +++ b/ocaml/idl/datamodel_schema.ml @@ -77,7 +77,7 @@ let of_datamodel () = { Column.name= Db_names.ref ; persistent= true - ; empty= Value.String "" + ; empty= Value.string "" ; default= None ; ty= Type.String ; issetref= false diff --git a/ocaml/idl/datamodel_values.ml b/ocaml/idl/datamodel_values.ml index e270899b50f..522ab4e530a 100644 --- a/ocaml/idl/datamodel_values.ml +++ b/ocaml/idl/datamodel_values.ml @@ -84,42 +84,42 @@ let to_db v = let open Schema.Value in match v with | VString s -> - String s + string s | VInt i -> - String (Int64.to_string i) + string (Int64.to_string i) | VFloat f -> - String (string_of_float f) 
+ string (string_of_float f) | VBool true -> - String "true" + string "true" | VBool false -> - String "false" + string "false" | VDateTime d -> - String (Date.to_rfc3339 d) + string (Date.to_rfc3339 d) | VEnum e -> - String e + string e | VMap vvl -> Pairs (List.map (fun (k, v) -> (to_string k, to_string v)) vvl) | VSet vl -> Set (List.map to_string vl) | VRef r -> - String r + string r (* Generate suitable "empty" database value of specified type *) let gen_empty_db_val t = let open Schema in match t with | SecretString | String -> - Value.String "" + Value.string "" | Int -> - Value.String "0" + Value.string "0" | Float -> - Value.String (string_of_float 0.0) + Value.string (string_of_float 0.0) | Bool -> - Value.String "false" + Value.string "false" | DateTime -> - Value.String Date.(to_rfc3339 epoch) + Value.string Date.(to_rfc3339 epoch) | Enum (_, (enum_value, _) :: _) -> - Value.String enum_value + Value.string enum_value | Enum (_, []) -> assert false | Set _ -> @@ -127,8 +127,8 @@ let gen_empty_db_val t = | Map _ -> Value.Pairs [] | Ref _ -> - Value.String null_ref + Value.string null_ref | Record _ -> - Value.String "" + Value.string "" | Option _ -> Value.Set [] diff --git a/ocaml/idl/datamodel_vm.ml b/ocaml/idl/datamodel_vm.ml index 44ca1466d78..f0d0856f9e4 100644 --- a/ocaml/idl/datamodel_vm.ml +++ b/ocaml/idl/datamodel_vm.ml @@ -2098,6 +2098,19 @@ let call_plugin = ~result:(String, "Result from the plugin") ~allowed_roles:_R_VM_OP () +let call_host_plugin = + call ~name:"call_host_plugin" + ~doc:"Call an API plugin on the host where this vm resides" ~lifecycle:[] + ~params: + [ + (Ref _vm, "vm", "The vm") + ; (String, "plugin", "The name of the plugin") + ; (String, "fn", "The name of the function within the plugin") + ; (Map (String, String), "args", "Arguments for the function") + ] + ~result:(String, "Result from the plugin") + ~allowed_roles:_R_VM_OP () + let set_has_vendor_device = call ~name:"set_has_vendor_device" ~lifecycle: @@ -2198,6 
+2211,7 @@ let operations = ; ("reverting", "Reverting the VM to a previous snapshotted state") ; ("destroy", "refers to the act of uninstalling the VM") ; ("create_vtpm", "Creating and adding a VTPM to this VM") + ; ("sysprep", "Performing a Windows sysprep on this VM") ] ) @@ -2356,6 +2370,19 @@ let restart_device_models = ~allowed_roles:(_R_VM_POWER_ADMIN ++ _R_CLIENT_CERT) () +let sysprep = + call ~name:"sysprep" ~lifecycle:[] + ~params: + [ + (Ref _vm, "self", "The VM") + ; (SecretString, "unattend", "XML content passed to sysprep") + ; (Float, "timeout", "timeout in seconds for expected reboot") + ] + ~doc: + "Pass unattend.xml to Windows sysprep and wait for the VM to shut down \ + as part of a reboot." + ~allowed_roles:_R_VM_ADMIN () + let vm_uefi_mode = Enum ( "vm_uefi_mode" @@ -2545,6 +2572,7 @@ let t = ; set_groups ; query_services ; call_plugin + ; call_host_plugin ; set_has_vendor_device ; import ; set_actions_after_crash @@ -2557,6 +2585,7 @@ let t = ; set_blocked_operations ; add_to_blocked_operations ; remove_from_blocked_operations + ; sysprep ] ~contents: ([ @@ -2591,7 +2620,7 @@ let t = ) ] "Creators of VMs and templates may store version information here." 
- ; field ~effect:true ~ty:Bool "is_a_template" + ; field ~has_effect:true ~ty:Bool "is_a_template" ~lifecycle: [ ( Published @@ -2786,7 +2815,7 @@ let t = ~ty:String "recommendations" "An XML specification of recommended values and ranges for \ properties of this VM" - ; field ~effect:true ~in_oss_since:None + ; field ~has_effect:true ~in_oss_since:None ~ty:(Map (String, String)) ~lifecycle: [ diff --git a/ocaml/idl/ocaml_backend/gen_api.ml b/ocaml/idl/ocaml_backend/gen_api.ml index 44d8bf9298e..863ae6b2b50 100644 --- a/ocaml/idl/ocaml_backend/gen_api.ml +++ b/ocaml/idl/ocaml_backend/gen_api.ml @@ -484,7 +484,9 @@ let gen_db_actions _config highapi = (toposort_types highapi only_records) ; (* NB record types are ignored by dm_to_string and string_to_dm *) O.Module.strings_of (dm_to_string all_types_in_db) + ; O.Module.strings_of (dm_to_field all_types_in_db) ; O.Module.strings_of (string_to_dm all_types_in_db) + ; O.Module.strings_of (field_to_dm all_types_in_db) ; O.Module.strings_of (db_action highapi_in_db) ] @ List.map O.Module.strings_of (Gen_db_check.all highapi_in_db) diff --git a/ocaml/idl/ocaml_backend/gen_db_actions.ml b/ocaml/idl/ocaml_backend/gen_db_actions.ml index e467624ab13..f4633fd1ba8 100644 --- a/ocaml/idl/ocaml_backend/gen_db_actions.ml +++ b/ocaml/idl/ocaml_backend/gen_db_actions.ml @@ -25,8 +25,12 @@ open DT (* Names of the modules we're going to generate (use these to prevent typos) *) let _dm_to_string = "DM_to_String" +let _dm_to_field = "DM_to_Field" + let _string_to_dm = "String_to_DM" +let _field_to_dm = "Field_to_DM" + let _db_action = "DB_Action" let _db_defaults = "DB_DEFAULTS" @@ -109,6 +113,44 @@ let dm_to_string tys : O.Module.t = ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) () +let dm_to_field tys : O.Module.t = + let tys = List.filter type_marshalled_in_db tys in + (* For every type, we create a single function *) + let ty_fun ty = + let body = + match ty with + | DT.Map (String, String) -> + 
"Schema.Value.pairs" + | DT.Map (key, value) -> + Printf.sprintf + "fun s -> s |> List.map (fun (k, v) -> %s.%s k, %s.%s v) |> \ + Schema.Value.pairs" + _dm_to_string (OU.alias_of_ty key) _dm_to_string + (OU.alias_of_ty value) + | DT.Set String -> + "Schema.Value.set" + | DT.Set ty -> + Printf.sprintf "fun s -> s |> List.map %s.%s |> Schema.Value.set" + _dm_to_string (OU.alias_of_ty ty) + | DT.String -> + "Schema.Value.string" + | _ -> + Printf.sprintf "fun s -> s |> %s.%s |> Schema.Value.string" + _dm_to_string (OU.alias_of_ty ty) + in + O.Let.make ~name:(OU.alias_of_ty ty) ~params:[] ~ty:"Db_interface.field_in" + ~body:[body] () + in + O.Module.make ~name:_dm_to_field + ~preamble: + [ + "exception StringEnumTypeError of string" + ; "exception DateTimeError of string" + ] + ~letrec:true + ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) + () + (** Generate a module of string to datamodel type unmarshalling functions *) let string_to_dm tys : O.Module.t = let tys = List.filter type_marshalled_in_db tys in @@ -171,6 +213,53 @@ let string_to_dm tys : O.Module.t = ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) () +let field_to_dm tys : O.Module.t = + let tys = List.filter type_marshalled_in_db tys in + (* For every type, we create a single function *) + let ty_fun ty = + let name = OU.alias_of_ty ty in + let body = + match ty with + | DT.Map (key, value) -> + let conv = + match (key, value) with + | DT.String, DT.String -> + "" + | _ -> + Printf.sprintf " |> List.map (fun (k, v) -> %s.%s k, %s.%s v)" + _string_to_dm (OU.alias_of_ty key) _string_to_dm + (OU.alias_of_ty value) + in + "fun s -> s |> Schema.CachedValue.maybe_unmarshal Schema.Type.Pairs \ + |> Schema.CachedValue.value_of |> Schema.Value.Unsafe_cast.pairs" + ^ conv + | DT.Set ty -> + let conv = + match ty with + | DT.String -> + "" + | _ -> + Printf.sprintf " |> List.map %s.%s" _string_to_dm + (OU.alias_of_ty ty) + in + "fun s -> s |> Schema.CachedValue.maybe_unmarshal 
Schema.Type.Set |> \ + Schema.CachedValue.value_of |> Schema.Value.Unsafe_cast.set" + ^ conv + | DT.String -> + "fun s -> s |> Schema.CachedValue.maybe_unmarshal Schema.Type.String \ + |> Schema.CachedValue.value_of |> Schema.Value.Unsafe_cast.string" + | _ -> + Printf.sprintf "fun f -> f |> Schema.CachedValue.string_of |> %s.%s" + _string_to_dm name + in + O.Let.make ~name ~params:[] ~ty:(OU.alias_of_ty ty) ~body:[body] () + in + O.Module.make ~name:_field_to_dm + ~preamble:["exception StringEnumTypeError of string"] + ~letrec:true + ~elements:(List.map (fun ty -> O.Module.Let (ty_fun ty)) tys) + () + (** True if a field is actually in this table, false if stored elsewhere (ie Set(Ref _) are stored in foreign tables *) let field_in_this_table = function @@ -283,7 +372,7 @@ let open_db_module = [ "let __t = Context.database_of __context in" ; "let module DB = (val (Xapi_database.Db_cache.get __t) : \ - Xapi_database.Db_interface.DB_ACCESS) in" + Xapi_database.Db_interface.DB_ACCESS2) in" ] let db_action api : O.Module.t = @@ -331,7 +420,7 @@ let db_action api : O.Module.t = let ty_alias = OU.alias_of_ty f.DT.ty in let accessor = "find_regular" in let field_name = Escaping.escape_id f.full_name in - Printf.sprintf {|%s.%s (%s "%s")|} _string_to_dm ty_alias accessor + Printf.sprintf {|%s.%s (%s "%s")|} _field_to_dm ty_alias accessor field_name in let make_field f = @@ -433,8 +522,13 @@ let db_action api : O.Module.t = let to_string arg = let binding = O.string_of_param arg in let converter = O.type_of_param arg in - Printf.sprintf "let %s = %s.%s %s in" binding _dm_to_string converter - binding + Printf.sprintf "let %s = %s.%s %s in" binding + ( if binding = Client._self || binding = "ref" then + _dm_to_string + else + _dm_to_field + ) + converter binding in let body = match tag with @@ -445,37 +539,38 @@ let db_action api : O.Module.t = (Escaping.escape_id fld.DT.full_name) | FromField (Getter, {DT.ty; full_name; _}) -> Printf.sprintf "%s.%s (DB.read_field __t 
\"%s\" \"%s\" %s)" - _string_to_dm (OU.alias_of_ty ty) + _field_to_dm (OU.alias_of_ty ty) (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Add, {DT.ty= DT.Map (_, _); full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,%s) \"%s\" \"%s\" %s \ - AddMapLegacy" + "DB.process_structured_field __t (Schema.Value.marshal %s, \ + Schema.Value.marshal %s) \"%s\" \"%s\" %s AddMapLegacy" Client._key Client._value (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Add, {DT.ty= DT.Set _; full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,\"\") \"%s\" \"%s\" %s AddSet" + "DB.process_structured_field __t (Schema.Value.marshal %s,\"\") \ + \"%s\" \"%s\" %s AddSet" Client._value (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Remove, {DT.ty= DT.Map (_, _); full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,\"\") \"%s\" \"%s\" %s \ - RemoveMap" + "DB.process_structured_field __t (Schema.Value.marshal %s,\"\") \ + \"%s\" \"%s\" %s RemoveMap" Client._key (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) Client._self | FromField (Remove, {DT.ty= DT.Set _; full_name; _}) -> Printf.sprintf - "DB.process_structured_field __t (%s,\"\") \"%s\" \"%s\" %s \ - RemoveSet" + "DB.process_structured_field __t (Schema.Value.marshal %s,\"\") \ + \"%s\" \"%s\" %s RemoveSet" Client._value (Escaping.escape_obj obj.DT.name) (Escaping.escape_id full_name) @@ -517,7 +612,9 @@ let db_action api : O.Module.t = match (x.msg_params, x.msg_result) with | [{param_name= name; _}], Some (result_ty, _) -> let query = - Printf.sprintf "DB.db_get_by_uuid __t \"%s\" %s" + Printf.sprintf + "DB.db_get_by_uuid __t \"%s\" (Schema.Value.Unsafe_cast.string \ + %s)" (Escaping.escape_obj obj.DT.name) (OU.escape name) in @@ -530,7 +627,7 @@ let db_action api : O.Module.t = ^ ")" in let query_opt = - Printf.sprintf 
"DB.db_get_by_uuid_opt __t \"%s\" %s" + Printf.sprintf "DB.db_get_by_uuid_opt __t \"%s\" (%s)" (Escaping.escape_obj obj.DT.name) (OU.escape name) in @@ -555,7 +652,9 @@ let db_action api : O.Module.t = match (x.msg_params, x.msg_result) with | [{param_name= name; _}], Some (Set result_ty, _) -> let query = - Printf.sprintf "DB.db_get_by_name_label __t \"%s\" %s" + Printf.sprintf + "DB.db_get_by_name_label __t \"%s\" \ + (Schema.Value.Unsafe_cast.string %s)" (Escaping.escape_obj obj.DT.name) (OU.escape name) in @@ -606,13 +705,15 @@ let db_action api : O.Module.t = | FromObject GetAllRecordsWhere -> String.concat "\n" [ - "let expr' = Xapi_database.Db_filter.expr_of_string expr in" + "let expr' = Xapi_database.Db_filter.expr_of_string \ + (Schema.Value.Unsafe_cast.string expr) in" ; "get_records_where ~" ^ Gen_common.context ^ " ~expr:expr'" ] | FromObject GetAllWhere -> String.concat "\n" [ - "let expr' = Xapi_database.Db_filter.expr_of_string expr in" + "let expr' = Xapi_database.Db_filter.expr_of_string \ + (Schema.Value.Unsafe_cast.string expr) in" ; "get_refs_where ~" ^ Gen_common.context ^ " ~expr:expr'" ] | _ -> diff --git a/ocaml/idl/ocaml_backend/gen_server.ml b/ocaml/idl/ocaml_backend/gen_server.ml index 19145021264..f95f5f6d962 100644 --- a/ocaml/idl/ocaml_backend/gen_server.ml +++ b/ocaml/idl/ocaml_backend/gen_server.ml @@ -457,7 +457,7 @@ let gen_module api : O.Module.t = ([ "let __call, __params = call.Rpc.name, call.Rpc.params in" ; "List.iter (fun p -> let s = Rpc.to_string p in if not \ - (Xapi_stdext_encodings.Encodings.UTF8_XML.is_valid s) then" + (Xapi_stdext_encodings.Utf8.is_valid s) then" ; "raise (Api_errors.Server_error(Api_errors.invalid_value, \ [\"Invalid UTF-8 string in parameter\"; s]))) __params;" ; "let __label = __call in" diff --git a/ocaml/idl/schematest.ml b/ocaml/idl/schematest.ml index 255e094e1dd..963231d7d69 100644 --- a/ocaml/idl/schematest.ml +++ b/ocaml/idl/schematest.ml @@ -3,7 +3,7 @@ let hash x = Digest.string x |> 
Digest.to_hex (* BEWARE: if this changes, check that schema has been bumped accordingly in ocaml/idl/datamodel_common.ml, usually schema_minor_vsn *) -let last_known_schema_hash = "ad67a64cd47cdea32085518c1fb38d27" +let last_known_schema_hash = "7586cb039918e573594fc358e90b0f04" let current_schema_hash : string = let open Datamodel_types in diff --git a/ocaml/libs/clock/date.ml b/ocaml/libs/clock/date.ml index c668b0c1fb3..2dab4a95443 100644 --- a/ocaml/libs/clock/date.ml +++ b/ocaml/libs/clock/date.ml @@ -64,12 +64,24 @@ let best_effort_iso8601_to_rfc3339 x = x let of_iso8601 x = - let rfc3339 = best_effort_iso8601_to_rfc3339 x in - match Ptime.of_rfc3339 rfc3339 |> Ptime.rfc3339_error_to_msg with - | Error _ -> - invalid_arg (Printf.sprintf "%s: %s" __FUNCTION__ x) - | Ok (t, tz, _) -> - {t; tz} + if String.length x > 5 && x.[4] <> '-' && x.[String.length x - 1] = 'Z' then + (* dates in the DB look like "20250319T04:16:24Z", so decoding that should be the fastpath *) + Scanf.sscanf x "%04i%02i%02iT%02i:%02i:%02iZ" (fun y mon d hh mm ss -> + let tz = 0 in + let date = (y, mon, d) and time = ((hh, mm, ss), tz) in + match Ptime.of_date_time (date, time) with + | Some t -> + {t; tz= Some tz} + | None -> + invalid_arg (Printf.sprintf "%s: %s" __FUNCTION__ x) + ) + else + let rfc3339 = best_effort_iso8601_to_rfc3339 x in + match Ptime.of_rfc3339 rfc3339 |> Ptime.rfc3339_error_to_msg with + | Error _ -> + invalid_arg (Printf.sprintf "%s: %s" __FUNCTION__ x) + | Ok (t, tz, _) -> + {t; tz} let print_tz tz_s = match tz_s with diff --git a/ocaml/libs/http-lib/dune b/ocaml/libs/http-lib/dune index 887aaf7dbc7..184fd5b2869 100644 --- a/ocaml/libs/http-lib/dune +++ b/ocaml/libs/http-lib/dune @@ -1,109 +1,103 @@ (library - (name http_lib) - (public_name http-lib) - (modes best) - (wrapped false) - (modules (:standard \ http_svr http_proxy server_io http_test radix_tree_test test_client test_server)) - (preprocess (per_module ((pps ppx_deriving_rpc) Http))) - (libraries - 
astring - base64 - fmt - ipaddr - mtime - mtime.clock.os - rpclib.core - rpclib.json - rpclib.xml - safe_resources - sha - stunnel - threads.posix - uuid - uri - xapi-backtrace - xapi-consts.xapi_version - xapi-idl.updates - xapi-log - clock - xapi-stdext-pervasives - xapi-stdext-threads - xapi-stdext-unix - xml-light2 - ) -) + (name http_lib) + (public_name http-lib) + (modes best) + (wrapped false) + (modules + (:standard + \ + http_svr + http_proxy + server_io + http_test + radix_tree_test + test_client + test_server)) + (preprocess + (per_module + ((pps ppx_deriving_rpc) + Http))) + (libraries + astring + base64 + fmt + ipaddr + mtime + mtime.clock.os + rpclib.core + rpclib.json + rpclib.xml + safe_resources + sha + stunnel + threads.posix + uuid + uri + xapi-backtrace + xapi-consts.xapi_version + xapi-idl.updates + xapi-log + clock + xapi-stdext-pervasives + xapi-stdext-threads + xapi-stdext-unix + xml-light2)) (library - (name httpsvr) - (wrapped false) - (modes best) - (modules http_svr http_proxy server_io) - (libraries - astring - http_lib - ipaddr - polly - tgroup - threads.posix - tracing - tracing_propagator - uri - xapi-log - xapi-stdext-pervasives - xapi-stdext-threads - xapi-stdext-unix - ) -) + (name httpsvr) + (wrapped false) + (modes best) + (modules http_svr http_proxy server_io) + (libraries + astring + http_lib + ipaddr + polly + tgroup + threads.posix + tracing + tracing_propagator + uri + xapi-backtrace + xapi-log + xapi-stdext-pervasives + xapi-stdext-threads + xapi-stdext-unix)) (tests - (names http_test radix_tree_test) - (package http-lib) - (modes (best exe)) - (modules http_test radix_tree_test) - (libraries - alcotest - - fmt - http_lib - ) -) + (names http_test radix_tree_test) + (package http-lib) + (modes + (best exe)) + (modules http_test radix_tree_test) + (libraries alcotest fmt http_lib)) (executable - (modes exe) - (name test_client) - (modules test_client) - (libraries - - http_lib - safe-resources - stunnel - threads.posix - 
xapi-backtrace - xapi-log - xapi-stdext-pervasives - xapi-stdext-unix - ) -) + (modes exe) + (name test_client) + (modules test_client) + (libraries + http_lib + safe-resources + stunnel + threads.posix + xapi-backtrace + xapi-log + xapi-stdext-pervasives + xapi-stdext-unix)) (executable - (modes exe) - (name test_server) - (modules test_server) - (libraries - - http_lib - httpsvr - safe-resources - threads.posix - xapi-stdext-threads - xapi-stdext-unix - ) -) + (modes exe) + (name test_server) + (modules test_server) + (libraries + http_lib + httpsvr + safe-resources + threads.posix + xapi-stdext-threads + xapi-stdext-unix)) (cram - (package xapi) - (deps - test_client.exe - test_server.exe - ) -) + (package xapi) + (deps test_client.exe test_server.exe)) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index d84ba6ad627..4db3df81d2a 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -189,8 +189,9 @@ let response_request_header_fields_too_large s = response_error_html s "431" "Request Header Fields Too Large" [] body let response_internal_error ?req ?extra exc s = + Backtrace.is_important exc ; E.error "Responding with 500 Internal Error due to %s" (Printexc.to_string exc) ; - E.log_backtrace () ; + E.log_backtrace exc ; let version = Option.map get_return_version req in let extra = Option.fold ~none:"" @@ -473,6 +474,7 @@ let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = in (Some r, proxy) with e -> + Backtrace.is_important e ; D.warn "%s (%s)" (Printexc.to_string e) __LOC__ ; best_effort (fun () -> match e with @@ -498,8 +500,7 @@ let read_request ?proxy_seen ~read_timeout ~total_timeout ~max_length fd = ) | exc -> response_internal_error exc fd - ~extra:(escape (Printexc.to_string exc)) ; - log_backtrace () + ~extra:(escape (Printexc.to_string exc)) ) ; (None, None) diff --git a/ocaml/libs/http-lib/server_io.ml b/ocaml/libs/http-lib/server_io.ml index 
c821a27c024..61cce796afc 100644 --- a/ocaml/libs/http-lib/server_io.ml +++ b/ocaml/libs/http-lib/server_io.ml @@ -53,12 +53,13 @@ let establish_server ?(signal_fds = []) forker handler sock = let s, caller = Unix.accept ~cloexec:true sock in try ignore (forker handler s caller) with exc -> + Backtrace.is_important exc ; (* NB provided 'forker' is configured to make a background thread then the only way we can get here is if Thread.create fails. This means we haven't executed any code which could close the fd therefore we should do it ourselves. *) debug "Got exception in server_io.ml: %s" (Printexc.to_string exc) ; - log_backtrace () ; + log_backtrace exc ; Unix.close s ; Thread.delay 30.0 ) diff --git a/ocaml/libs/log/debug.ml b/ocaml/libs/log/debug.ml index 2f73cd47aca..ac0478e281b 100644 --- a/ocaml/libs/log/debug.ml +++ b/ocaml/libs/log/debug.ml @@ -258,6 +258,10 @@ let with_thread_associated ?client ?(quiet = false) desc f x = (* This function is a top-level exception handler typically used on fresh threads. This is the last chance to do something with the backtrace *) if not quiet then ( + (* It would seem that a Backtrace.is_important would be missing here. + But in fact it has actually been called in [let result] above, + so calling it again is not necessary. 
+ *) output_log "backtrace" Syslog.Err "error" (Printf.sprintf "%s failed with exception %s" desc (Printexc.to_string exn) @@ -307,7 +311,7 @@ module type DEBUG = sig val audit : ?raw:bool -> ('a, unit, string, string) format4 -> 'a - val log_backtrace : unit -> unit + val log_backtrace : exn -> unit val log_and_ignore_exn : (unit -> unit) -> unit end @@ -344,9 +348,10 @@ functor ) fmt - let log_backtrace () = - let backtrace = Printexc.get_backtrace () in - debug "%s" (String.escaped backtrace) + let log_backtrace exn = + let level = Syslog.Debug in + if not (is_disabled Brand.name level) then + log_backtrace_internal ~level exn () let log_and_ignore_exn f = try f () diff --git a/ocaml/libs/log/debug.mli b/ocaml/libs/log/debug.mli index 4ba72886ce6..e552d56cc4a 100644 --- a/ocaml/libs/log/debug.mli +++ b/ocaml/libs/log/debug.mli @@ -76,7 +76,15 @@ module type DEBUG = sig val audit : ?raw:bool -> ('a, unit, string, string) format4 -> 'a (** Audit function *) - val log_backtrace : unit -> unit + val log_backtrace : exn -> unit + (** [log_backtrace exn] logs the backtrace associated with [exn]. + Either this or {!Backtrace.is_important} must be the first statement in an exception handler, + otherwise the backtrace may be overwritten (e.g. by formatting functions that internally raise and catch exceptions). + + This has to be used instead of getting a new backtrace from Printexc if [Backtrace.is_important] was ever called, + because that function stashes away the backtrace and then overwrites the current backtrace (to avoid duplicate frames in the stacktrace, + when Backtrace.get is used). 
+ *) val log_and_ignore_exn : (unit -> unit) -> unit end diff --git a/ocaml/libs/log/test/dune b/ocaml/libs/log/test/dune new file mode 100644 index 00000000000..75fbbad7557 --- /dev/null +++ b/ocaml/libs/log/test/dune @@ -0,0 +1,7 @@ +(executable + (name log_test) + (libraries log threads.posix xapi-backtrace)) + +(cram + (package xapi-log) + (deps log_test.exe)) diff --git a/ocaml/libs/log/test/log_test.ml b/ocaml/libs/log/test/log_test.ml new file mode 100644 index 00000000000..b493b18d426 --- /dev/null +++ b/ocaml/libs/log/test/log_test.ml @@ -0,0 +1,21 @@ +module D = Debug.Make (struct let name = Filename.basename __FILE__ end) + +let m = Mutex.create () + +let a = [||] + +let buggy () = a.(1) <- 0 + +let with_lock mutex f = + let finally () = Mutex.unlock mutex in + Mutex.lock mutex ; Fun.protect ~finally f + +let () = + Printexc.record_backtrace true ; + Debug.log_to_stdout () ; + () + |> Debug.with_thread_associated "main" @@ fun () -> + try with_lock m buggy + with e -> + D.log_backtrace e ; + D.warn "Got exception: %s" (Printexc.to_string e) diff --git a/ocaml/tests/test_host_driver_helpers.mli b/ocaml/libs/log/test/log_test.mli similarity index 100% rename from ocaml/tests/test_host_driver_helpers.mli rename to ocaml/libs/log/test/log_test.mli diff --git a/ocaml/libs/log/test/log_test.t b/ocaml/libs/log/test/log_test.t new file mode 100644 index 00000000000..ae296392b86 --- /dev/null +++ b/ocaml/libs/log/test/log_test.t @@ -0,0 +1,23 @@ +The log_test executable produces a backtrace on purpose, on x86_64, and with +the datetimes removed, it looks like this: +$ ./log_test.exe | sed -re 's/[0-9]+T[0-9:.]+Z//' +[|error||0 |main|backtrace] Raised Invalid_argument("index out of bounds") +[|error||0 |main|backtrace] 1/4 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 +[|error||0 |main|backtrace] 2/4 log_test.exe Called from file fun.ml, line 33 +[|error||0 |main|backtrace] 3/4 log_test.exe Called from file fun.ml, line 38 +[|error||0 
|main|backtrace] 4/4 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 18 +[|error||0 |main|backtrace] +[| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") + +and on aarch64: +[|error||0 |main|backtrace] Raised Invalid_argument("index out of bounds") +[|error||0 |main|backtrace] 1/3 log_test.exe Raised at file ocaml/libs/log/test/log_test.ml, line 7 +[|error||0 |main|backtrace] 2/3 log_test.exe Called from file fun.ml, line 38 +[|error||0 |main|backtrace] 3/3 log_test.exe Called from file ocaml/libs/log/test/log_test.ml, line 18 +[|error||0 |main|backtrace] +[| warn||0 |main|log_test.ml] Got exception: Invalid_argument("index out of bounds") + + $ ./log_test.exe | grep "main|backtrace" -c | xargs -I _ sh -c "test 5 -eq _ || test 6 -eq _" + $ ./log_test.exe | grep "log_test.exe" -c | xargs -I _ sh -c "test 3 -eq _ || test 4 -eq _" + $ ./log_test.exe | grep "ocaml/libs/log/test/log_test.ml" -c + 2 diff --git a/ocaml/libs/pciutil/pciutil.ml b/ocaml/libs/pciutil/pciutil.ml index ca63fb5aa13..757ea0ce0e4 100644 --- a/ocaml/libs/pciutil/pciutil.ml +++ b/ocaml/libs/pciutil/pciutil.ml @@ -25,13 +25,14 @@ let parse_from file vendor device = let vendor_str = ref (unknown_vendor vendor) and device_str = ref (unknown_device device) in (* CA-26771: As we parse the file we keep track of the current vendor. - When we find a device match we only accept it if it's from the right vendor; it doesn't make - sense to pair vendor 2's device with vendor 1. *) + When we find a device match we only accept it if it's from the right + vendor; it doesn't make sense to pair vendor 2's device with vendor 1. 
*) let current_xvendor = ref "" in Unixext.readfile_line (fun line -> + (* ignore subvendors/subdevices, blank lines and comments *) if line = "" || line.[0] = '#' || (line.[0] = '\t' && line.[1] = '\t') - then (* ignore subvendors/subdevices, blank lines and comments *) + then () else if line.[0] = '\t' then ( if diff --git a/ocaml/libs/resources/test/dune b/ocaml/libs/resources/test/dune index 2bc052f2e63..15a20f0bfa3 100644 --- a/ocaml/libs/resources/test/dune +++ b/ocaml/libs/resources/test/dune @@ -4,6 +4,7 @@ (action (run %{test} -e)) (libraries safe-resources + logs logs.fmt alcotest ) diff --git a/ocaml/libs/sexpr/dune b/ocaml/libs/sexpr/dune index 77653c2abcc..6490da85beb 100644 --- a/ocaml/libs/sexpr/dune +++ b/ocaml/libs/sexpr/dune @@ -1,22 +1,10 @@ -(menhir (modules sExprParser)) +(menhir + (modules sExprParser)) (ocamllex sExprLexer) (library - (name sexpr) - (public_name sexpr) - (wrapped false) - (modules (:standard \ sexprpp)) - (libraries - astring - ) -) - -(executable - (modes exe) - (name sexprpp) - (modules sexprpp) - (libraries - sexpr - ) -) + (name sexpr) + (public_name sexpr) + (wrapped false) + (libraries astring)) diff --git a/ocaml/libs/sexpr/sExpr.ml b/ocaml/libs/sexpr/sExpr.ml index 488142898c2..3637ac6abf5 100644 --- a/ocaml/libs/sexpr/sExpr.ml +++ b/ocaml/libs/sexpr/sExpr.ml @@ -23,7 +23,7 @@ let unescape_buf buf s = if Astring.String.fold_left aux false s then Buffer.add_char buf '\\' -let is_escape_char = function '\\' | '"' | '\'' -> true | _ -> false +let is_escape_char = function '\\' | '\'' -> true | _ -> false (* XXX: This escapes "'c'" and "\'c\'" to "\\'c\\'". * They are both unescaped as "'c'". 
They have been ported @@ -32,26 +32,22 @@ let is_escape_char = function '\\' | '"' | '\'' -> true | _ -> false * - Astring.String.Ascii.escape_string * - Astring.String.Ascii.unescape * that have guaranteed invariants and optimised performances *) -let escape s = +let escape_buf escaped s = let open Astring in - if String.exists is_escape_char s then ( - let escaped = Buffer.create (String.length s + 10) in + if String.exists is_escape_char s then String.iter (fun c -> match c with | '\\' -> Buffer.add_string escaped "\\\\" - | '"' -> - Buffer.add_string escaped "\\\"" | '\'' -> Buffer.add_string escaped "\\\'" | _ -> Buffer.add_char escaped c ) - s ; - Buffer.contents escaped - ) else - s + s + else + Buffer.add_string escaped s let unescape s = if String.contains s '\\' then ( @@ -82,22 +78,7 @@ let string_of sexpr = Buffer.add_char buf ')' | Symbol s | String s -> Buffer.add_string buf "\'" ; - Buffer.add_string buf (escape s) ; + escape_buf buf s ; Buffer.add_string buf "\'" in __string_of_rec sexpr ; Buffer.contents buf - -let rec output_fmt ff = function - | Node list -> - let rec aux ?(first = true) = function - | [] -> - () - | h :: t when first -> - output_fmt ff h ; aux ~first:false t - | h :: t -> - Format.fprintf ff "@;<1 2>%a" output_fmt h ; - aux ~first t - in - Format.fprintf ff "@[(" ; aux list ; Format.fprintf ff ")@]" - | Symbol s | String s -> - Format.fprintf ff "\"%s\"" (escape s) diff --git a/ocaml/libs/sexpr/sExpr.mli b/ocaml/libs/sexpr/sExpr.mli index e7ab5c68a1a..7bf1c61812b 100644 --- a/ocaml/libs/sexpr/sExpr.mli +++ b/ocaml/libs/sexpr/sExpr.mli @@ -16,5 +16,3 @@ type t = Node of t list | Symbol of string | String of string val mkstring : string -> t val string_of : t -> string - -val output_fmt : Format.formatter -> t -> unit diff --git a/ocaml/libs/sexpr/sexprpp.ml b/ocaml/libs/sexpr/sexprpp.ml deleted file mode 100644 index 109ee577169..00000000000 --- a/ocaml/libs/sexpr/sexprpp.ml +++ /dev/null @@ -1,30 +0,0 @@ -(* - * Copyright (C) 
2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -let lexer = Lexing.from_channel stdin - -let _ = - match Sys.argv with - | [|_; "-nofmt"|] -> - let start_time = Sys.time () in - let sexpr = SExprParser.expr SExprLexer.token lexer in - let parse_time = Sys.time () in - let s = SExpr.string_of sexpr in - let print_time = Sys.time () in - Printf.fprintf stderr "Parse time: %f\nPrint time: %f\n%!" - (parse_time -. start_time) (print_time -. parse_time) ; - print_endline s - | _ -> - let sexpr = SExprParser.expr SExprLexer.token lexer in - let ff = Format.formatter_of_out_channel stdout in - SExpr.output_fmt ff sexpr ; Format.fprintf ff "@." diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index 93b990d8449..0445b4bee4c 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -109,11 +109,6 @@ type pid = | FEFork of Forkhelpers.pidty (** the forkhelpers module did it for us. 
*) | Nopid -(* let string_of_pid = function - | StdFork x -> Printf.sprintf "(StdFork %d)" x - | FEFork x -> Forkhelpers.string_of_pidty x - | Nopid -> "None" *) - let getpid ty = match ty with | StdFork pid -> @@ -483,20 +478,30 @@ let check_verify_error line = let split_1 c s = match Astring.String.cut ~sep:c s with Some (x, _) -> x | None -> s in - if Astring.String.is_infix ~affix:"VERIFY ERROR: " line then - match Astring.String.find_sub ~sub:"error=" line with + (* When verified with a mismatched certificate, one line of log from stunnel + * would look like: + SSL_connect: ssl/statem/statem_clnt.c:1889: error:0A000086:SSL routines::certificate verify failed + * in this case, Stunnel_verify_error can be raised with detailed error as + * reason if it can found in the log *) + if Astring.String.is_infix ~affix:"certificate verify failed" line then + match Astring.String.find_sub ~sub:"error:" line with | Some e -> raise (Stunnel_verify_error - (split_1 "," (sub_after (e + String.length "error=") line)) + (split_1 "," (sub_after (e + String.length "error:") line)) ) | None -> raise (Stunnel_verify_error "") + else if + Astring.String.is_infix ~affix:"No certificate or private key specified" + line + then + raise (Stunnel_verify_error "The specified certificate is corrupt") else () let check_error s line = - if Astring.String.is_infix ~affix:line s then + if Astring.String.is_infix ~affix:s line then raise (Stunnel_error s) let diagnose_failure st_proc = diff --git a/ocaml/libs/tgroup/tgroup.ml b/ocaml/libs/tgroup/tgroup.ml index 171b78ee2b2..071a9dfe0d2 100644 --- a/ocaml/libs/tgroup/tgroup.ml +++ b/ocaml/libs/tgroup/tgroup.ml @@ -253,6 +253,11 @@ module Group = struct External.name // External.Unauthenticated.name let to_string g = match g with Group group -> to_cgroup group + + let authenticated_root = + of_creator (Creator.make ~identity:Identity.root_identity ()) + + let unauthenticated = Group External_Unauthenticated end module Cgroup = struct diff --git 
a/ocaml/libs/tgroup/tgroup.mli b/ocaml/libs/tgroup/tgroup.mli index b9316967ae3..d89ef542ffd 100644 --- a/ocaml/libs/tgroup/tgroup.mli +++ b/ocaml/libs/tgroup/tgroup.mli @@ -37,7 +37,7 @@ module Group : sig val of_string : string -> t (** [of_string s] creates an originator from a string [s]. - + e.g create an originator based on a http header. *) val to_string : t -> string @@ -76,6 +76,14 @@ module Group : sig val to_string : t -> string (** [to_string g] returns the string representation of the group [g].*) + + val authenticated_root : t + (** [authenticated_root] represents the main classification of internal xapi + threads. *) + + val unauthenticated : t + (** [unauthenticated] represents the classification of xapi threads for + unauthenticated users. *) end (** [Cgroup] module encapsulates different function for managing the cgroups @@ -87,7 +95,7 @@ module Cgroup : sig val dir_of : Group.t -> t option (** [dir_of group] returns the full path of the cgroup directory corresponding to the group [group] as [Some dir]. - + Returns [None] if [init dir] has not been called. *) val init : string -> unit diff --git a/ocaml/libs/timeslice/timeslice.ml b/ocaml/libs/timeslice/timeslice.ml index c414b321d64..55b888871df 100644 --- a/ocaml/libs/timeslice/timeslice.ml +++ b/ocaml/libs/timeslice/timeslice.ml @@ -65,7 +65,8 @@ let periodic = let set ?(sampling_rate = 1e-4) interval = Atomic.set yield_interval (Mtime.Span.of_float_ns @@ (interval *. 
1e9) |> Option.get) ; - Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic + let _ = Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic in + () let clear () = Gc.Memprof.stop () ; diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 4cb89d45b8a..78ba3bc3ab6 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -222,6 +222,8 @@ module TraceContext = struct let empty = {traceparent= None; baggage= None} + let depth_key = "span.depth" + let with_traceparent traceparent ctx = {ctx with traceparent} let with_baggage baggage ctx = {ctx with baggage} @@ -230,6 +232,20 @@ module TraceContext = struct let baggage_of ctx = ctx.baggage + let baggage_depth_of ctx = + Option.bind (baggage_of ctx) (List.assoc_opt depth_key) + |> Option.value ~default:"1" + |> int_of_string + + let update_with_baggage k v ctx = + let new_baggage = + baggage_of ctx + |> Option.value ~default:[] + |> List.remove_assoc k + |> List.cons (k, v) + in + with_baggage (Some new_baggage) ctx + let parse input = let open Astring.String in let trim_pair (key, value) = (trim key, trim value) in @@ -322,22 +338,36 @@ module Span = struct let start ?(attributes = Attributes.empty) ?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () = - let trace_id, extra_context = + let trace_id, extra_context, depth = match parent with | None -> - (Trace_id.make (), TraceContext.empty) + (Trace_id.make (), TraceContext.empty, 1) | Some span_parent -> - (span_parent.context.trace_id, span_parent.context.trace_context) + ( span_parent.context.trace_id + , span_parent.context.trace_context + , TraceContext.baggage_depth_of span_parent.context.trace_context + 1 + ) in let span_id = Span_id.make () in + let extra_context_with_depth = + TraceContext.( + update_with_baggage depth_key (string_of_int depth) extra_context + ) + in let context : SpanContext.t = - {trace_id; span_id; trace_context= extra_context} + {trace_id; span_id; trace_context= 
extra_context_with_depth} in let context = - (* If trace_context is provided to the call, override any inherited trace context. *) - trace_context - |> Option.fold ~none:context - ~some:(Fun.flip SpanContext.with_trace_context context) + (* If trace_context is provided to the call, override any inherited trace + context except span.depth which should still be maintained. *) + match trace_context with + | Some tc -> + let tc_with_depth = + TraceContext.(update_with_baggage depth_key (string_of_int depth) tc) + in + SpanContext.with_trace_context tc_with_depth context + | None -> + context in (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in @@ -437,10 +467,7 @@ module Span = struct let to_propagation_context span = let traceparent = span |> get_context |> SpanContext.to_traceparent in - span - |> get_context - |> SpanContext.context_of_span_context - |> TraceContext.with_traceparent (Some traceparent) + span |> get_trace_context |> TraceContext.with_traceparent (Some traceparent) let with_trace_context span trace_context = let span_context = @@ -476,6 +503,11 @@ module Spans = struct let set_max_traces x = Atomic.set max_traces x + (* Default is much larger than the largest current traces, so effectively off *) + let max_depth = Atomic.make 100 + + let set_max_depth x = Atomic.set max_depth x + let finished_spans = Atomic.make ([], 0) let span_hashtbl_is_empty () = TraceMap.is_empty (Atomic.get spans) @@ -716,12 +748,18 @@ module Tracer = struct let get_tracer ~name:_ = TracerProvider.get_current () let span_of_span_context context name : Span.t = + let tc = SpanContext.context_of_span_context context in + let new_depth = TraceContext.baggage_depth_of tc in + let new_tc = + TraceContext.(update_with_baggage depth_key (string_of_int new_depth) tc) + in + let context = SpanContext.with_trace_context new_tc context in { context ; status= {status_code= Status.Unset; _description= None} 
; name ; parent= None - ; span_kind= SpanKind.Client (* This will be the span of the client call*) + ; span_kind= SpanKind.Client (* This will be the span of the client call *) ; begin_time= Unix.gettimeofday () ; end_time= None ; links= [] @@ -733,10 +771,32 @@ module Tracer = struct ?(span_kind = SpanKind.Internal) ~name ~parent () : (Span.t option, exn) result = let open TracerProvider in - (* Do not start span if the TracerProvider is disabled*) + let parent_depth = + Option.fold ~none:1 + ~some:(fun parent -> + parent.Span.context + |> SpanContext.context_of_span_context + |> TraceContext.baggage_depth_of + ) + parent + in + (* Do not start span if the TracerProvider is disabled *) if not t.enabled then + ok_none (* Do not start span if the max depth has been reached *) + else if parent_depth >= Atomic.get Spans.max_depth then ( + let parent_trace_id = + Option.fold ~none:"None" + ~some:(fun p -> + p.Span.context + |> SpanContext.span_id_of_span_context + |> Span_id.to_string + ) + parent + in + debug "Max_span_depth limit reached, not creating span %s (parent %s)" + name parent_trace_id ; ok_none - else + ) else let attributes = Attributes.merge_into t.attributes attributes in let span = Span.start ~attributes ?trace_context ~name ~parent ~span_kind () @@ -753,8 +813,17 @@ module Tracer = struct |> Spans.remove_from_spans |> Option.map (fun existing_span -> let old_context = Span.get_context existing_span in + let parent_trace_context = Span.get_trace_context parent in + let new_depth = + TraceContext.baggage_depth_of parent_trace_context + 1 + in let new_context : SpanContext.t = - let trace_context = span.Span.context.trace_context in + let trace_context = + TraceContext.( + update_with_baggage depth_key (string_of_int new_depth) + span.Span.context.trace_context + ) + in SpanContext.context (SpanContext.trace_id_of_span_context parent.context) old_context.span_id @@ -762,7 +831,6 @@ module Tracer = struct in let updated_span = {existing_span with parent= 
Some parent} in let updated_span = {updated_span with context= new_context} in - let () = Spans.add_to_spans ~span:updated_span in updated_span ) @@ -795,10 +863,14 @@ end let enable_span_garbage_collector ?(timeout = 86400.) () = Spans.GC.initialise_thread ~timeout -let with_tracing ?(attributes = []) ?(parent = None) ?trace_context ~name f = +let with_tracing ?(attributes = []) ?(parent = None) ?span_kind ?trace_context + ~name f = let tracer = Tracer.get_tracer ~name in if tracer.enabled then ( - match Tracer.start ~tracer ?trace_context ~attributes ~name ~parent () with + match + Tracer.start ?span_kind ~tracer ?trace_context ~attributes ~name ~parent + () + with | Ok span -> ( try let result = f span in @@ -925,7 +997,15 @@ module Propagator = struct let trace_context' = TraceContext.with_traceparent (Some traceparent) trace_context in - let carrier' = P.inject_into trace_context' carrier in + let new_depth = + TraceContext.baggage_depth_of trace_context' + 1 |> string_of_int + in + let trace_context'' = + TraceContext.( + update_with_baggage depth_key new_depth trace_context' + ) + in + let carrier' = P.inject_into trace_context'' carrier in f carrier' | _ -> f carrier diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 262acb52f27..ec33f4ac5ff 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -165,6 +165,8 @@ module Spans : sig val set_max_traces : int -> unit + val set_max_depth : int -> unit + val span_count : unit -> int val since : unit -> Span.t list * int @@ -190,12 +192,12 @@ module Tracer : sig -> (Span.t option, exn) result val update_span_with_parent : Span.t -> Span.t option -> Span.t option - (**[update_span_with_parent s p] returns [Some span] where [span] is an + (**[update_span_with_parent s p] returns [Some span] where [span] is an updated verison of the span [s]. 
- If [p] is [Some parent], [span] is a child of [parent], otherwise it is the + If [p] is [Some parent], [span] is a child of [parent], otherwise it is the original [s]. - - If the span [s] is finished or is no longer considered an on-going span, + + If the span [s] is finished or is no longer considered an on-going span, returns [None]. *) @@ -209,7 +211,7 @@ module Tracer : sig val finished_span_hashtbl_is_empty : unit -> bool end -(** [TracerProvider] module provides ways to intereact with the tracer providers. +(** [TracerProvider] module provides ways to intereact with the tracer providers. *) module TracerProvider : sig (** Type that represents a tracer provider.*) @@ -222,7 +224,7 @@ module TracerProvider : sig -> name_label:string -> uuid:string -> unit - (** [create ~enabled ~attributes ~endpoints ~name_label ~uuid] initializes a + (** [create ~enabled ~attributes ~endpoints ~name_label ~uuid] initializes a tracer provider based on the following parameters: [enabled], [attributes], [endpoints], [name_label], and [uuid]. *) @@ -234,17 +236,17 @@ module TracerProvider : sig -> unit -> unit (** [set ?enabled ?attributes ?endpoints ~uuid ()] updates the tracer provider - identified by the given [uuid] with the new configuration paremeters: - [enabled], [attributes], and [endpoints]. - + identified by the given [uuid] with the new configuration paremeters: + [enabled], [attributes], and [endpoints]. + If any of the configuration parameters are missing, the old ones are kept. - + Raises [Failure] if there are no tracer provider with the given [uuid]. *) val destroy : uuid:string -> unit - (** [destroy ~uuid] destroys the tracer provider with the given [uuid]. + (** [destroy ~uuid] destroys the tracer provider with the given [uuid]. If there are no tracer provider with the given [uuid], it does nothing. 
*) @@ -269,6 +271,7 @@ val enable_span_garbage_collector : ?timeout:float -> unit -> unit val with_tracing : ?attributes:(string * string) list -> ?parent:Span.t option + -> ?span_kind:SpanKind.t -> ?trace_context:TraceContext.t -> name:string -> (Span.t option -> 'a) @@ -288,24 +291,24 @@ val get_observe : unit -> bool val validate_attribute : string * string -> bool -(** [EnvHelpers] module is a helper module for the tracing library to easily - transition back and forth between a string list of environment variables to - a traceparent. +(** [EnvHelpers] module is a helper module for the tracing library to easily + transition back and forth between a string list of environment variables to + a traceparent. *) module EnvHelpers : sig val traceparent_key : string (** [traceparent_key] is a constant the represents the key of the traceparent - environment variable. + environment variable. *) val of_traceparent : string option -> string list (** [of_traceparent traceparent_opt] returns a singleton list consisting of a - envirentment variable with the key [traceparent_key] and value [v] if + envirentment variable with the key [traceparent_key] and value [v] if [traceparent_opt] is [Some v]. Otherwise, returns an empty list. *) val to_traceparent : string list -> string option - (** [to_traceparent env_var_lst] returns [Some v] where v is the value of the - environmental variable coresponding to the key [traceparent_key] from a + (** [to_traceparent env_var_lst] returns [Some v] where v is the value of the + environmental variable coresponding to the key [traceparent_key] from a string list of environmental variables [env_var_lst]. If there is no such evironmental variable in the list, it returns [None]. *) @@ -314,7 +317,7 @@ module EnvHelpers : sig (** [of_span span] returns a singleton list consisting of a envirentment variable with the key [traceparent_key] and value [v], where [v] is traceparent representation of span [s] (if [span] is [Some s]). 
- + If [span] is [None], it returns an empty list. *) end diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 592a12bbb26..352d5d488e4 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -24,6 +24,10 @@ let export_interval = ref 30. let set_export_interval t = export_interval := t +let export_chunk_size = Atomic.make 10000 + +let set_export_chunk_size x = Atomic.set export_chunk_size x + let host_id = ref "localhost" let set_host_id id = host_id := id @@ -83,10 +87,7 @@ module Content = struct ) in let tags = - let span_context = Span.get_context s in - let trace_context = - SpanContext.context_of_span_context span_context - in + let trace_context = Span.get_trace_context s in let baggage = TraceContext.baggage_of trace_context |> Option.value ~default:[] in @@ -281,8 +282,8 @@ module Destination = struct ] in let@ _ = - with_tracing ~trace_context:TraceContext.empty ~parent ~attributes - ~name + with_tracing ~span_kind:Server ~trace_context:TraceContext.empty + ~parent ~attributes ~name in all_spans |> Content.Json.ZipkinV2.content_of @@ -292,23 +293,48 @@ module Destination = struct with exn -> debug "Tracing: unable to export span : %s" (Printexc.to_string exn) + let rec span_info_chunks span_info batch_size = + let rec list_to_chunks_inner l n curr chunks = + if n = 0 then + if l <> [] then + list_to_chunks_inner l batch_size [] ((curr, batch_size) :: chunks) + else + (curr, batch_size) :: chunks + else + match l with + | [] -> + (curr, List.length curr) :: chunks + | h :: t -> + list_to_chunks_inner t (n - 1) (h :: curr) chunks + in + list_to_chunks_inner (fst span_info) batch_size [] [] + let flush_spans () = let ((_span_list, span_count) as span_info) = Spans.since () in let attributes = [("export.traces.count", string_of_int span_count)] in let@ parent = - with_tracing ~trace_context:TraceContext.empty ~parent:None ~attributes - ~name:"Tracing.flush_spans" + with_tracing 
~span_kind:Server ~trace_context:TraceContext.empty + ~parent:None ~attributes ~name:"Tracing.flush_spans" + in + let endpoints = + TracerProvider.get_tracer_providers () + |> List.filter TracerProvider.get_enabled + |> List.concat_map TracerProvider.get_endpoints in - TracerProvider.get_tracer_providers () - |> List.filter TracerProvider.get_enabled - |> List.concat_map TracerProvider.get_endpoints - |> List.iter (export_to_endpoint parent span_info) + let span_info_chunks = + span_info_chunks span_info (Atomic.get export_chunk_size) + in + List.iter + (fun s_i -> List.iter (export_to_endpoint parent s_i) endpoints) + span_info_chunks let delay = Delay.make () (* Note this signal will flush the spans and terminate the exporter thread *) let signal () = Delay.signal delay + let wait_exit = Delay.make () + let create_exporter () = enable_span_garbage_collector () ; Thread.create @@ -322,7 +348,8 @@ module Destination = struct signaled := true ) ; flush_spans () - done + done ; + Delay.signal wait_exit ) () @@ -342,6 +369,12 @@ module Destination = struct ) end -let flush_and_exit = Destination.signal +let flush_and_exit ~max_wait () = + D.debug "flush_and_exit: signaling thread to export now" ; + Destination.signal () ; + if Delay.wait Destination.wait_exit max_wait then + D.info "flush_and_exit: timeout on span export" + else + D.debug "flush_and_exit: span export finished" let main = Destination.main diff --git a/ocaml/libs/tracing/tracing_export.mli b/ocaml/libs/tracing/tracing_export.mli index 3f8ca750026..0714b7107a9 100644 --- a/ocaml/libs/tracing/tracing_export.mli +++ b/ocaml/libs/tracing/tracing_export.mli @@ -1,16 +1,16 @@ (* -* Copyright (C) 2024 Cloud Software Group -* -* This program is free software; you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License as published -* by the Free Software Foundation; version 2.1 only. with the special -* exception on linking described in file LICENSE. 
-* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License for more details. -*) + * Copyright (C) 2024 Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) (** [Tracing_export] is a module dedicated for the creation and management of threads that export the tracing data. @@ -23,6 +23,13 @@ val set_export_interval : float -> unit Default is every [30.] seconds. *) +val set_export_chunk_size : int -> unit +(** [set_export_chunk_size size] sets the maximum number of finished spans that + can be exported in one chunk to [size]. + + Default is 10000 spans. + *) + val set_host_id : string -> unit (** [set_host_id id] sets the id of the host to [id]. @@ -85,9 +92,9 @@ module Destination : sig end end -val flush_and_exit : unit -> unit -(** [flush_and_exit ()] sends a signal to flush the finish spans and terminate - the exporter thread. +val flush_and_exit : max_wait:float -> unit -> unit +(** [flush_and_exit ~max_wait ()] sends a signal to flush the finish spans and terminate + the exporter thread. It waits at most [max_wait] seconds. 
*) val main : unit -> Thread.t diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 9d0ce1f3ffc..4f4c4d3cec1 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -342,9 +342,9 @@ let rra_update rrd proc_pdp_st elapsed_pdp_st pdps = (* We assume that the data being given is of the form of a rate; that is, it's dependent on the time interval between updates. - Gauge and Absolute data sources are simply kept as is without any - time-based calculations, while Derive data sources will be changed according - to the time passed since the last measurement. (see CA-404597) *) + Gauge data sources are simply kept as is without any time-based + calculations, while Absolute and Derive data sources will be changed + according to the time passed since the last measurement. (see CA-404597) *) let process_ds_value ds value interval new_rrd = if interval > ds.ds_mrhb then nan @@ -361,8 +361,10 @@ let process_ds_value ds value interval new_rrd = let rate = match (ds.ds_ty, new_rrd) with - | Absolute, _ | Derive, true | Gauge, _ -> + | Derive, true | Gauge, _ -> value_raw + | Absolute, _ -> + value_raw /. interval | Derive, false -> ( match (ds.ds_last, value) with | VT_Int64 x, VT_Int64 y -> @@ -379,121 +381,136 @@ let process_ds_value ds value interval new_rrd = rate let ds_update rrd timestamp valuesandtransforms new_rrd = - (* Interval is the time between this and the last update - - Currently ds_update is called with datasources that belong to a single - plugin, correspondingly they all have the same timestamp. - Further refactoring is needed if timestamps per measurement are to be - introduced. *) - let first_ds_index, _ = valuesandtransforms.(0) in - let last_updated = rrd.rrd_dss.(first_ds_index).ds_last_updated in - let interval = timestamp -. last_updated in - (* Work around the clock going backwards *) - let interval = if interval < 0. then 5. 
else interval in - - (* start time (st) and age of the last processed pdp and the currently occupied one *) - let proc_pdp_st, _proc_pdp_age = get_times last_updated rrd.timestep in - let occu_pdp_st, occu_pdp_age = get_times timestamp rrd.timestep in - - (* The number of pdps that should result from this update *) - let elapsed_pdp_st = - Int64.to_int ((occu_pdp_st --- proc_pdp_st) /// rrd.timestep) - in - - (* if we're due one or more PDPs, pre_int is the amount of the - current update interval that will be used in calculating them, and - post_int is the amount left over - this step. If a PDP isn't post is what's left over *) - let pre_int, post_int = - if elapsed_pdp_st > 0 then - let pre = interval -. occu_pdp_age in - (pre, occu_pdp_age) - else - (interval, 0.0) - in - - (* We're now done with the last_updated value, so update it *) - rrd.last_updated <- timestamp ; + (* CA-408841 - don't update the rrd at all if list of datasources is empty *) + if valuesandtransforms <> [||] then ( + (* Interval is the time between this and the last update + + Currently ds_update is called with datasources that belong to a single + plugin, correspondingly they all have the same timestamp. + Further refactoring is needed if timestamps per measurement are to be + introduced. *) + let first_ds_index, _ = valuesandtransforms.(0) in + let last_updated = rrd.rrd_dss.(first_ds_index).ds_last_updated in + let interval = timestamp -. last_updated in + (* Work around the clock going backwards *) + let interval = if interval < 0. then 5. 
else interval in + + (* start time (st) and age of the last processed pdp and the currently occupied one *) + let proc_pdp_st, _proc_pdp_age = get_times last_updated rrd.timestep in + let occu_pdp_st, occu_pdp_age = get_times timestamp rrd.timestep in + + (* The number of pdps that should result from this update *) + let elapsed_pdp_st = + Int64.to_int ((occu_pdp_st --- proc_pdp_st) /// rrd.timestep) + in - (* Calculate the values we're going to store based on the input data and the type of the DS *) - let v2s = - Array.map - (fun (i, {value; _}) -> - let v = process_ds_value rrd.rrd_dss.(i) value interval new_rrd in - rrd.rrd_dss.(i).ds_last_updated <- timestamp ; - (i, v) - ) - valuesandtransforms - in - (* Update the PDP accumulators up until the most recent PDP *) - Array.iter - (fun (i, value) -> - let ds = rrd.rrd_dss.(i) in - if Utils.isnan value then - ds.ds_unknown_sec <- pre_int + (* if we're due one or more PDPs, pre_int is the amount of the + current update interval that will be used in calculating them, and + post_int is the amount left over + this step. If a PDP isn't post is what's left over *) + let pre_int, post_int = + if elapsed_pdp_st > 0 then + let pre = interval -. occu_pdp_age in + (pre, occu_pdp_age) else - (* CA-404597 - Gauge and Absolute values should be passed as-is, - without being involved in time-based calculations at all. - This applies to calculations below as well *) - match ds.ds_ty with - | Gauge | Absolute -> - ds.ds_value <- value - | Derive -> - ds.ds_value <- ds.ds_value +. (pre_int *. value /. 
interval) - ) - v2s ; + (interval, 0.0) + in + + (* We're now done with the last_updated value, so update it *) + rrd.last_updated <- timestamp ; - (* If we've passed a PDP point, we need to update the RRAs *) - if elapsed_pdp_st > 0 then ( - (* Calculate the PDPs for each DS *) - let pdps = + (* Calculate the values we're going to store based on the input data and the type of the DS *) + let v2s = Array.map - (fun (i, {transform; _}) -> - let ds = rrd.rrd_dss.(i) in - if interval > ds.ds_mrhb then - (i, nan) - else - let raw = - let proc_pdp_st = get_float_time last_updated rrd.timestep in - let occu_pdp_st = get_float_time timestamp rrd.timestep in - - match ds.ds_ty with - | Gauge | Absolute -> - ds.ds_value - | Derive -> - ds.ds_value - /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) - in - (* Apply the transform after the raw value has been calculated *) - let raw = apply_transform_function transform raw in - (* Make sure the values are not out of bounds after all the processing *) - if raw < ds.ds_min || raw > ds.ds_max then - (i, nan) - else - (i, raw) + (fun (i, {value; _}) -> + let v = process_ds_value rrd.rrd_dss.(i) value interval new_rrd in + rrd.rrd_dss.(i).ds_last_updated <- timestamp ; + (i, v) ) valuesandtransforms in - - rra_update rrd proc_pdp_st elapsed_pdp_st pdps ; - - (* Reset the PDP accumulators *) + (* Update the PDP accumulators up until the most recent PDP *) Array.iter (fun (i, value) -> let ds = rrd.rrd_dss.(i) in - if Utils.isnan value then ( - ds.ds_value <- 0.0 ; - ds.ds_unknown_sec <- post_int - ) else ( - ds.ds_unknown_sec <- 0.0 ; + if Utils.isnan value then + ds.ds_unknown_sec <- pre_int + else + (* CA-404597 - Gauge and Absolute values should be passed as-is, + without being involved in time-based calculations at all. + This applies to calculations below as well *) match ds.ds_ty with | Gauge | Absolute -> ds.ds_value <- value | Derive -> - ds.ds_value <- post_int *. value /. interval - ) + ds.ds_value <- ds.ds_value +. 
(pre_int *. value /. interval) ) - v2s + v2s ; + + (* If we've passed a PDP point, we need to update the RRAs *) + if elapsed_pdp_st > 0 then ( + (* Calculate the PDPs for each DS *) + let pdps = + Array.map + (fun (i, {transform; _}) -> + let ds = rrd.rrd_dss.(i) in + if interval > ds.ds_mrhb then + (i, nan) + else + let raw = + let proc_pdp_st = get_float_time last_updated rrd.timestep in + let occu_pdp_st = get_float_time timestamp rrd.timestep in + + match ds.ds_ty with + | Gauge | Absolute -> + ds.ds_value + | Derive -> + ds.ds_value + /. (occu_pdp_st -. proc_pdp_st -. ds.ds_unknown_sec) + in + (* Apply the transform after the raw value has been calculated *) + let raw = apply_transform_function transform raw in + + (* Make sure the values are not out of bounds after all the processing *) + match (ds.ds_ty, raw) with + | Derive, _ when raw > ds.ds_max && raw < ds.ds_max *. (1. +. 0.05) + -> + (* CA-411679: To handle deviations in CPU rates, Derive values + exceeding the maximum by up to 5% are capped at the maximum; + others are marked as unknown. This logic is specific to + Derive data sources because they represent rates derived + from differences over time, which can occasionally exceed + expected bounds due to measurement inaccuracies. *) + (i, ds.ds_max) + | (Derive | Gauge | Absolute), _ + when raw < ds.ds_min || raw > ds.ds_max -> + (i, nan) + | (Derive | Gauge | Absolute), _ -> + (i, raw) + ) + valuesandtransforms + in + + rra_update rrd proc_pdp_st elapsed_pdp_st pdps ; + + (* Reset the PDP accumulators *) + Array.iter + (fun (i, value) -> + let ds = rrd.rrd_dss.(i) in + if Utils.isnan value then ( + ds.ds_value <- 0.0 ; + ds.ds_unknown_sec <- post_int + ) else ( + ds.ds_unknown_sec <- 0.0 ; + match ds.ds_ty with + | Gauge | Absolute -> + ds.ds_value <- value + | Derive -> + ds.ds_value <- post_int *. value /. 
interval + ) + ) + v2s + ) ) (** Update the rrd with named values rather than just an ordered array @@ -744,7 +761,11 @@ let from_xml input = let name = get_el "name" i in let type_ = get_el "type" i in let min_hb = get_el "minimal_heartbeat" i in - let min = get_el "min" i in + (* CA-408126 - work around negative data in historical RRDs + where ds_min could have been incorrectly set to neg_infinity. + Setting ds_min to 0. means Fring.make below will turn negative + historical values to NaNs.*) + let min = max (float_of_string (get_el "min" i)) 0. in let max = get_el "max" i in ignore (get_el "last_ds" i) ; let value = get_el "value" i in @@ -767,7 +788,7 @@ let from_xml input = failwith "Bad format" ) ; ds_mrhb= float_of_string min_hb - ; ds_min= float_of_string min + ; ds_min= min ; ds_max= float_of_string max ; ds_last= VT_Unknown ; (* float_of_string "last_ds"; *) diff --git a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml index f016605848c..5f84e76f194 100644 --- a/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml +++ b/ocaml/libs/xapi-rrd/lib_test/unit_tests.ml @@ -147,46 +147,22 @@ let absolute_rrd = let absolute_rrd_CA_404597 () = let rra = rra_create CF_Average 100 1 0.5 in - let rra2 = rra_create CF_Average 100 10 0.5 in - let rra3 = rra_create CF_Average 100 100 0.5 in - let rra4 = rra_create CF_Average 100 1000 0.5 in - let ts = 1000000000.0 in + let ts = 0.0 in let ds = - ds_create "foo" Absolute ~mrhb:10.0 ~min:0. ~max:infinity (VT_Float 0.0) + ds_create "foo" Absolute ~mrhb:1000.0 ~min:0. ~max:infinity (VT_Float 0.0) in - let ds2 = - ds_create "bar" Absolute ~mrhb:10.0 ~min:0. ~max:infinity (VT_Float 0.0) - in - let ds3 = - ds_create "baz" Absolute ~mrhb:10.0 ~min:0. ~max:infinity (VT_Float 0.0) - in - let ds4 = - ds_create "boo" Absolute ~mrhb:10.0 ~min:0. 
~max:infinity (VT_Float 0.0) - in - let rrd = rrd_create [|ds; ds2; ds3; ds4|] [|rra; rra2; rra3; rra4|] 1L ts in + let rrd = rrd_create [|ds|] [|rra|] 1L ts in let id = Identity in for i = 1 to 100000 do - let t = 1000000.0 +. (0.7 *. float_of_int i) in + let t = 300. *. float_of_int i in let ((_, val1) as v1) = - (0, {value= VT_Float (0.5 +. (0.5 *. sin (t /. 10.0))); transform= id}) + (0, {value= VT_Float (300. *. float_of_int i); transform= id}) in - let ((_, val2) as v2) = - (1, {value= VT_Float (1.5 +. (0.5 *. cos (t /. 80.0))); transform= id}) - in - let ((_, val3) as v3) = - (2, {value= VT_Float (3.5 +. (0.5 *. sin (t /. 700.0))); transform= id}) - in - let ((_, val4) as v4) = - (3, {value= VT_Float (6.5 +. (0.5 *. cos (t /. 5000.0))); transform= id}) - in - ds_update rrd t [|v1; v2; v3; v4|] false ; + ds_update rrd t [|v1|] false ; - Array.iter2 - (fun ds value -> - compare_float __LOC__ ds.ds_value - (float_of_string (ds_value_to_string value.value)) - ) - rrd.rrd_dss [|val1; val2; val3; val4|] + compare_float __LOC__ + (float_of_string (ds_value_to_string val1.value) /. 300.) + rrd.rrd_dss.(0).ds_value done (** Verify that Gauge data soruce values are correctly handled by the RRD lib diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.mli deleted file mode 100644 index 9af45ab6096..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/date.mli +++ /dev/null @@ -1,115 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) - -(** date-time with support for keeping timezone for ISO 8601 conversion *) -type t = Clock.Date.t - -(** Conversions *) - -val of_ptime : Ptime.t -> t -(** Convert ptime to time in UTC *) - -val to_ptime : t -> Ptime.t -(** Convert date/time to a ptime value: the number of seconds since 00:00:00 - UTC, 1 Jan 1970. Assumes the underlying {!t} is in UTC *) - -val of_unix_time : float -> t -(** Convert calendar time [x] (as returned by e.g. Unix.time), to time in UTC *) - -val to_unix_time : t -> float -(** Convert date/time to a unix timestamp: the number of seconds since - 00:00:00 UTC, 1 Jan 1970. Assumes the underlying {!t} is in UTC *) - -val to_rfc822 : t -> string -(** Convert date/time to email-formatted (RFC 822) string. *) - -val to_rfc3339 : t -> string -(** Convert date/time to an RFC-3339-formatted string. It also complies with - the ISO 8601 format *) - -val of_iso8601 : string -> t -(** Convert ISO 8601 formatted string to a date/time value. Does not accept a - timezone annotated datetime - i.e. 
string must be UTC, and end with a Z *) - -val epoch : t -(** 00:00:00 UTC, 1 Jan 1970, in UTC *) - -val now : unit -> t -(** Count the number of seconds passed since 00:00:00 UTC, 1 Jan 1970, in UTC *) - -val _localtime_string : Ptime.tz_offset_s option -> Ptime.t -> string -(** exposed for testing *) - -val localtime : unit -> t -(** Count the number of seconds passed since 00:00:00 UTC, 1 Jan 1970, in local - time *) - -(** Comparisons *) - -val equal : t -> t -> bool -(** [equal a b] returns whether [a] and [b] are equal *) - -val compare : t -> t -> int -(** [compare a b] returns -1 if [a] is earlier than [b], 1 if [a] is later than - [b] or the ordering of the timezone printer *) - -val is_earlier : than:t -> t -> bool -(** [is_earlier ~than a] returns whether the timestamp [a] happens before - [than] *) - -val is_later : than:t -> t -> bool -(** [is_later ~than a] returns whether the timestamp [a] happens after [than] - *) - -val diff : t -> t -> Ptime.Span.t -(** [diff a b] returns the span of time corresponding to [a - b] *) - -(** Deprecated bindings, these will be removed in a future release: *) - -val eq : t -> t -> bool -[@@deprecated "Use Date.equal"] -(** [eq a b] returns whether [a] and [b] are equal *) - -val rfc822_to_string : t -> string -[@@deprecated "Use Date.to_rfc822"] -(** Same as {!to_rfc822} *) - -val rfc822_of_float : float -> t -[@@deprecated "Use Date.of_unix_time"] -(** Same as {!of_unix_time} *) - -val of_float : float -> t -[@@deprecated "Use Date.of_unix_time"] -(** Same as {!of_unix_time} *) - -val to_float : t -> float -[@@deprecated "Use Date.to_unix_time"] -(** Same as {!to_unix_time} *) - -val to_string : t -> string -[@@deprecated "Use Date.to_rfc3339"] -(** Same as {!to_rfc3339} *) - -val of_string : string -> t -[@@deprecated "Use Date.of_iso8601"] -(** Same as {!of_iso8601} *) - -val never : t [@@deprecated "Use Date.epoch"] -(** Same as {!epoch} *) - -(** Deprecated alias for {!t} *) -type iso8601 = t [@@deprecated "Use 
Date.t"] - -(** Deprecated alias for {!t} *) -type rfc822 = t [@@deprecated "Use Date.t"] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/dune deleted file mode 100644 index 8566d86e12c..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-date/dune +++ /dev/null @@ -1,6 +0,0 @@ -(library - (name xapi_stdext_date) - (public_name xapi-stdext-date) - (modules :standard) - (libraries clock ptime) -) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml index 7308c756d8b..bb20eed4f4e 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/bench/bench_encodings.ml @@ -1,5 +1,5 @@ open Bechamel -open Xapi_stdext_encodings.Encodings +open Xapi_stdext_encodings let test name f = Test.make_indexed_with_resource ~name ~args:[10; 1000; 10000] @@ -10,6 +10,6 @@ let test name f = let benchmarks = Test.make_grouped ~name:"Encodings.validate" - [test "UTF8_XML" UTF8_XML.validate] + [test "UTF8.XML" Utf8.XML.is_valid] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune index 742dd212f1e..839346e35ce 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/dune @@ -1,12 +1,6 @@ (library (name xapi_stdext_encodings) (public_name xapi-stdext-encodings) - (modules :standard \ test) + (modules :standard) ) -(test - (name test) - (package xapi-stdext-encodings) - (modules test) - (libraries alcotest xapi-stdext-encodings) -) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.ml deleted file mode 100644 index 2dfd45a7d18..00000000000 --- 
a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.ml +++ /dev/null @@ -1,167 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -exception UCS_value_out_of_range - -exception UCS_value_prohibited_in_UTF8 - -exception UCS_value_prohibited_in_XML - -exception UTF8_character_incomplete - -exception UTF8_header_byte_invalid - -exception UTF8_continuation_byte_invalid - -exception UTF8_encoding_not_canonical - -exception String_incomplete - -(* === Unicode Functions === *) - -module UCS = struct - let is_non_character value = - false - || (0xfdd0 <= value && value <= 0xfdef) (* case 1 *) - || Int.logand 0xfffe value = 0xfffe - (* case 2 *) - [@@inline] -end - -module XML = struct - let is_illegal_control_character value = - let value = Uchar.to_int value in - value < 0x20 && value <> 0x09 && value <> 0x0a && value <> 0x0d - [@@inline] -end - -(* === UCS Validators === *) - -module type UCS_VALIDATOR = sig - val validate : Uchar.t -> unit -end - -module UTF8_UCS_validator = struct - let validate value = - if (UCS.is_non_character [@inlined]) (Uchar.to_int value) then - raise UCS_value_prohibited_in_UTF8 - [@@inline] -end - -module XML_UTF8_UCS_validator = struct - let validate value = - (UTF8_UCS_validator.validate [@inlined]) value ; - if (XML.is_illegal_control_character [@inlined]) value then - raise UCS_value_prohibited_in_XML -end - -(* === String Validators === *) - -module type STRING_VALIDATOR = sig - val 
is_valid : string -> bool - - val validate : string -> unit - - val longest_valid_prefix : string -> string -end - -exception Validation_error of int * exn - -module UTF8_XML : STRING_VALIDATOR = struct - let decode_continuation_byte byte = - if byte land 0b11000000 = 0b10000000 then - byte land 0b00111111 - else - raise UTF8_continuation_byte_invalid - - let rec decode_continuation_bytes string last value index = - if index <= last then - let chunk = decode_continuation_byte (Char.code string.[index]) in - let value = (value lsl 6) lor chunk in - decode_continuation_bytes string last value (index + 1) - else - value - - let validate_character_utf8 string byte index = - let value, width = - if byte land 0b10000000 = 0b00000000 then - (byte, 1) - else if byte land 0b11100000 = 0b11000000 then - (byte land 0b0011111, 2) - else if byte land 0b11110000 = 0b11100000 then - (byte land 0b0001111, 3) - else if byte land 0b11111000 = 0b11110000 then - (byte land 0b0000111, 4) - else - raise UTF8_header_byte_invalid - in - let value = - if width = 1 then - value - else - decode_continuation_bytes string (index + width - 1) value (index + 1) - in - XML_UTF8_UCS_validator.validate (Uchar.unsafe_of_int value) ; - width - - let rec validate_aux string length index = - if index = length then - () - else - let width = - try - let byte = string.[index] |> Char.code in - validate_character_utf8 string byte index - with - | Invalid_argument _ -> - raise String_incomplete - | error -> - raise (Validation_error (index, error)) - in - validate_aux string length (index + width) - - let validate string = validate_aux string (String.length string) 0 - - let rec validate_with_fastpath string stop pos = - if pos < stop then - (* the compiler is smart enough to optimize the 'int32' away here, - and not allocate *) - let i32 = String.get_int32_ne string pos |> Int32.to_int in - (* test that for all bytes 0x20 <= byte < 0x80. 
- If any is <0x20 it would cause a negative value to appear in that byte, - which we can detect if we use 0x80 as a mask. - Byte >= 0x80 can be similarly detected with a mask of 0x80 on each byte. - We don't want to see a 0x80 from either of these, hence we bitwise or the 2 values together. - *) - if i32 lor (i32 - 0x20_20_20_20) land 0x80_80_80_80 = 0 then - validate_with_fastpath string stop (pos + 4) - else (* when the condition doesn't hold fall back to full UTF8 decoder *) - validate_aux string (String.length string) pos - else - validate_aux string (String.length string) pos - - let validate_with_fastpath string = - validate_with_fastpath string (String.length string - 3) 0 - - let validate = - if Sys.word_size = 64 then - validate_with_fastpath - else - validate - - let is_valid string = try validate string ; true with _ -> false - - let longest_valid_prefix string = - try validate string ; string - with Validation_error (index, _) -> String.sub string 0 index -end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.mli deleted file mode 100644 index 2a139ae3786..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/encodings.mli +++ /dev/null @@ -1,84 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) - -(** Encoding helper modules *) - -(** {2 Exceptions} *) - -exception UCS_value_out_of_range - -exception UCS_value_prohibited_in_UTF8 - -exception UCS_value_prohibited_in_XML - -exception UTF8_character_incomplete - -exception UTF8_header_byte_invalid - -exception UTF8_continuation_byte_invalid - -exception UTF8_encoding_not_canonical - -exception String_incomplete - -(** {2 UCS Validators} *) - -(** Validates UCS character values. *) -module type UCS_VALIDATOR = sig - val validate : Uchar.t -> unit -end - -(** Accepts all values within the UCS character value range except - * those which are invalid for all UTF-8-encoded XML documents. *) -module XML_UTF8_UCS_validator : UCS_VALIDATOR - -module XML : sig - val is_illegal_control_character : Uchar.t -> bool - (** Returns true if and only if the given value corresponds to - * a illegal control character as defined in section 2.2 of - * the XML specification, version 1.0. *) -end - -(** {2 String Validators} *) - -(** Provides functionality for validating and processing - * strings according to a particular character encoding. *) -module type STRING_VALIDATOR = sig - val is_valid : string -> bool - (** Returns true if and only if the given string is validly-encoded. *) - - val validate : string -> unit - (** Raises an encoding error if the given string is not validly-encoded. *) - - val longest_valid_prefix : string -> string - (** Returns the longest validly-encoded prefix of the given string. *) -end - -(** Represents a validation error as a tuple [(i,e)], where: - * [i] = the index of the first non-compliant character; - * [e] = the reason for non-compliance. *) -exception Validation_error of int * exn - -(** Provides functions for validating and processing - * strings according to the UTF-8 character encoding, - * with certain additional restrictions on UCS values - * imposed by the XML specification. - * - * Validly-encoded strings must satisfy both RFC 3629 - * and section 2.2 of the XML specification. 
- * - * For further information, see: - * http://www.rfc.net/rfc3629.html - * http://www.w3.org/TR/REC-xml/#charsets *) -module UTF8_XML : STRING_VALIDATOR diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/test.ml deleted file mode 100644 index 9cc75b297d0..00000000000 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/test.ml +++ /dev/null @@ -1,533 +0,0 @@ -(* - * Copyright (C) 2006-2009 Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - *) -module E = Xapi_stdext_encodings.Encodings - -(* Pull in the infix operators from Encodings used in this test *) -let ( --- ), ( +++ ), ( <<< ) = (Int.sub, Int.add, Int.shift_left) - -(* === Mock exceptions ==================================================== *) - -(** Simulates a decoding error. *) -exception Decode_error - -(* === Mock UCS validators ================================================= *) - -(** A validator that always succeeds. *) -module Lenient_UCS_validator : E.UCS_VALIDATOR = struct - let validate _ = () -end - -(* === Mock character validators ============================================= *) - -(** A validator that succeeds for all characters. *) -module Universal_character_validator = struct - let validate _ = () -end - -(** A validator that fails for all characters. *) -module Failing_character_validator = struct - let validate _ = raise Decode_error -end - -(** A validator that succeeds for all characters except the letter 'F'. 
*) -module Selective_character_validator = struct - let validate uchar = - if Uchar.equal uchar (Uchar.of_char 'F') then raise Decode_error -end - -(* === Test helpers ======================================================== *) - -let assert_true = Alcotest.(check bool) "true" true - -let assert_false = Alcotest.(check bool) "false" false - -let assert_raises_match exception_match fn = - try - fn () ; - Alcotest.fail "assert_raises_match: failure expected" - with failure -> - if not (exception_match failure) then - raise failure - else - () - -(* === Mock codecs ========================================================= *) - -module UCS = struct - (* === Unicode Functions === *) - let min_value = 0x000000 - - let max_value = 0x10ffff - (* used to be 0x1fffff, but this changed and Unicode won't allocate larger than 0x10ffff *) - - let is_non_character value = - false - || (0xfdd0 <= value && value <= 0xfdef) (* case 1 *) - || Int.logand 0xfffe value = 0xfffe - (* case 2 *) - - let is_out_of_range value = value < min_value || value > max_value - - let is_surrogate value = 0xd800 <= value && value <= 0xdfff - - (** A list of UCS non-characters values, including: - a. non-characters within the basic multilingual plane; - b. non-characters at the end of the basic multilingual plane; - c. non-characters at the end of the private use area. *) - let non_characters = - [ - 0x00fdd0 - ; 0x00fdef - ; (* case a. *) - 0x00fffe - ; 0x00ffff - ; (* case b. *) - 0x1ffffe - ; 0x1fffff (* case c. *) - ] - - (** A list of UCS character values located immediately before or - after UCS non-character values, including: - a. non-characters within the basic multilingual plane; - b. non-characters at the end of the basic multilingual plane; - c. non-characters at the end of the private use area. *) - let valid_characters_next_to_non_characters = - [ - 0x00fdcf - ; 0x00fdf0 - ; (* case a. *) - 0x00fffd - ; 0x010000 - ; (* case b. *) - 0x1ffffd - ; 0x200000 (* case c. 
*) - ] - - let test_is_non_character () = - List.iter (fun value -> assert_true (is_non_character value)) non_characters ; - List.iter - (fun value -> assert_false (is_non_character value)) - valid_characters_next_to_non_characters - - let test_is_out_of_range () = - assert_true (is_out_of_range (min_value --- 1)) ; - assert_false (is_out_of_range min_value) ; - assert_false (is_out_of_range max_value) ; - assert_true (is_out_of_range (max_value +++ 1)) - - let test_is_surrogate () = - assert_false (is_surrogate 0xd7ff) ; - assert_true (is_surrogate 0xd800) ; - assert_true (is_surrogate 0xdfff) ; - assert_false (is_surrogate 0xe000) - - let tests = - [ - ("test_is_non_character", `Quick, test_is_non_character) - ; ("test_is_out_of_range", `Quick, test_is_out_of_range) - ; ("test_is_surrogate", `Quick, test_is_surrogate) - ] -end - -module Lenient_UTF8_codec = struct - let decode_header_byte byte = - if byte land 0b10000000 = 0b00000000 then - (byte, 1) - else if byte land 0b11100000 = 0b11000000 then - (byte land 0b0011111, 2) - else if byte land 0b11110000 = 0b11100000 then - (byte land 0b0001111, 3) - else if byte land 0b11111000 = 0b11110000 then - (byte land 0b0000111, 4) - else - raise E.UTF8_header_byte_invalid - - let decode_continuation_byte byte = - if byte land 0b11000000 = 0b10000000 then - byte land 0b00111111 - else - raise E.UTF8_continuation_byte_invalid - - let width_required_for_ucs_value value = - if value < 0x000080 (* 1 lsl 7 *) then - 1 - else if value < 0x000800 (* 1 lsl 11 *) then - 2 - else if value < 0x010000 (* 1 lsl 16 *) then - 3 - else - 4 - - let decode_character string index = - let value, width = decode_header_byte (Char.code string.[index]) in - let value = - if width = 1 then - value - else - let value = ref value in - for index = index + 1 to index + width - 1 do - let chunk = decode_continuation_byte (Char.code string.[index]) in - value := (!value lsl 6) lor chunk - done ; - if width > width_required_for_ucs_value !value then - 
raise E.UTF8_encoding_not_canonical ; - !value - in - (value, width) -end - -(* === Mock string validators ============================================== *) -module Mock_String_validator (Validator : E.UCS_VALIDATOR) : - E.STRING_VALIDATOR = struct - (* no longer a functor in Encodings for performance reasons, - so modify the original string passed as argument instead replacing - characters that would be invalid with a known invalid XML char: 0x0B. - *) - - let transform str = - let b = Buffer.create (String.length str) in - let rec loop pos = - if pos < String.length str then - let value, width = Lenient_UTF8_codec.decode_character str pos in - let () = - try - let u = Uchar.of_int value in - Validator.validate u ; Buffer.add_utf_8_uchar b u - with _ -> Buffer.add_char b '\x0B' - in - loop (pos + width) - in - loop 0 ; Buffer.contents b - - let is_valid str = E.UTF8_XML.is_valid (transform str) - - let validate str = - try E.UTF8_XML.validate (transform str) - with E.Validation_error (pos, _) -> - raise (E.Validation_error (pos, Decode_error)) - - let longest_valid_prefix str = E.UTF8_XML.longest_valid_prefix (transform str) -end - -(** A validator that accepts all strings. *) -module Universal_string_validator = - Mock_String_validator (Universal_character_validator) - -(** A validator that rejects all strings. *) -module Failing_string_validator = - Mock_String_validator (Failing_character_validator) - -(** A validator that rejects strings containing the character 'F'. 
*) -module Selective_string_validator = - Mock_String_validator (Selective_character_validator) - -(* === Tests =============================================================== *) - -module String_validator = struct - let test_is_valid () = - assert_true (Universal_string_validator.is_valid "") ; - assert_true (Universal_string_validator.is_valid "123456789") ; - assert_true (Selective_string_validator.is_valid "") ; - assert_true (Selective_string_validator.is_valid "123456789") ; - assert_false (Selective_string_validator.is_valid "F23456789") ; - assert_false (Selective_string_validator.is_valid "1234F6789") ; - assert_false (Selective_string_validator.is_valid "12345678F") ; - assert_false (Selective_string_validator.is_valid "FFFFFFFFF") - - let test_longest_valid_prefix () = - Alcotest.(check string) - "prefix" - (Universal_string_validator.longest_valid_prefix "") - "" ; - Alcotest.(check string) - "prefix" - (Universal_string_validator.longest_valid_prefix "123456789") - "123456789" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "") - "" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "123456789") - "123456789" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "F23456789") - "" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "1234F6789") - "1234" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "12345678F") - "12345678" ; - Alcotest.(check string) - "prefix" - (Selective_string_validator.longest_valid_prefix "FFFFFFFFF") - "" - - (** Tests that validation does not fail for an empty string. 
*) - let test_validate_with_empty_string () = E.UTF8_XML.validate "" - - let test_validate_with_incomplete_string () = - Alcotest.check_raises "Validation fails correctly for an incomplete string" - E.String_incomplete (fun () -> E.UTF8_XML.validate "\xc2" - ) - - let test_validate_with_failing_decoders () = - Failing_string_validator.validate "" ; - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "F") ; - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "F12345678") ; - assert_raises_match - (function E.Validation_error (4, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "0123F5678") ; - assert_raises_match - (function E.Validation_error (8, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "01234567F") ; - assert_raises_match - (function E.Validation_error (0, Decode_error) -> true | _ -> false) - (fun () -> Selective_string_validator.validate "FFFFFFFFF") - - let tests = - [ - ("test_is_valid", `Quick, test_is_valid) - ; ("test_longest_valid_prefix", `Quick, test_longest_valid_prefix) - ; ( "test_validate_with_empty_string" - , `Quick - , test_validate_with_empty_string - ) - ; ( "test_validate_with_incomplete_string" - , `Quick - , test_validate_with_incomplete_string - ) - ; ( "test_validate_with_failing_decoders" - , `Quick - , test_validate_with_failing_decoders - ) - ] -end - -module XML = struct - include E.XML - - let test_is_illegal_control_character () = - assert_true (is_illegal_control_character (Uchar.of_int 0x00)) ; - assert_true (is_illegal_control_character (Uchar.of_int 0x19)) ; - assert_false (is_illegal_control_character (Uchar.of_int 0x09)) ; - assert_false (is_illegal_control_character (Uchar.of_int 0x0a)) ; - assert_false (is_illegal_control_character (Uchar.of_int 0x0d)) ; - assert_false 
(is_illegal_control_character (Uchar.of_int 0x20)) - - let tests = - [ - ( "test_is_illegal_control_character" - , `Quick - , test_is_illegal_control_character - ) - ] -end - -(** Tests the XML-specific UTF-8 UCS validation function. *) -module XML_UTF8_UCS_validator = struct - include E.XML_UTF8_UCS_validator - - let validate uchar = - if Uchar.is_valid uchar then - validate @@ Uchar.of_int uchar - else if uchar < Uchar.to_int Uchar.min || uchar > Uchar.to_int Uchar.max - then - raise E.UCS_value_out_of_range - else - raise E.UCS_value_prohibited_in_UTF8 - - let test_validate () = - let value = ref (UCS.min_value --- 1) in - while !value <= UCS.max_value +++ 1 do - if UCS.is_out_of_range !value then - Alcotest.check_raises "should fail" E.UCS_value_out_of_range (fun () -> - validate !value - ) - else if UCS.is_non_character !value || UCS.is_surrogate !value then - Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_UTF8 - (fun () -> validate !value - ) - else if - Uchar.is_valid !value - && XML.is_illegal_control_character (Uchar.of_int !value) - then - Alcotest.check_raises "should fail" E.UCS_value_prohibited_in_XML - (fun () -> validate !value - ) - else - validate !value ; - value := !value +++ 1 - done - - let tests = [("test_validate", `Quick, test_validate)] -end - -module UTF8_codec = struct - (** A list of canonical encoding widths of UCS values, - represented by tuples of the form (v, w), where: - v = the UCS character value to be encoded; and - w = the width of the encoded character, in bytes. 
*) - let valid_ucs_value_widths = - [ - (1, 1) - ; ((1 <<< 7) --- 1, 1) - ; (1 <<< 7, 2) - ; ((1 <<< 11) --- 1, 2) - ; (1 <<< 11, 3) - ; ((1 <<< 16) --- 1, 3) - ; (1 <<< 16, 4) - ; ((1 <<< 21) --- 1, 4) - ] - - let width_required_for_ucs_value value = - if value < 0x000080 (* 1 lsl 7 *) then - 1 - else if value < 0x000800 (* 1 lsl 11 *) then - 2 - else if value < 0x010000 (* 1 lsl 16 *) then - 3 - else - 4 - - let test_width_required_for_ucs_value () = - List.iter - (fun (value, width) -> - Alcotest.(check int) - "same ints" - (width_required_for_ucs_value value) - width - ) - valid_ucs_value_widths - - (** A list of valid character decodings represented by - tuples of the form (s, (v, w)), where: - - s = a validly-encoded UTF-8 string; - v = the UCS value represented by the string; - (which may or may not be valid in its own right) - w = the width of the encoded string, in bytes. - - For each byte length b in [1...4], the list contains - decodings for: - - v_min = the smallest UCS value encodable in b bytes. - v_max = the greatest UCS value encodable in b bytes. 
*) - let valid_character_decodings = - [ - (* 7654321 *) - (* 0b0xxxxxxx *) - (* 00000000000000xxxxxxx *) - ( "\x00" (* 0b00000000 *) - , (0b000000000000000000000, 1) - ) - ; ( "\x7f" (* 0b01111111 *) - , (0b000000000000001111111, 1) - ) - ; (* 10987654321 *) - (* 0b110xxxsx 0b10xxxxxx *) - (* 0000000000xxxsxxxxxxx *) - ( "\xc2\x80" (* 0b11000010 0b10000000 *) - , (0b000000000000010000000, 2) - ) - ; ( "\xdf\xbf" (* 0b11011111 0b10111111 *) - , (0b000000000011111111111, 2) - ) - ; (* 6543210987654321 *) - (* 0b1110xxxx 0b10sxxxxx 0b10xxxxxx *) - (* xxxxsxxxxxxxxxxx *) - ( "\xe0\xa0\x80" (* 0b11100000 0b10100000 0b10000000 *) - , (0b000000000100000000000, 3) - ) - ; ( "\xef\xbf\xbf" (* 0b11101111 0b10111111 0b10111111 *) - , (0b000001111111111111111, 3) - ) - ; (* 109876543210987654321 *) - (* 0b11110xxx 0b10xsxxxx 0b10xxxxxx 0b10xxxxxx *) - (* xxxxsxxxxxxxxxxxxxxxx *) - ( "\xf0\x90\x80\x80" (* 0b11110000 0b10010000 0b10000000 0b10000000 *) - , (0b000010000000000000000, 4) - ) - ; ( "\xf7\xbf\xbf\xbf" (* 0b11110111 0b10111111 0b10111111 0b10111111 *) - , (0b111111111111111111111, 4) - ) - ] - - let uchar = Alcotest.int - - let test_decode_character_when_valid () = - List.iter - (fun (string, (value, width)) -> - Alcotest.(check (pair uchar int)) - "same pair" - (Lenient_UTF8_codec.decode_character string 0) - (value, width) - ) - valid_character_decodings - - (** A list of strings containing overlong character encodings. - For each byte length b in [2...4], this list contains the - overlong encoding e (v), where v is the UCS value one less - than the smallest UCS value validly-encodable in b bytes. 
*) - let overlong_character_encodings = - [ - "\xc1\xbf" (* 0b11000001 0b10111111 *) - ; "\xe0\x9f\xbf" (* 0b11100000 0b10011111 0b10111111 *) - ; "\xf0\x8f\xbf\xbf" (* 0b11110000 0b10001111 0b10111111 0b10111111 *) - ] - - let test_decode_character_when_overlong () = - List.iter - (fun string -> - Alcotest.check_raises "should fail" E.UTF8_encoding_not_canonical - (fun () -> Lenient_UTF8_codec.decode_character string 0 |> ignore - ) - ) - overlong_character_encodings - - let tests = - [ - ( "test_width_required_for_ucs_value" - , `Quick - , test_width_required_for_ucs_value - ) - ; ( "test_decode_character_when_valid" - , `Quick - , test_decode_character_when_valid - ) - ; ( "test_decode_character_when_overlong" - , `Quick - , test_decode_character_when_overlong - ) - ] -end - -let () = - Alcotest.run "Encodings" - [ - ("UCS", UCS.tests) - ; ("XML", XML.tests) - ; ("String_validator", String_validator.tests) - ; ("XML_UTF8_UCS_validator", XML_UTF8_UCS_validator.tests) - ; ("UTF8_codec", UTF8_codec.tests) - ] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.ml new file mode 100644 index 00000000000..d17d85b3b37 --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.ml @@ -0,0 +1,74 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +let is_valid = String.is_valid_utf_8 + +(* deprecated - reject invalid UTF-8 *) +let longest_valid_prefix str = + let len = String.length str in + let rec loop = function + | i when i < len -> + let dec = String.get_utf_8_uchar str i in + if Uchar.utf_decode_is_valid dec then + loop (i + Uchar.utf_decode_length dec) + else + String.sub str 0 i + | i when i = len -> + str + | i -> + String.sub str 0 i (* never reached *) + in + loop 0 + +module XML = struct + (** some UTF-8 characters are not legal in XML. Assuming uchar is + legal UTF-8, further check that it is legal in XML *) + let is_legal uchar = + let uchar = Uchar.to_int uchar in + uchar >= 0x20 || uchar = 0x09 || uchar = 0x0a || uchar = 0x0d + [@@inline] + + let is_valid str = + let len = String.length str in + let rec loop = function + | i when i < len -> + let dec = String.get_utf_8_uchar str i in + Uchar.utf_decode_is_valid dec + && is_legal (Uchar.utf_decode_uchar dec) + && loop (i + Uchar.utf_decode_length dec) + | _ -> + true + in + loop 0 + + (* deprecated - reject invalid UTF-8 *) + let longest_valid_prefix str = + let len = String.length str in + let rec loop = function + | i when i < len -> + let dec = String.get_utf_8_uchar str i in + if + Uchar.utf_decode_is_valid dec + && is_legal (Uchar.utf_decode_uchar dec) + then + loop (i + Uchar.utf_decode_length dec) + else + String.sub str 0 i + | i when i = len -> + str (* avoid copy *) + | i -> + String.sub str 0 i (* never reached *) + in + loop 0 +end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.mli new file mode 100644 index 00000000000..6d8949e2f8f --- /dev/null +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-encodings/utf8.mli @@ -0,0 +1,31 @@ +(* + * Copyright (c) Cloud Software Group, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +val is_valid : string -> bool +(** true, if a string is a proper UTF-8 string *) + +val longest_valid_prefix : string -> string +(** Deprecated. Longest prefix of a string that is proper UTF-8 *) + +(* strings in XML are more restricted than UTF-8 in general. The must be + valid UTF-8 and must not contain certain characters *) + +module XML : sig + val is_valid : string -> bool + (** true, if a string is a proper UTF-8 string in XML *) + + val longest_valid_prefix : string -> string + (** Deprecated. longest prefix of a string that is proper UTF-8. + Better reject invalid UTF-8. 
*) +end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune index 5d61f52cfc4..0dc52b78cd8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/dune @@ -1,29 +1,47 @@ (library (public_name xapi-stdext-threads) - (name xapi_stdext_threads) + (name xapi_stdext_threads) (modules :standard \ ipq scheduler threadext_test ipq_test scheduler_test) (libraries + ambient-context.thread_local mtime mtime.clock.os threads.posix unix + tgroup xapi-stdext-unix xapi-stdext-pervasives) (foreign_stubs (language c) - (names delay_stubs)) + (names delay_stubs) + ) ) (library (public_name xapi-stdext-threads.scheduler) (name xapi_stdext_threads_scheduler) (modules ipq scheduler) - (libraries mtime mtime.clock.os threads.posix unix xapi-log xapi-stdext-threads clock) -) + (libraries + mtime + mtime.clock.os + threads.posix + unix + xapi-log + xapi-stdext-threads + clock) + ) (tests (names threadext_test ipq_test scheduler_test) (package xapi-stdext-threads) (modules threadext_test ipq_test scheduler_test) - (libraries xapi_stdext_threads alcotest mtime.clock.os mtime fmt threads.posix xapi_stdext_threads_scheduler) + (libraries + xapi_stdext_threads + alcotest + mtime.clock.os + mtime + fmt + tgroup + threads.posix + xapi_stdext_threads_scheduler) ) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml index 7293ae625e1..45bbd93622c 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.ml @@ -19,6 +19,8 @@ type 'a t = {default: 'a event; mutable size: int; mutable data: 'a event array} exception EmptyHeap +exception OutOfBounds of int + let create n default = if n <= 0 then invalid_arg "create" @@ -61,7 +63,7 @@ let maximum h = let remove h s = if h.size <= 0 then raise EmptyHeap ; if s < 0 || s >= h.size then - 
invalid_arg (Printf.sprintf "%s: index %d out of bounds" __FUNCTION__ s) ; + raise (OutOfBounds s) ; let n = h.size - 1 in let d = h.data in let x = d.(n) in diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli index 19f8bf1e33f..b542ef9d65d 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq.mli @@ -18,6 +18,8 @@ type 'a t exception EmptyHeap +exception OutOfBounds of int + val create : int -> 'a -> 'a t (** [create n default] creates an empty Imperative priority queue. The queue initially is initialized to store [n] elements. diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml index a9cc2611da8..aab499da74a 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/ipq_test.ml @@ -18,14 +18,9 @@ module Ipq = Xapi_stdext_threads_scheduler.Ipq let test_out_of_index () = let q = Ipq.create 10 0 in Ipq.add q {Ipq.ev= 123; Ipq.time= Mtime_clock.elapsed ()} ; - let is_oob = function - | Invalid_argument s when String.ends_with ~suffix:" out of bounds" s -> - true - | _ -> - false - in let oob_check n = - (Alcotest.match_raises "out of bound" is_oob @@ fun () -> Ipq.remove q n) ; + let oob = Ipq.OutOfBounds n in + (Alcotest.check_raises "out of bound" oob @@ fun () -> Ipq.remove q n) ; Alcotest.(check bool) "same value" false (Ipq.is_empty q) in oob_check 10 ; diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml index 0a4a847403f..259a24ee260 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler_test.ml @@ -14,90 +14,106 @@ module Scheduler = Xapi_stdext_threads_scheduler.Scheduler +let calibrated_ratio () = 
+ let expected = Mtime.Span.(100 * ms |> to_float_ns) in + let elapsed = Mtime_clock.counter () in + (* Add a 10% leeway to the ratio calculated *) + Thread.delay 0.11 ; + let actual = Mtime_clock.count elapsed |> Mtime.Span.to_float_ns in + let ratio = actual /. expected in + Alcotest.(check bool) (Printf.sprintf "ratio is %f" ratio) true true ; + ratio + let started = Atomic.make false let start_schedule () = if not (Atomic.exchange started true) then Thread.create Scheduler.loop () |> ignore -let send event data = Event.(send event data |> sync) +let send event data () = Event.(send event data |> sync) let receive event = Event.(receive event |> sync) -let elapsed_ms cnt = - let elapsed_ns = Mtime_clock.count cnt |> Mtime.Span.to_uint64_ns in - Int64.(div elapsed_ns 1000000L |> to_int) +let is_less ratio a b = + let a = + Mtime.Span.to_float_ns a + |> Float.mul ratio + |> Int64.of_float + |> Mtime.Span.of_uint64_ns + in + Mtime.Span.is_shorter ~than:a b -let is_less = Alcotest.(testable (pp int)) Stdlib.( > ) +let mtime_span () = + let cmp = is_less (calibrated_ratio ()) in + Alcotest.(testable Mtime.Span.pp) cmp let test_single () = let finished = Event.new_channel () in - Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (fun () -> - send finished true - ) ; + Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (send finished true) ; start_schedule () ; Alcotest.(check bool) "result" true (receive finished) -let test_remove_self () = +let test_remove_self mtime_span () = let which = Event.new_channel () in Scheduler.add_to_queue "self" (Scheduler.Periodic 0.001) 0.001 (fun () -> (* this should remove the periodic scheduling *) Scheduler.remove_from_queue "self" ; (* add an operation to stop the test *) - Scheduler.add_to_queue "stop" Scheduler.OneShot 0.1 (fun () -> - send which "stop" - ) ; - send which "self" + Scheduler.add_to_queue "stop" Scheduler.OneShot 0.1 (send which "stop") ; + send which "self" () ) ; start_schedule () ; - let cnt = 
Mtime_clock.counter () in + + let from_wait_to_receive = Mtime_clock.counter () in Alcotest.(check string) "same event name" "self" (receive which) ; Alcotest.(check string) "same event name" "stop" (receive which) ; - let elapsed_ms = elapsed_ms cnt in - Alcotest.check is_less "small time" 300 elapsed_ms -let test_empty () = + let elapsed = Mtime_clock.count from_wait_to_receive in + let expected = Mtime.Span.(300 * ms) in + Alcotest.check mtime_span "small time" expected elapsed + +let test_empty mtime_span () = let finished = Event.new_channel () in - Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (fun () -> - send finished true - ) ; + Scheduler.add_to_queue "one" Scheduler.OneShot 0.001 (send finished true) ; start_schedule () ; Alcotest.(check bool) "finished" true (receive finished) ; (* wait loop to go to wait with no work to do *) Thread.delay 0.1 ; - Scheduler.add_to_queue "two" Scheduler.OneShot 0.001 (fun () -> - send finished true - ) ; - let cnt = Mtime_clock.counter () in + Scheduler.add_to_queue "two" Scheduler.OneShot 0.001 (send finished true) ; + + let from_wait_to_receive = Mtime_clock.counter () in Alcotest.(check bool) "finished" true (receive finished) ; - let elapsed_ms = elapsed_ms cnt in - Alcotest.check is_less "small time" 100 elapsed_ms -let test_wakeup () = + let elapsed = Mtime_clock.count from_wait_to_receive in + let expected = Mtime.Span.(100 * ms) in + Alcotest.check mtime_span "small time" expected elapsed + +let test_wakeup mtime_span () = let which = Event.new_channel () in (* schedule a long event *) - Scheduler.add_to_queue "long" Scheduler.OneShot 2.0 (fun () -> - send which "long" - ) ; + Scheduler.add_to_queue "long" Scheduler.OneShot 2.0 (send which "long") ; start_schedule () ; (* wait loop to go to wait with no work to do *) Thread.delay 0.1 ; - let cnt = Mtime_clock.counter () in + (* schedule a quick event, should wake up the loop *) - Scheduler.add_to_queue "quick" Scheduler.OneShot 0.1 (fun () -> - send which 
"quick" - ) ; + Scheduler.add_to_queue "quick" Scheduler.OneShot 0.1 (send which "quick") ; + + let from_wait_to_receive_quick = Mtime_clock.counter () in Alcotest.(check string) "same event name" "quick" (receive which) ; + Scheduler.remove_from_queue "long" ; - let elapsed_ms = elapsed_ms cnt in - Alcotest.check is_less "small time" 150 elapsed_ms + let elapsed = Mtime_clock.count from_wait_to_receive_quick in + let expected = Mtime.Span.(100 * ms) in + Alcotest.check mtime_span "small time" expected elapsed let tests = + let mtime_span = mtime_span () in [ ("test_single", `Quick, test_single) - ; ("test_remove_self", `Quick, test_remove_self) - ; ("test_empty", `Quick, test_empty) - ; ("test_wakeup", `Quick, test_wakeup) + ; ("test_remove_self", `Quick, test_remove_self mtime_span) + ; ("test_empty", `Quick, test_empty mtime_span) + ; ("test_wakeup", `Quick, test_wakeup mtime_span) ] let () = Alcotest.run "Scheduler" [("generic", tests)] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml index b954a159ddb..251b35473a8 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.ml @@ -19,8 +19,8 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally module Mutex = struct (** execute the function f with the mutex hold *) let execute lock f = - Mutex.lock lock ; - finally f (fun () -> Mutex.unlock lock) + let finally () = Mutex.unlock lock in + Mutex.lock lock ; Fun.protect ~finally f end module Semaphore = struct @@ -97,3 +97,41 @@ let wait_timed_write fd timeout = true | _ -> assert false + +module ThreadRuntimeContext = struct + type t = { + ocaml_tid: int + ; thread_name: string + ; mutable time_running: int + ; mutable tepoch: int + ; tgroup: Tgroup.Group.t + } + + (*The documentation for Ambient_context_thread_local isn't really clear is + this context. 
thread_local_storage is a global variable shared by all + threads. It is a map with keys, the thread IDs and values the above + defined data structure.*) + let thread_local_storage = Ambient_context_thread_local.Thread_local.create () + + let create ?(thread_name = "") () = + let ocaml_tid = Thread.self () |> Thread.id in + let time_running = 0 in + let tepoch = 0 in + let tgroup = Tgroup.Group.authenticated_root in + let tls = {thread_name; tgroup; ocaml_tid; time_running; tepoch} in + let () = + Ambient_context_thread_local.Thread_local.set thread_local_storage tls + in + tls + + let get () = + Ambient_context_thread_local.Thread_local.get_or_create ~create + thread_local_storage + + let update f context = + f context + |> Ambient_context_thread_local.Thread_local.set thread_local_storage + + let remove () = + Ambient_context_thread_local.Thread_local.remove thread_local_storage +end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli index a1af35ccbeb..7967e3fa573 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext.mli @@ -43,3 +43,27 @@ end val wait_timed_read : Unix.file_descr -> float -> bool val wait_timed_write : Unix.file_descr -> float -> bool + +module ThreadRuntimeContext : sig + type t = { + ocaml_tid: int + ; thread_name: string + ; mutable time_running: int + ; mutable tepoch: int + ; tgroup: Tgroup.Group.t + } + + val create : ?thread_name:string -> unit -> t + (** [create ()] creates and returns an initial thread local strorage for the + current thread. *) + + val get : unit -> t + (** [get ()] returns the current thread local storage. *) + + val update : (t -> t) -> t -> unit + (** [update fn thread_ctx] updates the thread local storage based on + the supplied arguments. *) + + val remove : unit -> unit + (** [remove ()] removes the thread local storage of the current thread. 
*) +end diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml index b93df9f47a8..2182430b182 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/threadext_test.ml @@ -74,4 +74,97 @@ let tests = ; ("other_thread", `Quick, other_thread) ] -let () = Alcotest.run "Threadext" [("Delay", tests)] +let test_create_ambient_storage () = + let open Xapi_stdext_threads.Threadext in + let _ : Thread.t = + Thread.create + (fun () -> + let storage = ThreadRuntimeContext.create () in + let storage_tid = storage.ocaml_tid in + let ocaml_tid = Thread.self () |> Thread.id in + Alcotest.(check int) + "Ocaml thread id matches the thread id stored" ocaml_tid storage_tid + ) + () + in + () + +let test_thread_storage_update_and_get () = + let open Xapi_stdext_threads.Threadext in + let _ : Thread.t = + Thread.create + (fun () -> + let context : ThreadRuntimeContext.t = ThreadRuntimeContext.create () in + + let expected_name = "thread_1" in + ThreadRuntimeContext.update + (fun t -> {t with thread_name= expected_name}) + context ; + let storage = ThreadRuntimeContext.get () in + Alcotest.(check string) + "Check if correct value is set in storage" expected_name + storage.thread_name + ) + () + in + () + +let test_storage_locality () = + let open Xapi_stdext_threads.Threadext in + let r1 = ref None in + let r2 = ref None in + + let thread1_expected_name = "thread_1" in + let thread2_expected_name = "thread_2" in + + let thread1 = + Thread.create + (fun () -> + let context = ThreadRuntimeContext.create () in + ThreadRuntimeContext.update + (fun t -> {t with thread_name= thread1_expected_name}) + context ; + Thread.delay 1. 
; + r1 := Some (ThreadRuntimeContext.get ()) + ) + () + in + let thread2 = + Thread.create + (fun () -> + let context = ThreadRuntimeContext.create () in + ThreadRuntimeContext.update + (fun t -> {t with thread_name= thread2_expected_name}) + context ; + + r2 := Some (ThreadRuntimeContext.get ()) + ) + () + in + Thread.join thread1 ; + Thread.join thread2 ; + Alcotest.(check bool) + "Check thread local storage is set for thread1" true (Option.is_some !r1) ; + Alcotest.(check bool) + "Check thread local storage is set for thread2" true (Option.is_some !r2) ; + let thread1_name = + let r1 = Option.get !r1 in + r1.thread_name + in + let thread2_name = + let r2 = Option.get !r2 in + r2.thread_name + in + Alcotest.(check string) "Thread1 name" thread1_expected_name thread1_name ; + Alcotest.(check string) "Thread2 name" thread2_expected_name thread2_name + +let tls_tests = + [ + ("create storage", `Quick, test_create_ambient_storage) + ; ("storage update and get", `Quick, test_thread_storage_update_and_get) + ; ("thread local storage", `Quick, test_storage_locality) + ] + +let () = + Alcotest.run "Threadext" + [("Delay", tests); ("ThreadRuntimeContext", tls_tests)] diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml index 32a9f5119ab..893a7e4d9bc 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.ml @@ -17,6 +17,11 @@ exception Unix_error of int let _exit = Unix._exit +let raise_with_preserved_backtrace exn f = + let bt = Printexc.get_raw_backtrace () in + f () ; + Printexc.raise_with_backtrace exn bt + (** remove a file, but doesn't raise an exception if the file is already removed *) let unlink_safe file = try Unix.unlink file with (* Unix.Unix_error (Unix.ENOENT, _ , _)*) _ -> () diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli index 
047935b475c..3db652bd2a3 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext.mli @@ -15,6 +15,10 @@ val _exit : int -> unit +val raise_with_preserved_backtrace : exn -> (unit -> unit) -> 'b +(** A wrapper that preserves the backtrace (otherwise erased by calling + formatting functions, for example) *) + val unlink_safe : string -> unit val mkdir_safe : string -> Unix.file_perm -> unit diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c index 28fd7f9af89..27b2f632d08 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-unix/unixext_stubs.c @@ -39,11 +39,11 @@ #include "blkgetsize.h" /* Set the TCP_NODELAY flag on a Unix.file_descr */ -CAMLprim value stub_unixext_set_tcp_nodelay (value fd, value bool) +CAMLprim value stub_unixext_set_tcp_nodelay (value fd, value nodelay) { - CAMLparam2 (fd, bool); + CAMLparam2 (fd, nodelay); int c_fd = Int_val(fd); - int opt = (Bool_val(bool)) ? 1 : 0; + int opt = (Bool_val(nodelay)) ? 
1 : 0; if (setsockopt(c_fd, IPPROTO_TCP, TCP_NODELAY, (void *)&opt, sizeof(opt)) != 0){ uerror("setsockopt", Nothing); } diff --git a/ocaml/message-switch/core/dune b/ocaml/message-switch/core/dune index d61746efe44..c9b5b3e2cff 100644 --- a/ocaml/message-switch/core/dune +++ b/ocaml/message-switch/core/dune @@ -9,6 +9,7 @@ sexplib sexplib0 threads.posix + tracing uri xapi-log xapi-stdext-threads diff --git a/ocaml/message-switch/core/make.ml b/ocaml/message-switch/core/make.ml index 43b7e301a9b..df1d003f5f5 100644 --- a/ocaml/message-switch/core/make.ml +++ b/ocaml/message-switch/core/make.ml @@ -229,7 +229,7 @@ functor in return (Ok t) - let rpc ~t ~queue ?timeout ~body:x () = + let rpc ?_span_parent ~t ~queue ?timeout ~body:x () = let ivar = M.Ivar.create () in let timer = Option.map diff --git a/ocaml/message-switch/core/s.ml b/ocaml/message-switch/core/s.ml index 423304d1b24..fefe4d7a1f6 100644 --- a/ocaml/message-switch/core/s.ml +++ b/ocaml/message-switch/core/s.ml @@ -144,7 +144,8 @@ module type CLIENT = sig (** [disconnect] closes the connection *) val rpc : - t:t + ?_span_parent:Tracing.Span.t + -> t:t -> queue:string -> ?timeout:int -> body:string diff --git a/ocaml/message-switch/unix/dune b/ocaml/message-switch/unix/dune index 92bddfd66fb..1858aa271b3 100644 --- a/ocaml/message-switch/unix/dune +++ b/ocaml/message-switch/unix/dune @@ -11,6 +11,7 @@ rpclib.core rpclib.json threads.posix + tracing xapi-stdext-threads xapi-stdext-unix ) diff --git a/ocaml/message-switch/unix/protocol_unix.ml b/ocaml/message-switch/unix/protocol_unix.ml index f7aa0802c0f..29b95f7ef12 100644 --- a/ocaml/message-switch/unix/protocol_unix.ml +++ b/ocaml/message-switch/unix/protocol_unix.ml @@ -347,7 +347,7 @@ module Client = struct Ok c' ) - let rpc ~t:c ~queue:dest_queue_name ?timeout ~body:x () = + let rpc ?_span_parent ~t:c ~queue:dest_queue_name ?timeout ~body:x () = let t = Ivar.create () in let timer = Option.map @@ -364,9 +364,23 @@ module Client = struct do_rpc 
c.requests_conn (In.CreatePersistent dest_queue_name) >>|= fun (_ : string) -> let msg = - In.Send - ( dest_queue_name - , {Message.payload= x; kind= Message.Request c.reply_queue_name} + Tracing.with_tracing + ~attributes: + [ + ("messaging.operation.name", "send") + ; ("messaging.system", "message-switch") + ; ("messaging.destination.name", dest_queue_name) + ] + ~span_kind:Producer ~parent:_span_parent + ~name:("send" ^ " " ^ dest_queue_name) + (fun _ -> + In.Send + ( dest_queue_name + , { + Message.payload= x + ; kind= Message.Request c.reply_queue_name + } + ) ) in do_rpc c.requests_conn msg >>|= fun (id : string) -> diff --git a/ocaml/networkd/bin/network_monitor_thread.ml b/ocaml/networkd/bin/network_monitor_thread.ml index 1b15dbe2a42..540468dfdcb 100644 --- a/ocaml/networkd/bin/network_monitor_thread.ml +++ b/ocaml/networkd/bin/network_monitor_thread.ml @@ -125,7 +125,7 @@ let get_link_stats () = in List.map (fun link -> standardise_name (Link.get_name link)) links |> (* Only keep interfaces with prefixes on the whitelist, and exclude VLAN - devices (ethx.y). *) + devices (ethx.y). 
*) List.filter (fun name -> is_whitelisted name && not (is_vlan name)) in Cache.free cache ; Socket.close s ; Socket.free s ; links diff --git a/ocaml/networkd/bin/network_server.ml b/ocaml/networkd/bin/network_server.ml index 8cc5e9ea908..dee5edb0985 100644 --- a/ocaml/networkd/bin/network_server.ml +++ b/ocaml/networkd/bin/network_server.ml @@ -412,12 +412,23 @@ module Interface = struct Ip.set_ipv6_link_local_addr name ) | DHCP6 -> + let gateway = + Option.fold ~none:[] + ~some:(fun n -> [`gateway n]) + !config.gateway_interface + in + let dns = + Option.fold ~none:[] + ~some:(fun n -> [`dns n]) + !config.dns_interface + in if Dhclient.is_running ~ipv6:true name then ignore (Dhclient.stop ~ipv6:true name) ; Sysctl.set_ipv6_autoconf name false ; Ip.flush_ip_addr ~ipv6:true name ; Ip.set_ipv6_link_local_addr name ; - ignore (Dhclient.ensure_running ~ipv6:true name []) + let options = gateway @ dns in + ignore (Dhclient.ensure_running ~ipv6:true name options) | Autoconf6 -> if Dhclient.is_running ~ipv6:true name then ignore (Dhclient.stop ~ipv6:true name) ; @@ -543,7 +554,8 @@ module Interface = struct let set_dns _ dbg ~name ~nameservers ~domains = Debug.with_thread_associated dbg (fun () -> - update_config name {(get_config name) with dns= (nameservers, domains)} ; + update_config name + {(get_config name) with dns= Some (nameservers, domains)} ; debug "Configuring DNS for %s: nameservers: [%s]; domains: [%s]" name (String.concat ", " (List.map Unix.string_of_inet_addr nameservers)) (String.concat ", " domains) ; @@ -716,7 +728,7 @@ module Interface = struct ; ipv6_conf ; ipv6_gateway ; ipv4_routes - ; dns= nameservers, domains + ; dns ; mtu ; ethtool_settings ; ethtool_offload @@ -725,16 +737,13 @@ module Interface = struct ) ) -> update_config name c ; exec (fun () -> - (* We only apply the DNS settings when not in a DHCP mode - to avoid conflicts. 
The `dns` field - should really be an option type so that we don't have to - derive the intention of the caller by looking at other - fields. *) - match (ipv4_conf, ipv6_conf) with - | Static4 _, _ | _, Static6 _ | _, Autoconf6 -> - set_dns () dbg ~name ~nameservers ~domains - | _ -> + (* Old configs used empty dns lists to mean none, keep that + behaviour instead of writing an empty resolv.conf *) + match dns with + | None | Some ([], []) -> () + | Some (nameservers, domains) -> + set_dns () dbg ~name ~nameservers ~domains ) ; exec (fun () -> set_ipv4_conf dbg name ipv4_conf) ; exec (fun () -> @@ -924,12 +933,6 @@ module Bridge = struct "standalone" ) in - let vlan_bug_workaround = - if List.mem_assoc "vlan-bug-workaround" other_config then - Some (List.assoc "vlan-bug-workaround" other_config = "true") - else - None - in let external_id = if List.mem_assoc "network-uuids" other_config then Some @@ -957,7 +960,7 @@ module Bridge = struct Option.iter (destroy_existing_vlan_ovs_bridge dbg name) vlan ; ignore (Ovs.create_bridge ?mac ~fail_mode ?external_id ?disable_in_band - ?igmp_snooping vlan vlan_bug_workaround name + ?igmp_snooping vlan name ) ; if igmp_snooping = Some true && not old_igmp_snooping then Ovs.inject_igmp_query ~name diff --git a/ocaml/networkd/bin_db/networkd_db.ml b/ocaml/networkd/bin_db/networkd_db.ml index f62021828fa..e0fea0cca11 100644 --- a/ocaml/networkd/bin_db/networkd_db.ml +++ b/ocaml/networkd/bin_db/networkd_db.ml @@ -38,6 +38,8 @@ let _ = List.concat_map (fun (_, port) -> port.interfaces) bridge_config.ports in Printf.printf "interfaces=%s\n" (String.concat "," ifaces) ; + Printf.printf "hwaddrs=%s\n" + (Option.value ~default:"" bridge_config.bridge_mac) ; match bridge_config.vlan with | None -> () @@ -74,20 +76,25 @@ let _ = [("gateway", Unix.string_of_inet_addr addr)] in let dns = - let dns' = - List.map Unix.string_of_inet_addr (fst interface_config.dns) - in - if dns' = [] then - [] - else - [("dns", String.concat "," dns')] + 
interface_config.dns + |> Option.map fst + |> Option.map (List.map Unix.string_of_inet_addr) + |> Option.fold ~none:[] ~some:(function + | [] -> + [] + | dns' -> + [("dns", String.concat "," dns')] + ) in let domains = - let domains' = snd interface_config.dns in - if domains' = [] then - [] - else - [("domain", String.concat "," domains')] + interface_config.dns + |> Option.map snd + |> Option.fold ~none:[] ~some:(function + | [] -> + [] + | domains' -> + [("domain", String.concat "," domains')] + ) in mode @ addrs @ gateway @ dns @ domains | None4 -> diff --git a/ocaml/networkd/lib/network_config.ml b/ocaml/networkd/lib/network_config.ml index b306b580b32..3d034f05284 100644 --- a/ocaml/networkd/lib/network_config.ml +++ b/ocaml/networkd/lib/network_config.ml @@ -34,6 +34,52 @@ let bridge_naming_convention (device : string) = else "br" ^ device +let get_list_from ~sep ~key args = + List.assoc_opt key args + |> Option.map (fun v -> Astring.String.cuts ~empty:false ~sep v) + +let parse_ipv4_config args = function + | Some "static" -> + let ip = List.assoc "IP" args |> Unix.inet_addr_of_string in + let prefixlen = List.assoc "NETMASK" args |> netmask_to_prefixlen in + let gateway = + Option.map Unix.inet_addr_of_string (List.assoc_opt "GATEWAY" args) + in + (Static4 [(ip, prefixlen)], gateway) + | Some "dhcp" -> + (DHCP4, None) + | _ -> + (None4, None) + +let parse_ipv6_config args = function + | Some "static" -> + let ipv6_arg = List.assoc "IPv6" args in + let ip, prefixlen = + Scanf.sscanf ipv6_arg "%s@/%d" (fun ip prefixlen -> + let ip = ip |> Unix.inet_addr_of_string in + (ip, prefixlen) + ) + in + let gateway = + Option.map Unix.inet_addr_of_string (List.assoc_opt "IPv6_GATEWAY" args) + in + (Static6 [(ip, prefixlen)], gateway) + | Some "dhcp" -> + (DHCP6, None) + | Some "autoconf" -> + (Autoconf6, None) + | _ -> + (None6, None) + +let parse_dns_config args = + let ( let* ) = Option.bind in + let* nameservers = + get_list_from ~sep:"," ~key:"DNS" args + |> 
Option.map (List.map Unix.inet_addr_of_string) + in + let* domains = get_list_from ~sep:" " ~key:"DOMAIN" args in + Some (nameservers, domains) + let read_management_conf () = try let management_conf = @@ -42,35 +88,23 @@ let read_management_conf () = in let args = Astring.String.cuts ~empty:false ~sep:"\n" (String.trim management_conf) - in - let args = - List.map - (fun s -> - match Astring.String.cuts ~sep:"=" s with - | [k; v] -> - (k, Astring.String.trim ~drop:(( = ) '\'') v) - | _ -> - ("", "") - ) - args + |> List.filter_map (fun s -> + match Astring.String.cut ~sep:"=" s with + | Some (_, "") | None -> + None + | Some (k, v) -> + Some (k, Astring.String.trim ~drop:(( = ) '\'') v) + ) in debug "Firstboot file management.conf has: %s" (String.concat "; " (List.map (fun (k, v) -> k ^ "=" ^ v) args)) ; let vlan = List.assoc_opt "VLAN" args in - let bond_mode = - Option.value ~default:"" (List.assoc_opt "BOND_MODE" args) - in - let bond_members = - match List.assoc_opt "BOND_MEMBERS" args with - | None -> - [] - | Some x -> - String.split_on_char ',' x - in + let bond_mode = List.assoc_opt "BOND_MODE" args in + let bond_members = get_list_from ~sep:"," ~key:"BOND_MEMBERS" args in let device = (* Take 1st member of bond *) match (bond_mode, bond_members) with - | "", _ | _, [] -> ( + | None, _ | _, (None | Some []) -> ( match List.assoc_opt "LABEL" args with | Some x -> x @@ -78,7 +112,7 @@ let read_management_conf () = error "%s: missing LABEL in %s" __FUNCTION__ management_conf ; raise Read_error ) - | _, hd :: _ -> + | _, Some (hd :: _) -> hd in Inventory.reread_inventory () ; @@ -105,45 +139,28 @@ let read_management_conf () = bridge in let mac = Network_utils.Ip.get_mac device in - let ipv4_conf, ipv4_gateway, dns = - match List.assoc "MODE" args with - | "static" -> - let ip = List.assoc "IP" args |> Unix.inet_addr_of_string in - let prefixlen = List.assoc "NETMASK" args |> netmask_to_prefixlen in - let gateway = - if List.mem_assoc "GATEWAY" args then - 
Some (List.assoc "GATEWAY" args |> Unix.inet_addr_of_string) - else - None - in - let nameservers = - if List.mem_assoc "DNS" args && List.assoc "DNS" args <> "" then - List.map Unix.inet_addr_of_string - (Astring.String.cuts ~empty:false ~sep:"," - (List.assoc "DNS" args) - ) - else - [] - in - let domains = - if List.mem_assoc "DOMAIN" args && List.assoc "DOMAIN" args <> "" - then - Astring.String.cuts ~empty:false ~sep:" " - (List.assoc "DOMAIN" args) - else - [] - in - let dns = (nameservers, domains) in - (Static4 [(ip, prefixlen)], gateway, dns) - | "dhcp" -> - (DHCP4, None, ([], [])) - | _ -> - (None4, None, ([], [])) + let dns = parse_dns_config args in + let (ipv4_conf, ipv4_gateway), (ipv6_conf, ipv6_gateway) = + match (List.assoc_opt "MODE" args, List.assoc_opt "MODEV6" args) with + | None, None -> + error "%s: at least one of 'MODE', 'MODEV6' needs to be specified" + __FUNCTION__ ; + raise Read_error + | v4, v6 -> + (parse_ipv4_config args v4, parse_ipv6_config args v6) in let phy_interface = {default_interface with persistent_i= true} in let bridge_interface = - {default_interface with ipv4_conf; ipv4_gateway; persistent_i= true; dns} + { + default_interface with + ipv4_conf + ; ipv4_gateway + ; ipv6_conf + ; ipv6_gateway + ; persistent_i= true + ; dns + } in let interface_config, bridge_config = let primary_bridge_conf = diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 1c8c8cd1a27..ca6153dae5a 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -51,8 +51,6 @@ let ovs_ofctl = "/usr/bin/ovs-ofctl" let ovs_appctl = "/usr/bin/ovs-appctl" -let ovs_vlan_bug_workaround = "/usr/sbin/ovs-vlan-bug-workaround" - let brctl = ref "/sbin/brctl" let modprobe = "/sbin/modprobe" @@ -162,7 +160,8 @@ module Sysfs = struct with | End_of_file -> "" - | Unix.Unix_error (Unix.EINVAL, _, _) -> + | Unix.Unix_error (Unix.EINVAL, _, _) | Unix.Unix_error (Unix.ENOENT, _, _) + -> (* The device 
is not yet up *) raise (Network_error (Read_error file)) | exn -> @@ -180,18 +179,29 @@ module Sysfs = struct close_out outchan ; raise (Network_error (Write_error file)) - let is_physical name = + exception Unable_to_read_driver_link + + let is_vif name = + let devpath = getpath name "device" in try - let devpath = getpath name "device" in let driver_link = Unix.readlink (devpath ^ "/driver") in (* filter out symlinks under device/driver which look like /../../../devices/xen-backend/vif- *) - not - (List.mem "xen-backend" - (Astring.String.cuts ~empty:false ~sep:"/" driver_link) - ) + List.mem "xen-backend" + (Astring.String.cuts ~empty:false ~sep:"/" driver_link) + with _ -> raise Unable_to_read_driver_link + + let is_vf name = + let devpath = getpath name "device" in + try + ignore @@ Unix.readlink (devpath ^ "/physfn") ; + true with _ -> false + let is_physical name = + try not (is_vif name || is_vf name) + with Unable_to_read_driver_link -> false + (* device types are defined in linux/if_arp.h *) let is_ether_device name = match int_of_string (read_one_line (getpath name "type")) with @@ -262,25 +272,6 @@ module Sysfs = struct Result.Error (Fail_to_get_driver_name, "Failed to get driver name for: " ^ dev) - (** Returns the features bitmap for the driver for [dev]. The features bitmap - is a set of NETIF_F_ flags supported by its driver. *) - let get_features dev = - try Some (int_of_string (read_one_line (getpath dev "features"))) - with _ -> None - - (** Returns [true] if [dev] supports VLAN acceleration, [false] otherwise. 
*) - let has_vlan_accel dev = - let flag_NETIF_F_HW_VLAN_TX = 128 in - let flag_NETIF_F_HW_VLAN_RX = 256 in - let flag_NETIF_F_VLAN = - flag_NETIF_F_HW_VLAN_TX lor flag_NETIF_F_HW_VLAN_RX - in - match get_features dev with - | None -> - false - | Some features -> - features land flag_NETIF_F_VLAN <> 0 - let set_multicast_snooping bridge value = try let path = getpath bridge "bridge/multicast_snooping" in @@ -1340,44 +1331,6 @@ module Ovs = struct ) with _ -> warn "Failed to set max-idle=%d on OVS" t - let handle_vlan_bug_workaround override bridge = - (* This is a list of drivers that do support VLAN tx or rx acceleration, - but to which the VLAN bug workaround should not be applied. This could - be because these are known-good drivers (that is, they do not have any - of the bugs that the workaround avoids) or because the VLAN bug - workaround will not work for them and may cause other problems. - - This is a very short list because few drivers have been tested. *) - let no_vlan_workaround_drivers = ["bonding"] in - let phy_interfaces = - try - let interfaces = bridge_to_interfaces bridge in - List.filter Sysfs.is_physical interfaces - with _ -> [] - in - List.iter - (fun interface -> - let do_workaround = - match override with - | Some value -> - value - | None -> ( - match Sysfs.get_driver_name interface with - | None -> - Sysfs.has_vlan_accel interface - | Some driver -> - if List.mem driver no_vlan_workaround_drivers then - false - else - Sysfs.has_vlan_accel interface - ) - in - let setting = if do_workaround then "on" else "off" in - try ignore (call_script ovs_vlan_bug_workaround [interface; setting]) - with _ -> () - ) - phy_interfaces - let get_vlans name = try let vlans_with_uuid = @@ -1474,13 +1427,12 @@ module Ovs = struct ["--"; "--may-exist"; "add-port"; bridge; name] @ type_args let create_bridge ?mac ?external_id ?disable_in_band ?igmp_snooping - ~fail_mode vlan vlan_bug_workaround name = + ~fail_mode vlan name = let vlan_arg = match vlan with | 
None -> [] | Some (parent, tag) -> - handle_vlan_bug_workaround vlan_bug_workaround parent ; [parent; string_of_int tag] in let mac_arg = @@ -1546,7 +1498,7 @@ module Ovs = struct let vif_arg = let existing_vifs = List.filter - (fun iface -> not (Sysfs.is_physical iface)) + (fun iface -> try Sysfs.is_vif iface with _ -> false) (bridge_to_interfaces name) in let ifaces_with_type = diff --git a/ocaml/qcow-stream-tool/dune b/ocaml/qcow-stream-tool/dune new file mode 100644 index 00000000000..2125dea0a0c --- /dev/null +++ b/ocaml/qcow-stream-tool/dune @@ -0,0 +1,10 @@ +(executable + (modes exe) + (name qcow_stream_tool) + (public_name qcow-stream-tool) + (package qcow-stream-tool) + (libraries + qcow-stream + cmdliner + ) +) diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.ml b/ocaml/qcow-stream-tool/qcow_stream_tool.ml new file mode 100644 index 00000000000..7158867c248 --- /dev/null +++ b/ocaml/qcow-stream-tool/qcow_stream_tool.ml @@ -0,0 +1,29 @@ +module Impl = struct + let stream_decode output = + Qcow_stream.stream_decode Unix.stdin output ; + `Ok () +end + +module Cli = struct + open Cmdliner + + let stream_decode_cmd = + let doc = "decode qcow2 formatted data from stdin and write a raw image" in + let man = + [ + `S "DESCRIPTION" + ; `P "Decode qcow2 formatted data from stdin and write to a raw file." + ] + in + let output default = + let doc = Printf.sprintf "Path to the output file." 
in + Arg.(value & pos 0 string default & info [] ~doc) + in + Cmd.v + (Cmd.info "stream_decode" ~doc ~man) + Term.(ret (const Impl.stream_decode $ output "test.raw")) + + let main () = Cmd.eval stream_decode_cmd +end + +let () = exit (Cli.main ()) diff --git a/ocaml/qcow-stream-tool/qcow_stream_tool.mli b/ocaml/qcow-stream-tool/qcow_stream_tool.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/quicktest/quicktest.ml b/ocaml/quicktest/quicktest.ml index f4f8309ec34..337b1ae2b3e 100644 --- a/ocaml/quicktest/quicktest.ml +++ b/ocaml/quicktest/quicktest.ml @@ -25,6 +25,7 @@ let () = let suite = [ ("Quicktest_example", Quicktest_example.tests ()) + ; ("Quicktest_message", Quicktest_message.tests ()) ; ("xenstore", Quicktest_xenstore.tests ()) ; ("cbt", Quicktest_cbt.tests ()) ; ("event", Quicktest_event.tests ()) diff --git a/ocaml/quicktest/quicktest_message.ml b/ocaml/quicktest/quicktest_message.ml new file mode 100644 index 00000000000..55e8dc67492 --- /dev/null +++ b/ocaml/quicktest/quicktest_message.ml @@ -0,0 +1,73 @@ +module Message = Client.Client.Message + +let rpc = Quicktest_args.rpc + +module Testable = struct + let ref () = + let fmt = Fmt.of_to_string Ref.string_of in + Alcotest.testable fmt ( = ) +end + +let get_all_records_where_test rpc session_id () = + let with_api fn = fn ~rpc ~session_id in + let create_message = with_api Message.create in + + let vm = List.hd (with_api Client.Client.VM.get_all) in + let vm_uuid = with_api Client.Client.VM.get_uuid ~self:vm in + + let host = List.hd (with_api Client.Client.Host.get_all) in + let host_uuid = with_api Client.Client.Host.get_uuid ~self:host in + + (* Create several different messages *) + let messages = + [ + create_message ~name:"VM_EXPLODED" ~priority:100L ~cls:`VM + ~obj_uuid:vm_uuid ~body:"body" + ; create_message ~name:"HOST_EXPLODED" ~priority:100L ~cls:`Host + ~obj_uuid:host_uuid ~body:"body" + ; create_message ~name:"VM_DROWNED" ~priority:50L ~cls:`VM ~obj_uuid:vm_uuid 
+ ~body:"body" + ] + in + + let get_messages_where = with_api Message.get_all_records_where in + + let queries = + [ + (* query | is message expected in the list [bool ; bool ; bool] *) + ({| field "name" = "VM_EXPLODED" |}, [true; false; false]) + ; ({| field "priority" = "100" |}, [true; true; false]) + ; ({| field "cls" = "VM" |}, [true; false; true]) + ; ({| field "cls" = "VM" and field "priority" = "50"|}, [false; false; true]) + ; ({| field "cls" = "VDI"|}, [false; false; false]) + ; ({| field "class" = "VM"|}, [false; false; false]) + ; ( {| field "cls" = "VM" or field "name" = "HOST_EXPLODED"|} + , [true; true; true] + ) + ] + in + let lists = + List.map + (fun (expr, expected) -> (expr, get_messages_where ~expr, expected)) + queries + in + + (* Check that filtering returns correct messages *) + let message_test msg list expected = + let list = List.map fst list in + List.iter2 + (fun msg_ref expected -> + Alcotest.check' Alcotest.bool ~msg ~expected + ~actual:(List.mem msg_ref list) + ) + messages expected + in + List.iter (fun (msg, list, expected) -> message_test msg list expected) lists + +let tests () = + let open Qt_filter in + [ + [("Message.get_all_records_where test", `Quick, get_all_records_where_test)] + |> conn + ] + |> List.concat diff --git a/ocaml/rrd2csv/src/rrd2csv.ml b/ocaml/rrd2csv/src/rrd2csv.ml index a6866874ee2..37e00f8148d 100644 --- a/ocaml/rrd2csv/src/rrd2csv.ml +++ b/ocaml/rrd2csv/src/rrd2csv.ml @@ -304,7 +304,7 @@ module Ds_selector = struct if fs = [] then true else - List.fold_left (fun acc f -> acc || filter11 f d) false fs + List.exists (fun f -> filter11 f d) fs (* Returns the d \in ds that passes at least one of the filters fs *) diff --git a/ocaml/sdk-gen/c/autogen/dune b/ocaml/sdk-gen/c/autogen/dune index 78b81f38e4c..ff89723f136 100644 --- a/ocaml/sdk-gen/c/autogen/dune +++ b/ocaml/sdk-gen/c/autogen/dune @@ -1,26 +1,20 @@ (rule - (targets COPYING) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + 
(targets COPYING) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (rule - (targets README) - (deps - ../README.dist - ) - (action (copy %{deps} %{targets})) -) + (targets README) + (deps ../README.dist) + (action + (copy %{deps} %{targets}))) (alias - (name generate) - (deps - COPYING - README - (source_tree .) - ) -) + (name generate) + (deps + COPYING + README + (source_tree .))) (data_only_dirs src include) diff --git a/ocaml/sdk-gen/c/dune b/ocaml/sdk-gen/c/dune index adbea6905fa..ef7e42abbd5 100644 --- a/ocaml/sdk-gen/c/dune +++ b/ocaml/sdk-gen/c/dune @@ -1,34 +1,26 @@ (executable - (modes exe) - (name gen_c_binding) - (libraries - astring - CommonFunctions - - mustache - xapi-datamodel - ) -) + (modes exe) + (name gen_c_binding) + (libraries astring CommonFunctions mustache xapi-datamodel)) (rule - (alias generate) - (package xapi-sdk) - (targets (dir autogen-out)) - (deps - (:x gen_c_binding.exe) - (source_tree templates) - (source_tree autogen) - ) - (action (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - )) -) + (alias generate) + (package xapi-sdk) + (targets + (dir autogen-out)) + (deps + (:x gen_c_binding.exe) + (source_tree templates) + (source_tree autogen)) + (action + (progn + (bash "cp -r autogen/ autogen-out/") + (run %{x})))) (data_only_dirs templates) (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as c)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as c))) diff --git a/ocaml/sdk-gen/common/dune b/ocaml/sdk-gen/common/dune index ea0011e71ce..1475ba4da8d 100644 --- a/ocaml/sdk-gen/common/dune +++ b/ocaml/sdk-gen/common/dune @@ -1,14 +1,6 @@ (library - (name CommonFunctions) - (modes best) - (wrapped false) - (libraries - astring - xapi-datamodel - mustache - xapi-stdext-std - xapi-stdext-unix - ) - (modules_without_implementation license) -) - + (name CommonFunctions) + (modes best) + (wrapped false) + (libraries astring xapi-datamodel mustache xapi-stdext-std 
xapi-stdext-unix) + (modules_without_implementation license)) diff --git a/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml b/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml index 47fefd83086..792fe17fcd7 100644 --- a/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml +++ b/ocaml/sdk-gen/csharp/XE_SR_ERRORCODES.xml @@ -513,6 +513,12 @@ 117 + + PVMultiIDs + PVs found with multiple SCSI IDs + 119 + + APISession diff --git a/ocaml/sdk-gen/csharp/autogen/dune b/ocaml/sdk-gen/csharp/autogen/dune index 2a9744e4ae6..bd393d9a6e6 100644 --- a/ocaml/sdk-gen/csharp/autogen/dune +++ b/ocaml/sdk-gen/csharp/autogen/dune @@ -1,11 +1,8 @@ (rule - (alias generate) - (targets LICENSE) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (alias generate) + (targets LICENSE) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (data_only_dirs src) - diff --git a/ocaml/sdk-gen/csharp/autogen/src/Event.cs b/ocaml/sdk-gen/csharp/autogen/src/Event.cs index 62bb7d16ae8..1eed4e3ef10 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Event.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Event.cs @@ -45,6 +45,7 @@ public override void UpdateFrom(Event update) id = update.id; } + [Obsolete("Use the calls setting individual fields of the API object instead.")] public override string SaveChanges(Session session, string opaqueRef, Event serverObject) { if (opaqueRef == null) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Failure.cs b/ocaml/sdk-gen/csharp/autogen/src/Failure.cs index 62cd536afd0..e8b514f20ea 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Failure.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Failure.cs @@ -31,9 +31,11 @@ using System.Collections.Generic; using System.Linq; using System.Resources; +#if !(NET8_0_OR_GREATER) using System.Runtime.Serialization; using System.Text.RegularExpressions; using System.Xml; +#endif using Newtonsoft.Json.Linq; @@ -88,12 +90,14 @@ public Failure(string message, Exception exception) ParseExceptionMessage(); } +#if !(NET8_0_OR_GREATER) protected 
Failure(SerializationInfo info, StreamingContext context) : base(info, context) { errorDescription = (List)info.GetValue("errorDescription", typeof(List)); errorText = info.GetString("errorText"); } +#endif #endregion @@ -174,7 +178,7 @@ public override string ToString() { return Message; } - +#if !(NET8_0_OR_GREATER) public override void GetObjectData(SerializationInfo info, StreamingContext context) { if (info == null) @@ -185,5 +189,6 @@ public override void GetObjectData(SerializationInfo info, StreamingContext cont base.GetObjectData(info, context); } +#endif } } diff --git a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs index 60fe64f4de5..732478828f2 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/HTTP.cs @@ -38,7 +38,9 @@ using System.Security.Authentication; using System.Security.Cryptography; using System.Security.Cryptography.X509Certificates; +#if !(NET8_0_OR_GREATER) using System.Runtime.Serialization; +#endif namespace XenAPI { @@ -58,12 +60,13 @@ public TooManyRedirectsException(int redirect, Uri uri) this.uri = uri; } - public TooManyRedirectsException() : base() { } + public TooManyRedirectsException() { } public TooManyRedirectsException(string message) : base(message) { } public TooManyRedirectsException(string message, Exception exception) : base(message, exception) { } +#if !(NET8_0_OR_GREATER) protected TooManyRedirectsException(SerializationInfo info, StreamingContext context) : base(info, context) { @@ -81,42 +84,47 @@ public override void GetObjectData(SerializationInfo info, StreamingContext cont base.GetObjectData(info, context); } +#endif } [Serializable] public class BadServerResponseException : Exception { - public BadServerResponseException() : base() { } + public BadServerResponseException() { } public BadServerResponseException(string message) : base(message) { } public BadServerResponseException(string message, Exception exception) : base(message, 
exception) { } +#if !(NET8_0_OR_GREATER) protected BadServerResponseException(SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } [Serializable] public class CancelledException : Exception { - public CancelledException() : base() { } + public CancelledException() { } public CancelledException(string message) : base(message) { } public CancelledException(string message, Exception exception) : base(message, exception) { } - +#if !(NET8_0_OR_GREATER) protected CancelledException(SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } [Serializable] public class ProxyServerAuthenticationException : Exception { - public ProxyServerAuthenticationException() : base() { } + public ProxyServerAuthenticationException() { } public ProxyServerAuthenticationException(string message) : base(message) { } public ProxyServerAuthenticationException(string message, Exception exception) : base(message, exception) { } - +#if !(NET8_0_OR_GREATER) protected ProxyServerAuthenticationException(SerializationInfo info, StreamingContext context) : base(info, context) { } +#endif } #endregion @@ -133,6 +141,9 @@ protected ProxyServerAuthenticationException(SerializationInfo info, StreamingCo public const int DEFAULT_HTTPS_PORT = 443; private const int NONCE_LENGTH = 16; + private const int FILE_MOVE_MAX_RETRIES = 5; + private const int FILE_MOVE_SLEEP_BETWEEN_RETRIES = 100; + public enum ProxyAuthenticationMethod { Basic = 0, @@ -149,7 +160,7 @@ public enum ProxyAuthenticationMethod private static void WriteLine(String txt, Stream stream) { - byte[] bytes = System.Text.Encoding.ASCII.GetBytes(String.Format("{0}\r\n", txt)); + byte[] bytes = Encoding.ASCII.GetBytes($"{txt}\r\n"); stream.Write(bytes, 0, bytes.Length); } @@ -164,7 +175,7 @@ private static void WriteLine(Stream stream) // done here. 
private static string ReadLine(Stream stream) { - System.Text.StringBuilder result = new StringBuilder(); + StringBuilder result = new StringBuilder(); while (true) { int b = stream.ReadByte(); @@ -208,9 +219,8 @@ private static bool ReadHttpHeaders(ref Stream stream, IWebProxy proxy, bool nod // read chunk size string chunkSizeStr = ReadLine(stream); chunkSizeStr = chunkSizeStr.TrimEnd('\r', '\n'); - int chunkSize = 0; int.TryParse(chunkSizeStr, System.Globalization.NumberStyles.HexNumber, - System.Globalization.CultureInfo.InvariantCulture, out chunkSize); + System.Globalization.CultureInfo.InvariantCulture, out var chunkSize); // read number of bytes from the stream int totalNumberOfBytesRead = 0; @@ -222,8 +232,8 @@ private static bool ReadHttpHeaders(ref Stream stream, IWebProxy proxy, bool nod totalNumberOfBytesRead += numberOfBytesRead; } while (numberOfBytesRead > 0 && totalNumberOfBytesRead < chunkSize); - string str = System.Text.Encoding.ASCII.GetString(bytes); - string[] split = str.Split(new string[] {"\r\n"}, StringSplitOptions.RemoveEmptyEntries); + string str = Encoding.ASCII.GetString(bytes); + string[] split = str.Split(new [] {"\r\n"}, StringSplitOptions.RemoveEmptyEntries); headers.AddRange(split); entityBody += str; @@ -267,7 +277,7 @@ private static bool ReadHttpHeaders(ref Stream stream, IWebProxy proxy, bool nod private static int getResultCode(string line) { - string[] bits = line.Split(new char[] { ' ' }); + string[] bits = line.Split(' '); return (bits.Length < 2 ? 0 : Int32.Parse(bits[1])); } @@ -292,7 +302,8 @@ private static bool ValidateServerCertificate( /// The secure hash as a hex string. private static string _MD5Hash(string str) { - return ComputeHash(str, "MD5"); + using (var hasher = MD5.Create()) + return ComputeHash(hasher, str); } /// @@ -302,32 +313,24 @@ private static string _MD5Hash(string str) /// The secure hash as a hex string. 
private static string Sha256Hash(string str) { - return ComputeHash(str, "SHA256"); + using (var hasher = SHA256.Create()) + return ComputeHash(hasher, str); } - private static string ComputeHash(string input, string method) + private static string ComputeHash(HashAlgorithm hasher, string input) { - if (input == null) + if (hasher == null || input == null) return null; var enc = new UTF8Encoding(); byte[] bytes = enc.GetBytes(input); - - using (var hasher = HashAlgorithm.Create(method)) - { - if (hasher != null) - { - byte[] hash = hasher.ComputeHash(bytes); - return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); - } - } - - return null; + byte[] hash = hasher.ComputeHash(bytes); + return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); } private static string GenerateNonce() { - using (var rngCsProvider = new RNGCryptoServiceProvider()) + using (var rngCsProvider = RandomNumberGenerator.Create()) { var nonceBytes = new byte[NONCE_LENGTH]; rngCsProvider.GetBytes(nonceBytes); @@ -417,7 +420,7 @@ public static Uri BuildUri(string hostname, string path, params object[] args) private static string GetPartOrNull(string str, int partIndex) { - string[] parts = str.Split(new char[] { ' ' }, partIndex + 2, StringSplitOptions.RemoveEmptyEntries); + string[] parts = str.Split(new [] { ' ' }, partIndex + 2, StringSplitOptions.RemoveEmptyEntries); return partIndex < parts.Length - 1 ? parts[partIndex] : null; } @@ -448,8 +451,7 @@ private static NetworkStream ConnectSocket(Uri uri, bool nodelay, int timeoutMs) /// Timeout, in ms. 0 for no timeout. 
public static Stream ConnectStream(Uri uri, IWebProxy proxy, bool nodelay, int timeoutMs) { - IMockWebProxy mockProxy = proxy as IMockWebProxy; - if (mockProxy != null) + if (proxy is IMockWebProxy mockProxy) return mockProxy.GetStream(uri); Stream stream; @@ -469,7 +471,7 @@ public static Stream ConnectStream(Uri uri, IWebProxy proxy, bool nodelay, int t { if (useProxy) { - string line = string.Format("CONNECT {0}:{1} HTTP/1.0", uri.Host, uri.Port); + string line = $"CONNECT {uri.Host}:{uri.Port} HTTP/1.0"; WriteLine(line, stream); WriteLine(stream); @@ -481,9 +483,8 @@ public static Stream ConnectStream(Uri uri, IWebProxy proxy, bool nodelay, int t if (UseSSL(uri)) { - SslStream sslStream = new SslStream(stream, false, - new RemoteCertificateValidationCallback(ValidateServerCertificate), null); - sslStream.AuthenticateAsClient("", null, SslProtocols.Tls | SslProtocols.Tls11 | SslProtocols.Tls12, true); + SslStream sslStream = new SslStream(stream, false, ValidateServerCertificate, null); + sslStream.AuthenticateAsClient("", null, SslProtocols.Tls12, true); stream = sslStream; } @@ -514,7 +515,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox } if (proxy.Credentials == null) - throw new BadServerResponseException(string.Format("Received error code {0} from the server", initialResponse[0])); + throw new BadServerResponseException($"Received error code {initialResponse[0]} from the server"); NetworkCredential credentials = proxy.Credentials.GetCredential(uri, null); @@ -526,10 +527,9 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox if (string.IsNullOrEmpty(basicField)) throw new ProxyServerAuthenticationException("Basic authentication scheme is not supported/enabled by the proxy server."); - string authenticationFieldReply = string.Format("Proxy-Authorization: Basic {0}", - Convert.ToBase64String(Encoding.UTF8.GetBytes(credentials.UserName + ":" + credentials.Password))); + var creds = 
Convert.ToBase64String(Encoding.UTF8.GetBytes(credentials.UserName + ":" + credentials.Password)); WriteLine(header, stream); - WriteLine(authenticationFieldReply, stream); + WriteLine($"Proxy-Authorization: Basic {creds}", stream); WriteLine(stream); } else if (CurrentProxyAuthenticationMethod == ProxyAuthenticationMethod.Digest) @@ -539,9 +539,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox if (string.IsNullOrEmpty(digestField)) throw new ProxyServerAuthenticationException("Digest authentication scheme is not supported/enabled by the proxy server."); - string authenticationFieldReply = string.Format( - "Proxy-Authorization: Digest username=\"{0}\", uri=\"{1}:{2}\"", - credentials.UserName, uri.Host, uri.Port); + string authenticationFieldReply = $"Proxy-Authorization: Digest username=\"{credentials.UserName}\", uri=\"{uri.Host}:{uri.Port}\""; int len = "Proxy-Authorization: Digest".Length; string directiveString = digestField.Substring(len, digestField.Length - len); @@ -562,19 +560,19 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox throw new ProxyServerAuthenticationException("Stale nonce in Digest authentication attempt."); break; case "realm=": - authenticationFieldReply += string.Format(", realm=\"{0}\"", directives[++i]); + authenticationFieldReply += $", realm=\"{directives[++i]}\""; realm = directives[i]; break; case "nonce=": - authenticationFieldReply += string.Format(", nonce=\"{0}\"", directives[++i]); + authenticationFieldReply += $", nonce=\"{directives[++i]}\""; nonce = directives[i]; break; case "opaque=": - authenticationFieldReply += string.Format(", opaque=\"{0}\"", directives[++i]); + authenticationFieldReply += $", opaque=\"{directives[++i]}\""; opaque = directives[i]; break; case "algorithm=": - authenticationFieldReply += string.Format(", algorithm={0}", directives[++i]); //unquoted; see RFC7616-3.4 + authenticationFieldReply += $", algorithm={directives[++i]}"; 
//unquoted; see RFC7616-3.4 algorithm = directives[i]; break; case "qop=": @@ -584,9 +582,8 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox qop = qops.FirstOrDefault(q => q.ToLowerInvariant() == "auth") ?? qops.FirstOrDefault(q => q.ToLowerInvariant() == "auth-int"); if (qop == null) - throw new ProxyServerAuthenticationException( - "Digest authentication's quality-of-protection directive is not supported."); - authenticationFieldReply += string.Format(", qop={0}", qop); //unquoted; see RFC7616-3.4 + throw new ProxyServerAuthenticationException("Digest authentication's quality-of-protection directive is not supported."); + authenticationFieldReply += $", qop={qop}"; //unquoted; see RFC7616-3.4 } break; } @@ -594,11 +591,11 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox string clientNonce = GenerateNonce(); if (qop != null) - authenticationFieldReply += string.Format(", cnonce=\"{0}\"", clientNonce); + authenticationFieldReply += $", cnonce=\"{clientNonce}\""; string nonceCount = "00000001"; // todo: track nonces and their corresponding nonce counts if (qop != null) - authenticationFieldReply += string.Format(", nc={0}", nonceCount); //unquoted; see RFC7616-3.4 + authenticationFieldReply += $", nc={nonceCount}"; //unquoted; see RFC7616-3.4 Func algFunc; var scratch1 = string.Join(":", credentials.UserName, realm, credentials.Password); @@ -636,7 +633,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox : new[] {HA1, nonce, nonceCount, clientNonce, qop, HA2}; var response = algFunc(string.Join(":", array3)); - authenticationFieldReply += string.Format(", response=\"{0}\"", response); + authenticationFieldReply += $", response=\"{response}\""; WriteLine(header, stream); WriteLine(authenticationFieldReply, stream); @@ -645,8 +642,7 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox else { string authType = GetPartOrNull(fields[0], 
1); - throw new ProxyServerAuthenticationException( - string.Format("Proxy server's {0} authentication method is not supported.", authType ?? "chosen")); + throw new ProxyServerAuthenticationException($"Proxy server's {authType ?? "chosen"} authentication method is not supported."); } // handle authentication attempt response @@ -662,12 +658,10 @@ private static void AuthenticateProxy(ref Stream stream, Uri uri, IWebProxy prox case 407: throw new ProxyServerAuthenticationException("Proxy server denied access due to wrong credentials."); default: - throw new BadServerResponseException(string.Format( - "Received error code {0} from the server", authenticatedResponse[0])); + throw new BadServerResponseException($"Received error code {authenticatedResponse[0]} from the server"); } } - private static Stream DoHttp(Uri uri, IWebProxy proxy, bool noDelay, int timeoutMs, params string[] headers) { Stream stream = ConnectStream(uri, proxy, noDelay, timeoutMs); @@ -829,9 +823,6 @@ public static void Get(DataCopiedDelegate dataCopiedDelegate, FuncBool cancellin } } - private const int FILE_MOVE_MAX_RETRIES = 5; - private const int FILE_MOVE_SLEEP_BETWEEN_RETRIES = 100; - /// /// Move a file, retrying a few times with a short sleep between retries. /// If it still fails after these retries, then throw the error. 
diff --git a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs index 519cc430d4e..a790f397320 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/JsonRpc.cs @@ -31,6 +31,13 @@ using System.Collections.Generic; using System.IO; using System.Net; +#if (NET8_0_OR_GREATER) +using System.Diagnostics; +using System.Linq; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Security.Cryptography.X509Certificates; +#endif using System.Net.Security; using System.Threading; using Newtonsoft.Json; @@ -49,9 +56,9 @@ internal abstract class JsonRequest { protected JsonRequest(int id, string method, JToken parameters) { - this.Id = id; - this.Method = method; - this.Parameters = parameters; + Id = id; + Method = method; + Parameters = parameters; } public static JsonRequest Create(JsonRpcVersion jsonRpcVersion, int id, string method, JToken parameters) @@ -65,6 +72,8 @@ public static JsonRequest Create(JsonRpcVersion jsonRpcVersion, int id, string m } } + public abstract string JsonRPC { get;} + /// /// Unique call id. Can be null in JSON_RPC v2.0, but xapi disallows it. 
/// @@ -95,6 +104,9 @@ public JsonRequestV1(int id, string method, JToken parameters) : base(id, method, parameters) { } + + [JsonIgnore] + public override string JsonRPC => "1.0"; } internal class JsonRequestV2 : JsonRequest @@ -105,18 +117,15 @@ public JsonRequestV2(int id, string method, JToken parameters) } [JsonProperty("jsonrpc", Required = Required.Always)] - public string JsonRPC - { - get { return "2.0"; } - } + public override string JsonRPC => "2.0"; } internal abstract class JsonResponse { - [JsonProperty("id", Required = Required.AllowNull)] public int Id = 0; + [JsonProperty("id", Required = Required.AllowNull)] public int Id { get; set; } - [JsonProperty("result", Required = Required.Default)] public T Result = default(T); + [JsonProperty("result", Required = Required.Default)] public T Result { get; set; } public override string ToString() { @@ -126,23 +135,23 @@ public override string ToString() internal class JsonResponseV1 : JsonResponse { - [JsonProperty("error", Required = Required.AllowNull)] public JToken Error = null; + [JsonProperty("error", Required = Required.AllowNull)] public JToken Error { get; set; } } internal class JsonResponseV2 : JsonResponse { - [JsonProperty("error", Required = Required.DisallowNull)] public JsonResponseV2Error Error = null; + [JsonProperty("error", Required = Required.DisallowNull)] public JsonResponseV2Error Error { get; set; } - [JsonProperty("jsonrpc", Required = Required.Always)] public string JsonRpc = null; + [JsonProperty("jsonrpc", Required = Required.Always)] public string JsonRpc { get; set; } } internal class JsonResponseV2Error { - [JsonProperty("code", Required = Required.Always)] public int Code = 0; + [JsonProperty("code", Required = Required.Always)] public int Code { get; set; } - [JsonProperty("message", Required = Required.Always)] public string Message = null; + [JsonProperty("message", Required = Required.Always)] public string Message { get; set; } - [JsonProperty("data", Required = 
Required.Default)] public JToken Data = null; + [JsonProperty("data", Required = Required.Default)] public JToken Data { get; set; } public override string ToString() { @@ -155,6 +164,42 @@ public partial class JsonRpcClient { private int _globalId; +#if (NET8_0_OR_GREATER) + private static readonly Type ClassType = typeof(JsonRpcClient); + private static readonly System.Reflection.AssemblyName ClassAssemblyName = ClassType?.Assembly?.GetName(); + private static readonly ActivitySource source = new ActivitySource(ClassAssemblyName.Name + "." + ClassType?.FullName, ClassAssemblyName.Version?.ToString()); + + // Follow naming conventions from OpenTelemetry.SemanticConventions + // Not yet on NuGet though: + // dotnet add package OpenTelemetry.SemanticConventions + private static class RpcAttributes + { + public const string AttributeRpcMethod = "rpc.method"; + public const string AttributeRpcSystem = "rpc.system"; + public const string AttributeRpcService = "rpc.service"; + public const string AttributeRpcJsonrpcErrorCode = "rpc.jsonrpc.error_code"; + public const string AttributeRpcJsonrpcErrorMessage = "rpc.jsonrpc.error_message"; + public const string AttributeRpcJsonrpcRequestId = "rpc.jsonrpc.request_id"; + public const string AttributeRpcJsonrpcVersion = "rpc.jsonrpc.version"; + public const string AttributeRpcMessageType = "rpc.message.type"; + + public static class RpcMessageTypeValues + { + public const string Sent = "SENT"; + public const string Received = "RECEIVED"; + } + } + + private static class ServerAttributes + { + public const string AttributeServerAddress = "server.address"; + } + + // not part of the SemanticConventions package + private const string ValueJsonRpc = "jsonrpc"; + private const string EventRpcMessage = "rpc.message"; +#endif + public JsonRpcClient(string baseUrl) { Url = baseUrl; @@ -180,7 +225,13 @@ public JsonRpcClient(string baseUrl) public bool AllowAutoRedirect { get; set; } public bool PreAuthenticate { get; set; } public 
CookieContainer Cookies { get; set; } + +#if (NET8_0_OR_GREATER) + public Func ServerCertificateValidationCallback { get; set; } +#else public RemoteCertificateValidationCallback ServerCertificateValidationCallback { get; set; } +#endif + public Dictionary RequestHeaders { get; set; } public Dictionary ResponseHeaders { get; set; } @@ -207,69 +258,186 @@ protected virtual T Rpc(string callName, JToken parameters, JsonSerializer se // therefore the latter will be done only in DEBUG mode using (var postStream = new MemoryStream()) { - using (var sw = new StreamWriter(postStream)) +#if (NET8_0_OR_GREATER) + // the semantic convention is $package.$service/$method + using (Activity activity = source.CreateActivity("XenAPI/" + callName, ActivityKind.Client)) { + activity?.Start(); + // Set the fields described in the OpenTelemetry Semantic Conventions: + // https://opentelemetry.io/docs/specs/semconv/rpc/json-rpc/ + // https://opentelemetry.io/docs/specs/semconv/rpc/rpc-spans/ + activity?.SetTag(RpcAttributes.AttributeRpcSystem, ValueJsonRpc); + activity?.SetTag(ServerAttributes.AttributeServerAddress, new Uri(Url).Host); + activity?.SetTag(RpcAttributes.AttributeRpcMethod, callName); + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcRequestId, id.ToString()); +#endif + using (var sw = new StreamWriter(postStream)) + { #if DEBUG - var settings = CreateSettings(serializer.Converters); - string jsonReq = JsonConvert.SerializeObject(request, settings); - if (RequestEvent != null) - RequestEvent(jsonReq); - sw.Write(jsonReq); + var settings = CreateSettings(serializer.Converters); + string jsonReq = JsonConvert.SerializeObject(request, settings); + if (RequestEvent != null) + RequestEvent(jsonReq); + sw.Write(jsonReq); #else - if (RequestEvent != null) - RequestEvent(callName); - serializer.Serialize(sw, request); + if (RequestEvent != null) + RequestEvent(callName); + serializer.Serialize(sw, request); #endif - sw.Flush(); - postStream.Seek(0, SeekOrigin.Begin); + 
sw.Flush(); + postStream.Seek(0, SeekOrigin.Begin); - using (var responseStream = new MemoryStream()) - { - PerformPostRequest(postStream, responseStream); - responseStream.Position = 0; - - using (var responseReader = new StreamReader(responseStream)) + using (var responseStream = new MemoryStream()) { - switch (JsonRpcVersion) + PerformPostRequest(postStream, responseStream); + responseStream.Position = 0; + + using (var responseReader = new StreamReader(responseStream)) { - case JsonRpcVersion.v2: +#if (NET8_0_OR_GREATER) + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcVersion, request.JsonRPC); +#endif + switch (JsonRpcVersion) + { + case JsonRpcVersion.v2: #if DEBUG - string json2 = responseReader.ReadToEnd(); - var res2 = JsonConvert.DeserializeObject>(json2, settings); + string json2 = responseReader.ReadToEnd(); + var res2 = JsonConvert.DeserializeObject>(json2, settings); #else - var res2 = (JsonResponseV2)serializer.Deserialize(responseReader, typeof(JsonResponseV2)); + var res2 = (JsonResponseV2)serializer.Deserialize(responseReader, typeof(JsonResponseV2)); #endif - if (res2.Error != null) - { - var descr = new List { res2.Error.Message }; - descr.AddRange(res2.Error.Data.ToObject()); - throw new Failure(descr); - } - return res2.Result; - default: + if (res2.Error != null) + { + var descr = new List { res2.Error.Message }; + descr.AddRange(res2.Error.Data.ToObject()); +#if (NET8_0_OR_GREATER) + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcErrorCode, res2.Error.Code); + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcErrorMessage, descr); + activity?.SetStatus(ActivityStatusCode.Error); +#endif + throw new Failure(descr); + } +#if (NET8_0_OR_GREATER) + activity?.SetStatus(ActivityStatusCode.Ok); +#endif + return res2.Result; + default: #if DEBUG - string json1 = responseReader.ReadToEnd(); - var res1 = JsonConvert.DeserializeObject>(json1, settings); + string json1 = responseReader.ReadToEnd(); + var res1 = 
JsonConvert.DeserializeObject>(json1, settings); #else - var res1 = (JsonResponseV1)serializer.Deserialize(responseReader, typeof(JsonResponseV1)); + var res1 = (JsonResponseV1)serializer.Deserialize(responseReader, typeof(JsonResponseV1)); #endif - if (res1.Error != null) - { - var errorArray = res1.Error.ToObject(); + var errorArray = res1.Error?.ToObject(); if (errorArray != null) + { +#if (NET8_0_OR_GREATER) + activity?.SetStatus(ActivityStatusCode.Error); + // we can't be sure whether we'll have a Code here + // the exact format of an error object is not specified in JSONRPC v1 + activity?.SetTag(RpcAttributes.AttributeRpcJsonrpcErrorMessage, errorArray.ToString()); +#endif throw new Failure(errorArray); - } - return res1.Result; + } +#if (NET8_0_OR_GREATER) + activity?.SetStatus(ActivityStatusCode.Ok); +#endif + return res1.Result; + } } } } +#if (NET8_0_OR_GREATER) } +#endif } } - protected virtual void PerformPostRequest(Stream postStream, Stream responseStream) { +#if (NET8_0_OR_GREATER) + HttpClient httpClient = null; + HttpClientHandler httpHandler = null; + HttpRequestMessage requestMessage = null; + HttpResponseMessage responseMessage = null; + + try + { + httpHandler = new HttpClientHandler + { + AllowAutoRedirect = AllowAutoRedirect, + PreAuthenticate = PreAuthenticate, + CookieContainer = Cookies ?? new CookieContainer(), + Proxy = WebProxy + }; + + Func callBack = null; + if (ServicePointManager.ServerCertificateValidationCallback != null) + callBack = ServicePointManager.ServerCertificateValidationCallback.Invoke; + + httpHandler.ServerCertificateCustomValidationCallback = ServerCertificateValidationCallback ?? 
callBack; + + httpClient = new HttpClient(httpHandler) { Timeout = TimeSpan.FromMilliseconds(Timeout) }; + + requestMessage = new HttpRequestMessage(HttpMethod.Post, new Uri(JsonRpcUrl)); + if (ProtocolVersion != null) + requestMessage.Version = ProtocolVersion; + requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json")); + requestMessage.Headers.UserAgent.ParseAdd(UserAgent); + requestMessage.Headers.ConnectionClose = !KeepAlive; + requestMessage.Headers.ExpectContinue = Expect100Continue; + requestMessage.Content = new StreamContent(postStream); + + if (RequestHeaders != null) + { + foreach (var header in RequestHeaders) + requestMessage.Headers.Add(header.Key, header.Value); + } + + // propagate W3C traceparent and tracestate + // HttpClient would do this automatically on .NET 5, + // and .NET 6 would provide even more control over this: https://blog.ladeak.net/posts/opentelemetry-net6-httpclient + // the caller must ensure that the activity is in W3C format (by inheritance or direct setting) + var activity = Activity.Current; + if (activity != null) + { + if (activity.IdFormat == ActivityIdFormat.W3C) + { + requestMessage.Headers.Add("traceparent", activity.Id); + var state = activity.TraceStateString; + + if (state?.Length > 0) + requestMessage.Headers.Add("tracestate", state); + } + + var tags = new ActivityTagsCollection { { RpcAttributes.AttributeRpcMessageType, RpcAttributes.RpcMessageTypeValues.Sent } }; + activity.AddEvent(new ActivityEvent(EventRpcMessage, DateTimeOffset.Now, tags)); + } + + responseMessage = httpClient.SendAsync(requestMessage).Result; + responseMessage.EnsureSuccessStatusCode(); + + var str = responseMessage.Content.ReadAsStream(); + str.CopyTo(responseStream); + responseStream.Flush(); + + ResponseHeaders = responseMessage.Headers.ToDictionary(header => header.Key, header => string.Join(",", header.Value)); + + if (activity != null) + { + var tags = new ActivityTagsCollection { { 
RpcAttributes.AttributeRpcMessageType, RpcAttributes.RpcMessageTypeValues.Received } }; + activity.AddEvent(new ActivityEvent(EventRpcMessage, DateTimeOffset.Now, tags)); + } + } + finally + { + RequestHeaders = null; + responseMessage?.Dispose(); + requestMessage?.Dispose(); + httpClient?.Dispose(); + httpHandler?.Dispose(); + } +#else var webRequest = (HttpWebRequest)WebRequest.Create(JsonRpcUrl); webRequest.Method = "POST"; webRequest.ContentType = "application/json"; @@ -332,6 +500,7 @@ protected virtual void PerformPostRequest(Stream postStream, Stream responseStre RequestHeaders = null; webResponse?.Dispose(); } +#endif } private JsonSerializerSettings CreateSettings(IList converters) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Session.cs b/ocaml/sdk-gen/csharp/autogen/src/Session.cs index 82db84a8210..5d999136833 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Session.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Session.cs @@ -31,6 +31,10 @@ using System.Collections.Generic; using System.Linq; using System.Net; +#if (NET8_0_OR_GREATER) +using System.Net.Http; +using System.Security.Cryptography.X509Certificates; +#endif using System.Net.Security; using Newtonsoft.Json; @@ -44,7 +48,7 @@ public partial class Session : XenObject /// /// This string is used as the HTTP UserAgent for each request. /// - public static string UserAgent = string.Format("XenAPI/{0}", Helper.APIVersionString(API_Version.LATEST)); + public static string UserAgent = $"XenAPI/{Helper.APIVersionString(API_Version.LATEST)}"; /// /// If null, no proxy is used, otherwise this proxy is used for each request. @@ -55,8 +59,6 @@ public partial class Session : XenObject public object Tag; - private List roles = new List(); - #region Constructors public Session(JsonRpcClient client) @@ -124,7 +126,7 @@ public Session(Session session) private static string GetUrl(string hostname, int port) { - return string.Format("{0}://{1}:{2}", port == 8080 || port == 80 ? 
"http" : "https", hostname, port); + return $"{(port == 8080 || port == 80 ? "http" : "https")}://{hostname}:{port}"; } private void SetupSessionDetails() @@ -159,7 +161,7 @@ private void CopyADFromSession(Session session) IsLocalSuperuser = session.IsLocalSuperuser; SessionSubject = session.SessionSubject; UserSid = session.UserSid; - roles = session.Roles; + Roles = session.Roles; Permissions = session.Permissions; } @@ -208,7 +210,7 @@ private void SetRbacPermissions() if (r.subroles.Count > 0 && r.name_label == s) { r.opaque_ref = xr.opaque_ref; - roles.Add(r); + Roles.Add(r); break; } } @@ -220,7 +222,8 @@ public override void UpdateFrom(Session update) throw new Exception("The method or operation is not implemented."); } - public override string SaveChanges(Session session, string _serverOpaqueRef, Session serverObject) + [Obsolete("Use the calls setting individual fields of the API object instead.")] + public override string SaveChanges(Session session, string serverOpaqueRef, Session serverObject) { throw new Exception("The method or operation is not implemented."); } @@ -248,11 +251,19 @@ public int Timeout set => JsonRpcClient.Timeout = value; } +#if (NET8_0_OR_GREATER) + public Func ServerCertificateValidationCallback + { + get => JsonRpcClient?.ServerCertificateValidationCallback; + set => JsonRpcClient.ServerCertificateValidationCallback = value; + } +#else public RemoteCertificateValidationCallback ServerCertificateValidationCallback { get => JsonRpcClient?.ServerCertificateValidationCallback; set => JsonRpcClient.ServerCertificateValidationCallback = value; } +#endif public ICredentials Credentials => JsonRpcClient?.WebProxy?.Credentials; @@ -306,7 +317,7 @@ public Dictionary RequestHeaders /// instead use Permissions. This list should only be used for UI purposes. 
/// [JsonConverter(typeof(XenRefListConverter))] - public List Roles => roles; + public List Roles { get; private set; } = new List(); #endregion @@ -315,9 +326,9 @@ public string[] GetSystemMethods() return JsonRpcClient.system_list_methods(); } - public static Session get_record(Session session, string _session) + public static Session get_record(Session session, string sessionOpaqueRef) { - Session newSession = new Session(session.Url) { opaque_ref = _session }; + Session newSession = new Session(session.Url) { opaque_ref = sessionOpaqueRef }; newSession.SetAPIVersion(); return newSession; } @@ -402,13 +413,13 @@ public void logout(Session session2) /// /// Log out of the session with the given reference, using this session for the connection. /// - /// The session to log out - public void logout(string _self) + /// The session to log out + public void logout(string self) { - if (_self == null) + if (self == null) return; - JsonRpcClient.session_logout(_self); + JsonRpcClient.session_logout(self); } public void local_logout() @@ -451,9 +462,9 @@ public string get_this_host() return get_this_host(this, opaque_ref); } - public static string get_this_host(Session session, string _self) + public static string get_this_host(Session session, string self) { - return session.JsonRpcClient.session_get_this_host(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_this_host(session.opaque_ref, self ?? ""); } public string get_this_user() @@ -461,9 +472,9 @@ public string get_this_user() return get_this_user(this, opaque_ref); } - public static string get_this_user(Session session, string _self) + public static string get_this_user(Session session, string self) { - return session.JsonRpcClient.session_get_this_user(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_this_user(session.opaque_ref, self ?? 
""); } public bool get_is_local_superuser() @@ -471,14 +482,14 @@ public bool get_is_local_superuser() return get_is_local_superuser(this, opaque_ref); } - public static bool get_is_local_superuser(Session session, string _self) + public static bool get_is_local_superuser(Session session, string self) { - return session.JsonRpcClient.session_get_is_local_superuser(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_is_local_superuser(session.opaque_ref, self ?? ""); } - public static string[] get_rbac_permissions(Session session, string _self) + public static string[] get_rbac_permissions(Session session, string self) { - return session.JsonRpcClient.session_get_rbac_permissions(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_rbac_permissions(session.opaque_ref, self ?? ""); } public DateTime get_last_active() @@ -486,9 +497,9 @@ public DateTime get_last_active() return get_last_active(this, opaque_ref); } - public static DateTime get_last_active(Session session, string _self) + public static DateTime get_last_active(Session session, string self) { - return session.JsonRpcClient.session_get_last_active(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_last_active(session.opaque_ref, self ?? ""); } public bool get_pool() @@ -496,9 +507,9 @@ public bool get_pool() return get_pool(this, opaque_ref); } - public static bool get_pool(Session session, string _self) + public static bool get_pool(Session session, string self) { - return session.JsonRpcClient.session_get_pool(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_pool(session.opaque_ref, self ?? 
""); } public XenRef get_subject() @@ -506,9 +517,9 @@ public XenRef get_subject() return get_subject(this, opaque_ref); } - public static XenRef get_subject(Session session, string _self) + public static XenRef get_subject(Session session, string self) { - return session.JsonRpcClient.session_get_subject(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_subject(session.opaque_ref, self ?? ""); } public string get_auth_user_sid() @@ -516,9 +527,9 @@ public string get_auth_user_sid() return get_auth_user_sid(this, opaque_ref); } - public static string get_auth_user_sid(Session session, string _self) + public static string get_auth_user_sid(Session session, string self) { - return session.JsonRpcClient.session_get_auth_user_sid(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_auth_user_sid(session.opaque_ref, self ?? ""); } #region AD SID enumeration and bootout @@ -543,25 +554,25 @@ public static XenRef async_get_all_subject_identifiers(Session session) return session.JsonRpcClient.async_session_get_all_subject_identifiers(session.opaque_ref); } - public string logout_subject_identifier(string subject_identifier) + public string logout_subject_identifier(string subjectIdentifier) { - return logout_subject_identifier(this, subject_identifier); + return logout_subject_identifier(this, subjectIdentifier); } - public static string logout_subject_identifier(Session session, string subject_identifier) + public static string logout_subject_identifier(Session session, string subjectIdentifier) { - session.JsonRpcClient.session_logout_subject_identifier(session.opaque_ref, subject_identifier); + session.JsonRpcClient.session_logout_subject_identifier(session.opaque_ref, subjectIdentifier); return string.Empty; } - public XenRef async_logout_subject_identifier(string subject_identifier) + public XenRef async_logout_subject_identifier(string subjectIdentifier) { - return async_logout_subject_identifier(this, 
subject_identifier); + return async_logout_subject_identifier(this, subjectIdentifier); } - public static XenRef async_logout_subject_identifier(Session session, string subject_identifier) + public static XenRef async_logout_subject_identifier(Session session, string subjectIdentifier) { - return session.JsonRpcClient.async_session_logout_subject_identifier(session.opaque_ref, subject_identifier); + return session.JsonRpcClient.async_session_logout_subject_identifier(session.opaque_ref, subjectIdentifier); } #endregion @@ -573,39 +584,39 @@ public Dictionary get_other_config() return get_other_config(this, opaque_ref); } - public static Dictionary get_other_config(Session session, string _self) + public static Dictionary get_other_config(Session session, string self) { - return session.JsonRpcClient.session_get_other_config(session.opaque_ref, _self ?? ""); + return session.JsonRpcClient.session_get_other_config(session.opaque_ref, self ?? ""); } - public void set_other_config(Dictionary _other_config) + public void set_other_config(Dictionary otherConfig) { - set_other_config(this, opaque_ref, _other_config); + set_other_config(this, opaque_ref, otherConfig); } - public static void set_other_config(Session session, string _self, Dictionary _other_config) + public static void set_other_config(Session session, string self, Dictionary otherConfig) { - session.JsonRpcClient.session_set_other_config(session.opaque_ref, _self ?? "", _other_config); + session.JsonRpcClient.session_set_other_config(session.opaque_ref, self ?? 
"", otherConfig); } - public void add_to_other_config(string _key, string _value) + public void add_to_other_config(string key, string value) { - add_to_other_config(this, opaque_ref, _key, _value); + add_to_other_config(this, opaque_ref, key, value); } - public static void add_to_other_config(Session session, string _self, string _key, string _value) + public static void add_to_other_config(Session session, string self, string key, string value) { - session.JsonRpcClient.session_add_to_other_config(session.opaque_ref, _self ?? "", _key ?? "", _value ?? ""); + session.JsonRpcClient.session_add_to_other_config(session.opaque_ref, self ?? "", key ?? "", value ?? ""); } - public void remove_from_other_config(string _key) + public void remove_from_other_config(string key) { - remove_from_other_config(this, opaque_ref, _key); + remove_from_other_config(this, opaque_ref, key); } - public static void remove_from_other_config(Session session, string _self, string _key) + public static void remove_from_other_config(Session session, string self, string key) { - session.JsonRpcClient.session_remove_from_other_config(session.opaque_ref, _self ?? "", _key ?? ""); + session.JsonRpcClient.session_remove_from_other_config(session.opaque_ref, self ?? "", key ?? ""); } #endregion diff --git a/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs b/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs index 3d372799771..10f238a2b04 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/XenObject.cs @@ -42,15 +42,7 @@ public abstract partial class XenObject : IXenObject where S : XenObject /// public abstract void UpdateFrom(S record); - /// - /// Save any changed fields to the server. - /// This method is usually invoked on a thread pool thread. - /// - /// - /// - /// Changes are sent to the server if the field in "this" - /// is different from serverObject. Can be the object in the cache, or another reference - /// object that we want to save changes to. 
+ [Obsolete("Use the calls setting individual fields of the API object instead.")] public abstract string SaveChanges(Session session, string serverOpaqueRef, S serverObject); public string opaque_ref { get; set; } diff --git a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj index 8f36aba76fa..22acc1de24a 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj +++ b/ocaml/sdk-gen/csharp/autogen/src/XenServer.csproj @@ -1,7 +1,7 @@  0.0.0 - netstandard2.0;net45 + net80;netstandard2.0;net45 Library XenAPI True @@ -18,6 +18,7 @@ packageIcon.png git README-NuGet.md + true @@ -26,6 +27,7 @@ true + 8981 diff --git a/ocaml/sdk-gen/csharp/dune b/ocaml/sdk-gen/csharp/dune index 07e2fd42950..25f35763c4b 100644 --- a/ocaml/sdk-gen/csharp/dune +++ b/ocaml/sdk-gen/csharp/dune @@ -1,60 +1,41 @@ (executable - (modes exe) - (name gen_csharp_binding) - (modules Gen_csharp_binding) - (libraries - astring - CommonFunctions - - mustache - xapi-consts - xapi-datamodel - ) -) + (modes exe) + (name gen_csharp_binding) + (modules Gen_csharp_binding) + (libraries astring CommonFunctions mustache xapi-consts xapi-datamodel)) (executable - (modes exe) - (name friendly_error_names) - (modules Friendly_error_names) - (libraries - CommonFunctions - - mustache - xapi-datamodel - xmllight2 - str - ) -) + (modes exe) + (name friendly_error_names) + (modules Friendly_error_names) + (libraries CommonFunctions mustache xapi-datamodel xmllight2 str)) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - (:x gen_csharp_binding.exe) - (source_tree templates) - (:sh ../windows-line-endings.sh) - (source_tree autogen) - (:x2 friendly_error_names.exe) - FriendlyErrorNames.resx - (:y XE_SR_ERRORCODES.xml) - (source_tree templates) - ) - (action - (progn - (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - (run %{x2} -s %{y}) - ) - (bash "rm autogen-out/.gitignore") - (bash "%{sh} autogen-out/") - )) -) + (alias generate) 
+ (targets + (dir autogen-out)) + (deps + (:x gen_csharp_binding.exe) + (source_tree templates) + (:sh ../windows-line-endings.sh) + (source_tree autogen) + (:x2 friendly_error_names.exe) + FriendlyErrorNames.resx + (:y XE_SR_ERRORCODES.xml) + (source_tree templates)) + (action + (progn + (progn + (bash "cp -r autogen/ autogen-out/") + (run %{x}) + (run %{x2} -s %{y})) + (bash "rm autogen-out/.gitignore") + (bash "%{sh} autogen-out/")))) (data_only_dirs templates) (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as csharp)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as csharp))) diff --git a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml index 14b6af5e225..45ee61f46c7 100644 --- a/ocaml/sdk-gen/csharp/gen_csharp_binding.ml +++ b/ocaml/sdk-gen/csharp/gen_csharp_binding.ml @@ -353,6 +353,8 @@ and gen_class out_chan cls = print ";\n\ \ }\n\n\ + \ [Obsolete(\"Use the calls setting individual fields of the API \ + object instead.\")]\n\ \ public override string SaveChanges(Session session, string \ opaqueRef, %s server)\n\ \ {\n\ diff --git a/ocaml/sdk-gen/dune b/ocaml/sdk-gen/dune index 76bdaaab2ca..6c4a09913d5 100644 --- a/ocaml/sdk-gen/dune +++ b/ocaml/sdk-gen/dune @@ -1,18 +1,16 @@ (data_only_dirs component-test) (alias - (name sdkgen) - (package xapi-sdk) - (deps - c/gen_c_binding.exe - csharp/gen_csharp_binding.exe - java/main.exe - powershell/gen_powershell_binding.exe - go/gen_go_binding.exe - (alias_rec c/generate) - (alias_rec csharp/generate) - (alias_rec java/generate) - (alias_rec powershell/generate) - (alias_rec go/generate) - ) -) + (name sdkgen) + (package xapi-sdk) + (deps + c/gen_c_binding.exe + csharp/gen_csharp_binding.exe + java/main.exe + powershell/gen_powershell_binding.exe + go/gen_go_binding.exe + (alias_rec c/generate) + (alias_rec csharp/generate) + (alias_rec java/generate) + (alias_rec powershell/generate) + (alias_rec go/generate))) diff --git 
a/ocaml/sdk-gen/go/autogen/dune b/ocaml/sdk-gen/go/autogen/dune index 98bbd45a418..05b35e921a1 100644 --- a/ocaml/sdk-gen/go/autogen/dune +++ b/ocaml/sdk-gen/go/autogen/dune @@ -1,26 +1,20 @@ (rule - (targets LICENSE) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (targets LICENSE) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (rule - (targets README) - (deps - ../README.md - ) - (action (copy %{deps} %{targets})) -) + (targets README) + (deps ../README.md) + (action + (copy %{deps} %{targets}))) (alias - (name generate) - (deps - LICENSE - README - (source_tree .) - ) -) + (name generate) + (deps + LICENSE + README + (source_tree .))) (data_only_dirs src) diff --git a/ocaml/sdk-gen/go/dune b/ocaml/sdk-gen/go/dune index a126ee856bd..64717b85c6d 100644 --- a/ocaml/sdk-gen/go/dune +++ b/ocaml/sdk-gen/go/dune @@ -1,60 +1,52 @@ (executable - (modes exe) - (name gen_go_binding) - (modules gen_go_binding) - (libraries - CommonFunctions - mustache - xapi-datamodel - xapi-stdext-unix - gen_go_helper - ) -) + (modes exe) + (name gen_go_binding) + (modules gen_go_binding) + (libraries + CommonFunctions + mustache + xapi-datamodel + xapi-stdext-unix + gen_go_helper)) (library - (name gen_go_helper) - (modules gen_go_helper) - (modes best) - (libraries - CommonFunctions - astring - (re_export mustache) - (re_export xapi-consts) - (re_export xapi-datamodel) - xapi-stdext-std - ) -) + (name gen_go_helper) + (modules gen_go_helper) + (modes best) + (libraries + CommonFunctions + astring + (re_export mustache) + (re_export xapi-consts) + (re_export xapi-datamodel) + xapi-stdext-std)) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - (:x gen_go_binding.exe) - (source_tree templates) - (source_tree autogen) - ) - (action - (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x} --destdir autogen-out) - ) - ) -) + (alias generate) + (targets + (dir autogen-out)) + (deps + (:x gen_go_binding.exe) + (source_tree templates) + 
(source_tree autogen)) + (action + (progn + (bash "cp -r autogen/ autogen-out/") + (run %{x} --destdir autogen-out)))) (test - (name test_gen_go) - (package xapi-sdk) - (modules test_gen_go) - (libraries CommonFunctions alcotest fmt xapi-test-utils gen_go_helper) - (deps - (source_tree test_data) - (source_tree templates) - ) -) + (name test_gen_go) + (package xapi-sdk) + (modules test_gen_go) + (libraries CommonFunctions alcotest fmt xapi-test-utils gen_go_helper) + (deps + (source_tree test_data) + (source_tree templates))) (data_only_dirs test_data templates) + (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as go)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as go))) diff --git a/ocaml/sdk-gen/go/gen_go_binding.ml b/ocaml/sdk-gen/go/gen_go_binding.ml index eb7bc73a96b..bfa541732a3 100644 --- a/ocaml/sdk-gen/go/gen_go_binding.ml +++ b/ocaml/sdk-gen/go/gen_go_binding.ml @@ -105,6 +105,7 @@ let render_converts destdir = let json : Mustache.Json.t = of_json params in render_template template json () ) + |> List.sort_uniq compare |> String.concat "" in let rendered = diff --git a/ocaml/sdk-gen/go/gen_go_helper.ml b/ocaml/sdk-gen/go/gen_go_helper.ml index 47540f55ef7..84b91260ae2 100644 --- a/ocaml/sdk-gen/go/gen_go_helper.ml +++ b/ocaml/sdk-gen/go/gen_go_helper.ml @@ -38,6 +38,7 @@ let acronyms = ; "db" ; "xml" ; "eof" + ; "ssh" ] |> StringSet.of_list diff --git a/ocaml/sdk-gen/java/autogen/dune b/ocaml/sdk-gen/java/autogen/dune index da324f0b9d0..e14eba6a578 100644 --- a/ocaml/sdk-gen/java/autogen/dune +++ b/ocaml/sdk-gen/java/autogen/dune @@ -1,9 +1,6 @@ (alias - (name generate) - (deps - (source_tree .) 
- ) -) + (name generate) + (deps + (source_tree .))) (data_only_dirs xen-api) - diff --git a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml index c3a6cabdfda..5dc18e7ec61 100644 --- a/ocaml/sdk-gen/java/autogen/xen-api/pom.xml +++ b/ocaml/sdk-gen/java/autogen/xen-api/pom.xml @@ -74,12 +74,12 @@ vcc-releases VCC Release Repository - http://oss.sonatype.org/content/repositories/java-net-releases/ + https://oss.sonatype.org/content/repositories/java-net-releases/ vcc-snapshots VCC Snapshot Repository - http://oss.sonatype.org/content/repositories/java-net-snapshots/ + https://oss.sonatype.org/content/repositories/java-net-snapshots/ diff --git a/ocaml/sdk-gen/java/dune b/ocaml/sdk-gen/java/dune index 07167296b84..31fd56640a6 100644 --- a/ocaml/sdk-gen/java/dune +++ b/ocaml/sdk-gen/java/dune @@ -1,44 +1,38 @@ (executable - (modes exe) - (name main) - (libraries - astring - CommonFunctions - - mustache - str - xapi-datamodel - xapi-stdext-unix - ) -) + (modes exe) + (name main) + (libraries + astring + CommonFunctions + mustache + str + xapi-datamodel + xapi-stdext-unix)) (rule - (targets LICENSE) - (deps - ../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (targets LICENSE) + (deps ../LICENSE) + (action + (copy %{deps} %{targets}))) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - LICENSE - (:x main.exe) - (source_tree templates) - (source_tree autogen) - ) - (action (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - )) -) + (alias generate) + (targets + (dir autogen-out)) + (deps + LICENSE + (:x main.exe) + (source_tree templates) + (source_tree autogen)) + (action + (progn + (bash "cp -r autogen/ autogen-out/") + (run %{x})))) (data_only_dirs templates) (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as java)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as java))) diff --git a/ocaml/sdk-gen/powershell/autogen/dune 
b/ocaml/sdk-gen/powershell/autogen/dune index c4c2a5f8633..4cfb2b8c487 100644 --- a/ocaml/sdk-gen/powershell/autogen/dune +++ b/ocaml/sdk-gen/powershell/autogen/dune @@ -1,22 +1,20 @@ (rule - (targets LICENSE) - (deps - ../../LICENSE - ) - (action (copy %{deps} %{targets})) -) + (targets LICENSE) + (deps ../../LICENSE) + (action + (copy %{deps} %{targets}))) (alias - (name generate) - (deps - LICENSE - (source_tree .) - ) -) + (name generate) + (deps + LICENSE + (source_tree .))) (data_only_dirs src) + (install - (package xapi-sdk) - (section doc) - (files (glob_files_rec (autogen/* with_prefix powershell))) -) + (package xapi-sdk) + (section doc) + (files + (glob_files_rec + (autogen/* with_prefix powershell)))) diff --git a/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs b/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs index a1dc4ecf964..52cb8e21e54 100644 --- a/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs +++ b/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs @@ -32,6 +32,9 @@ using System.IO; using System.Management.Automation; using System.Net; +#if NET8_0_OR_GREATER +using System.Net.Http; +#endif using System.Net.Security; using System.Runtime.InteropServices; using System.Security; @@ -159,7 +162,7 @@ protected override void ProcessRecord() } ServicePointManager.ServerCertificateValidationCallback = ValidateServerCertificate; - ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls | SecurityProtocolType.Tls11 | SecurityProtocolType.Tls12; + ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12; if (Url == null || Url.Length == 0) { @@ -209,7 +212,7 @@ protected override void ProcessRecord() throw; } } - catch (WebException e) + catch (Exception e) { var inner = e.InnerException?.InnerException ?? 
//.NET case e.InnerException; //.NET Framework case @@ -271,8 +274,13 @@ private bool ValidateServerCertificate(object sender, X509Certificate certificat bool ignoreChanged = Force || NoWarnCertificates || (bool)GetVariableValue("NoWarnCertificates", false); bool ignoreNew = Force || NoWarnNewCertificates || (bool)GetVariableValue("NoWarnNewCertificates", false); - HttpWebRequest webreq = (HttpWebRequest)sender; - string hostname = webreq.Address.Host; +#if NET8_0_OR_GREATER + var requestMessage = sender as HttpRequestMessage; + string hostname = requestMessage?.RequestUri?.Host ?? string.Empty; +#else + var webreq = sender as HttpWebRequest; + string hostname = webreq?.Address?.Host ?? string.Empty; +#endif string fingerprint = CommonCmdletFunctions.FingerprintPrettyString(certificate.GetCertHashString()); bool trusted = VerifyInAllStores(new X509Certificate2(certificate)); diff --git a/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj b/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj index 1fb6483bd34..35c2fc8fa42 100644 --- a/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj +++ b/ocaml/sdk-gen/powershell/autogen/src/XenServerPowerShell.csproj @@ -1,7 +1,7 @@ 0.0.0 - net8.0;net6.0;net45 + net8.0;net45 Library True @@ -12,14 +12,8 @@ - - - - - False - $(MSBuildProgramFiles32)\Reference Assemblies\Microsoft\WindowsPowerShell\3.0\System.Management.Automation.dll - + diff --git a/ocaml/sdk-gen/powershell/dune b/ocaml/sdk-gen/powershell/dune index 7eb4d3e56d6..6fdee3e0fcf 100644 --- a/ocaml/sdk-gen/powershell/dune +++ b/ocaml/sdk-gen/powershell/dune @@ -1,38 +1,29 @@ (executable - (modes exe) - (name gen_powershell_binding) - (libraries - astring - CommonFunctions - - mustache - xapi-datamodel - ) -) + (modes exe) + (name gen_powershell_binding) + (libraries astring CommonFunctions mustache xapi-datamodel)) (rule - (alias generate) - (targets (dir autogen-out)) - (deps - (:x gen_powershell_binding.exe) - (source_tree 
templates) - (:sh ../windows-line-endings.sh) - (source_tree autogen) - ) - (action - (progn - (concurrent - (bash "cp -r autogen/ autogen-out/") - (run %{x}) - ) - (bash "rm autogen-out/.gitignore") - (bash "%{sh} autogen-out/") - )) -) + (alias generate) + (targets + (dir autogen-out)) + (deps + (:x gen_powershell_binding.exe) + (source_tree templates) + (:sh ../windows-line-endings.sh) + (source_tree autogen)) + (action + (progn + (progn + (bash "cp -r autogen/ autogen-out/") + (run %{x})) + (bash "rm autogen-out/.gitignore") + (bash "%{sh} autogen-out/")))) (data_only_dirs templates) + (install - (package xapi-sdk) - (section share_root) - (dirs (autogen-out as powershell)) -) + (package xapi-sdk) + (section share_root) + (dirs + (autogen-out as powershell))) diff --git a/ocaml/tests/bench/bechamel_simple_cli.ml b/ocaml/tests/bench/bechamel_simple_cli.ml index e40399cf04d..bcbd574f7f0 100644 --- a/ocaml/tests/bench/bechamel_simple_cli.ml +++ b/ocaml/tests/bench/bechamel_simple_cli.ml @@ -1,3 +1,17 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + open Bechamel open Toolkit @@ -83,11 +97,19 @@ let thread_workload ~before ~run ~after = a few times. Bechamel has both an iteration count and time limit, so this won't be a problem for slower benchmarks. 
*) -let limit = 10_000_000 +let default_limit = 10_000_000 -let benchmark ~instances tests = - let cfg = Benchmark.cfg ~limit ~quota:(Time.second 10.0) () in - Benchmark.all cfg instances tests +let benchmark ~instances cfg tests = + let n = List.length tests in + tests + |> List.to_seq + |> Seq.mapi (fun i test -> + let name = Test.Elt.name test in + Format.eprintf "Running benchmark %u/%u %s ...@?" (i + 1) n name ; + let results = Benchmark.run cfg instances test in + Format.eprintf "@." ; (name, results) + ) + |> Hashtbl.of_seq let analyze ~instances raw_results = let ols ~bootstrap = @@ -108,14 +130,13 @@ open Notty_unix let img (window, results) = Bechamel_notty.Multiple.image_of_ols_results ~rect:window ~predictor:Measure.run results - |> eol let not_workload measure = not (Measure.label measure = skip_label) -let run_and_print instances tests = - let results, _ = +let run_and_print cfg instances tests = + let results, raw_results = tests - |> benchmark ~instances + |> benchmark ~instances cfg |> analyze ~instances:(List.filter not_workload instances) in let window = @@ -127,27 +148,132 @@ let run_and_print instances tests = in img (window, results) |> eol |> output_image ; results - |> Hashtbl.iter @@ fun label results -> - if label = Measure.label Instance.monotonic_clock then - let units = Bechamel_notty.Unit.unit_of_label label in - results - |> Hashtbl.iter @@ fun name ols -> - Format.printf "%s (%s):@, %a@." name units Analyze.OLS.pp ols + |> Hashtbl.iter (fun label results -> + if label = Measure.label Instance.monotonic_clock then + let units = Bechamel_notty.Unit.unit_of_label label in + results + |> Hashtbl.iter @@ fun name ols -> + Format.printf "%s (%s):@, %a@." 
name units Analyze.OLS.pp ols + ) ; + (results, raw_results) -let cli ?(always = []) ?(workloads = []) tests = +let cli ~always ~workloads cfg tests store = let instances = always @ Instance.[monotonic_clock; minor_allocated; major_allocated] @ always in List.iter (fun i -> Bechamel_notty.Unit.add i (Measure.unit i)) instances ; - Format.printf "@,Running benchmarks (no workloads)@." ; - run_and_print instances tests ; - + Format.eprintf "@,Running benchmarks (no workloads)@." ; + let _, raw_results = run_and_print cfg instances tests in if workloads <> [] then ( - Format.printf "@,Running benchmarks (workloads)@." ; + Format.eprintf "@,Running benchmarks (workloads)@." ; List.iter (fun i -> Bechamel_notty.Unit.add i (Measure.unit i)) workloads ; (* workloads come first, so that we unpause them in time *) let instances = workloads @ instances @ workloads in - run_and_print instances tests - ) + let _, _ = run_and_print cfg instances tests in + () + ) ; + store + |> Option.iter @@ fun dir -> + let epoch = Unix.gettimeofday () in + raw_results + |> Hashtbl.iter @@ fun label results -> + let label = String.map (function '/' -> '_' | c -> c) label in + let dir = Filename.concat dir (Float.to_string epoch) in + let () = + try Unix.mkdir dir 0o700 + with Unix.Unix_error (Unix.EEXIST, _, _) -> () + in + + let file = Filename.concat dir (label ^ ".dat") in + Out_channel.with_open_text file @@ fun out -> + let label = Measure.label Instance.monotonic_clock in + results.Benchmark.lr + |> Array.iter @@ fun measurement -> + let repeat = Measurement_raw.run measurement in + let avg = Measurement_raw.get ~label measurement /. repeat in + (* ministat wants to compare individual measurements, but all we have is a sum. 
*) + Printf.fprintf out "%.16g\n" avg + +open Cmdliner + +let cli ?(always = []) ?(workloads = []) tests = + let tests = List.concat_map Test.elements tests in + let cmd = + let test_names = tests |> List.map (fun t -> (Test.Elt.name t, t)) in + let filtered = + let doc = + Printf.sprintf "Choose the benchmarks to run. $(docv) must be %s" + Arg.(doc_alts_enum test_names) + in + Arg.( + value + & pos_all (enum test_names) tests + & info [] ~absent:"all" ~doc ~docv:"BENCHMARK" + ) + and cfg = + let open Term.Syntax in + let+ limit = + Arg.( + value + & opt int default_limit + & info ["limit"] ~doc:"Maximum number of samples" ~docv:"SAMPLES" + ) + and+ quota = + Arg.( + value + & opt float 10.0 (* 1s is too short to reach high batch sizes *) + & info ["quota"] ~doc:"Maximum time per benchmark" ~docv:"SECONDS" + ) + and+ kde = + Arg.( + value + & opt (some int) None + & info ["kde"] ~doc:"Additional samples for Kernel Density Estimation" + ~docv:"SAMPLES" + ) + and+ stabilize = + Arg.( + value + & opt bool false + & info ["stabilize"] ~doc:"Stabilize the GC between measurements" + (* this actually makes measurements more noisy, not less + (although there'll be the ocasional outlier). + When stabilization is disabled we can instead get more measurements within the same amount of time, + which ultimately increases accuracy. + core_bench also has this disabled by default + *) + ) + and+ compaction = + Arg.( + value + & opt bool false + (* avoid large differences between runs (since we no longer stabilize the GC) *) + & info ["compaction"] ~doc:"Enable GC compaction" + ) + and+ start = + Arg.( + value + & opt int 5 (* small batches can have higher overhead: skip them *) + & info ["start"] ~doc:"Starting iteration count" ~docv:"COUNT" + ) + in + Benchmark.cfg ~limit + ~quota:Time.(second quota) + ~kde ~stabilize ~compaction ~start () + and store = + Arg.( + value + & opt (some dir) None + & info ["output-dir"; "d"] + ~doc: + "directory to save the raw results to. 
The output can be used by \ + ministat" + ~docv:"DIRECTORY" + ) + in + let info = Cmd.info "benchmark" ~doc:"Run benchmarks" in + Cmd.v info Term.(const (cli ~always ~workloads) $ cfg $ filtered $ store) + in + exit (Cmd.eval cmd) diff --git a/ocaml/tests/bench/bench_cached_reads.ml b/ocaml/tests/bench/bench_cached_reads.ml index e81a8991cb4..bcba2ed6cf3 100644 --- a/ocaml/tests/bench/bench_cached_reads.ml +++ b/ocaml/tests/bench/bench_cached_reads.ml @@ -8,7 +8,6 @@ let mutex_workload = Bechamel_simple_cli.thread_workload ~before:ignore ~after:ignore ~run let benchmarks = - Test.make_grouped ~name:"Cached reads" - [Test.make ~name:"Pool_role.is_master" (Staged.stage Pool_role.is_master)] + [Test.make ~name:"Pool_role.is_master" (Staged.stage Pool_role.is_master)] let () = Bechamel_simple_cli.cli ~workloads:[mutex_workload] benchmarks diff --git a/ocaml/tests/bench/bench_pool_field.ml b/ocaml/tests/bench/bench_pool_field.ml new file mode 100644 index 00000000000..bd34693a92f --- /dev/null +++ b/ocaml/tests/bench/bench_pool_field.ml @@ -0,0 +1,142 @@ +(* + * Copyright (C) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +open Bechamel + +let () = + Suite_init.harness_init () ; + Printexc.record_backtrace true ; + Debug.set_level Syslog.Emerg ; + Xapi_event.register_hooks () + +let date = "20250102T03:04:05Z" + +let json_dict = + [ + ("fingerprint_sha256", String.make 64 'd') + ; ("not_before", date) + ; ("not_after", date) + ; ("subject", String.make 100 'x') + ; ("san", String.make 50 'y') + ] + +let json_str = + Rpc.Dict (List.map (fun (k, v) -> (k, Rpc.rpc_of_string v)) json_dict) + |> Jsonrpc.to_string + +let __context = Test_common.make_test_database () + +let host = Test_common.make_host ~__context () + +let pool = Test_common.make_pool ~__context ~master:host () + +let () = + Db.Pool.set_license_server ~__context ~self:pool + ~value:[("jsontest", json_str)] ; + let open Xapi_database in + Db_ref.update_database + (Context.database_of __context) + (Db_cache_types.Database.register_callback "redo_log" + Redo_log.database_callback + ) + +let vm = Test_common.make_vm ~__context ~name_label:"test" () + +let get_all () : API.pool_t list = + Db.Pool.get_all_records ~__context |> List.map snd + +let all = get_all () + +let serialize () : Rpc.t list = all |> List.map API.rpc_of_pool_t + +let serialized = serialize () + +let deserialize () : API.pool_t list = serialized |> List.map API.pool_t_of_rpc + +let str_sexpr_json = SExpr.(string_of (String json_str)) + +let sexpr_of_json_string () = SExpr.(string_of (String json_str)) + +let str_of_sexpr_json () = SExpr.mkstring str_sexpr_json + +let date_of_iso8601 () = Clock.Date.of_iso8601 date + +let local_session_hook () = + Xapi_local_session.local_session_hook ~__context ~session_id:Ref.null + +let atomic = Atomic.make 0 + +let atomic_inc () = Atomic.incr atomic + +let mutex = Mutex.create () + +let locked_ref = ref 0 + +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute + +let inc_locked () = incr locked_ref + +let inc_with_mutex () = with_lock mutex inc_locked + +let noop () = Sys.opaque_identity () + +let 
db_lock_uncontended () : unit = Xapi_database.Db_lock.with_lock noop + +let event = + let open Event_types in + { + id= "id" + ; ts= "1000" + ; ty= "test" + ; op= `add + ; reference= "test" + ; snapshot= Some (Rpc.Dict []) + } + +let test_rpc_of_event () = Event_types.rpc_of_event event + +let counter = Atomic.make 0 + +let test_set_vm_nvram () : unit = + let c = Atomic.fetch_and_add counter 1 mod 0x7F in + (* use different value each iteration, otherwise it becomes a noop *) + Db.VM.set_NVRAM ~__context ~self:vm + ~value:[("test", String.make 32768 (Char.chr @@ c))] + +let test_db_pool_write () = + let c = Atomic.fetch_and_add counter 1 mod 0x7F in + Db.Pool.set_tags ~__context ~self:pool ~value:[String.make 16 (Char.chr @@ c)] + +let test_db_pool_read () = Db.Pool.get_tags ~__context ~self:pool + +let benchmarks = + [ + Test.make ~name:"local_session_hook" (Staged.stage local_session_hook) + ; Test.make ~name:"Date.of_iso8601" (Staged.stage date_of_iso8601) + ; Test.make ~name:"sexpr_of_json_string" (Staged.stage sexpr_of_json_string) + ; Test.make ~name:"str_of_sexp_json" (Staged.stage str_of_sexpr_json) + ; Test.make ~name:"Db.Pool.get_all_records" (Staged.stage get_all) + ; Test.make ~name:"pool_t -> Rpc.t" (Staged.stage serialize) + ; Test.make ~name:"Rpc.t -> pool_t" (Staged.stage deserialize) + ; Test.make ~name:"Atomic.incr" (Staged.stage atomic_inc) + ; Test.make ~name:"Mutex+incr" (Staged.stage inc_with_mutex) + ; Test.make ~name:"Db_lock.with_lock uncontended" + (Staged.stage db_lock_uncontended) + ; Test.make ~name:"rpc_of_event" (Staged.stage test_rpc_of_event) + ; Test.make ~name:"Db.Pool.set_tags" (Staged.stage test_db_pool_write) + ; Test.make ~name:"Db.Pool.get_tags" (Staged.stage test_db_pool_read) + ; Test.make ~name:"Db.VM.set_NVRAM" (Staged.stage test_set_vm_nvram) + ] + +let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/bench_throttle2.ml b/ocaml/tests/bench/bench_throttle2.ml index 50582eff4cc..b4f61173420 100644 --- 
a/ocaml/tests/bench/bench_throttle2.ml +++ b/ocaml/tests/bench/bench_throttle2.ml @@ -66,21 +66,20 @@ let run_tasks'' n (__context, tasks) = Thread.join t let benchmarks = - Test.make_grouped ~name:"Task latency" - [ - Test.make_indexed_with_resource ~name:"task complete+wait latency" - ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks - ~free:free_tasks (fun n -> Staged.stage (run_tasks n) - ) - ; Test.make_indexed_with_resource ~name:"task complete+wait all latency" - ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks - ~free:free_tasks (fun n -> Staged.stage (run_tasks' n) - ) - ; Test.make_indexed_with_resource - ~name:"task complete+wait all latency (thread)" ~args:[1; 10; 100] - Test.multiple ~allocate:allocate_tasks ~free:free_tasks (fun n -> - Staged.stage (run_tasks'' n) - ) - ] + [ + Test.make_indexed_with_resource ~name:"task complete+wait latency" + ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks ~free:free_tasks + (fun n -> Staged.stage (run_tasks n) + ) + ; Test.make_indexed_with_resource ~name:"task complete+wait all latency" + ~args:[1; 10; 100] Test.multiple ~allocate:allocate_tasks ~free:free_tasks + (fun n -> Staged.stage (run_tasks' n) + ) + ; Test.make_indexed_with_resource + ~name:"task complete+wait all latency (thread)" ~args:[1; 10; 100] + Test.multiple ~allocate:allocate_tasks ~free:free_tasks (fun n -> + Staged.stage (run_tasks'' n) + ) + ] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/bench_tracing.ml b/ocaml/tests/bench/bench_tracing.ml index eebe6e6aef2..8db30cfc220 100644 --- a/ocaml/tests/bench/bench_tracing.ml +++ b/ocaml/tests/bench/bench_tracing.ml @@ -25,7 +25,7 @@ let export_thread = (* need to ensure this isn't running outside the benchmarked section, or bechamel might fail with 'Failed to stabilize GC' *) - let after _ = Tracing_export.flush_and_exit () in + let after _ = Tracing_export.flush_and_exit ~max_wait:0. 
() in Bechamel_simple_cli.thread_workload ~before:Tracing_export.main ~after ~run:ignore @@ -52,7 +52,7 @@ let allocate () = let free t = Tracing.TracerProvider.destroy ~uuid ; - Tracing_export.flush_and_exit () ; + Tracing_export.flush_and_exit ~max_wait:0. () ; Thread.join t let test_tracing_on ?(overflow = false) ~name f = @@ -64,24 +64,23 @@ let test_tracing_on ?(overflow = false) ~name f = allocate () and free t = if overflow then ( - Tracing.Spans.set_max_spans Bechamel_simple_cli.limit ; - Tracing.Spans.set_max_traces Bechamel_simple_cli.limit + Tracing.Spans.set_max_spans Bechamel_simple_cli.default_limit ; + Tracing.Spans.set_max_traces Bechamel_simple_cli.default_limit ) ; free t in Test.make_with_resource ~name ~allocate ~free Test.uniq f let benchmarks = - Tracing.Spans.set_max_spans Bechamel_simple_cli.limit ; - Tracing.Spans.set_max_traces Bechamel_simple_cli.limit ; - Test.make_grouped ~name:"tracing" - [ - Test.make ~name:"overhead(off)" (Staged.stage trace_test_off) - ; test_tracing_on ~name:"overhead(on, no span)" (Staged.stage trace_test_off) - ; test_tracing_on ~name:"overhead(on, create span)" - (Staged.stage trace_test_span) - ; test_tracing_on ~overflow:true ~name:"max span overflow" - (Staged.stage trace_test_span) - ] + Tracing.Spans.set_max_spans Bechamel_simple_cli.default_limit ; + Tracing.Spans.set_max_traces Bechamel_simple_cli.default_limit ; + [ + Test.make ~name:"overhead(off)" (Staged.stage trace_test_off) + ; test_tracing_on ~name:"overhead(on, no span)" (Staged.stage trace_test_off) + ; test_tracing_on ~name:"overhead(on, create span)" + (Staged.stage trace_test_span) + ; test_tracing_on ~overflow:true ~name:"max span overflow" + (Staged.stage trace_test_span) + ] let () = Bechamel_simple_cli.cli ~always:[export_thread] ~workloads benchmarks diff --git a/ocaml/tests/bench/bench_uuid.ml b/ocaml/tests/bench/bench_uuid.ml index f13118e48db..53e817211a6 100644 --- a/ocaml/tests/bench/bench_uuid.ml +++ 
b/ocaml/tests/bench/bench_uuid.ml @@ -1,10 +1,9 @@ open Bechamel let benchmarks = - Test.make_grouped ~name:"uuidx creation" - [ - Test.make ~name:"Uuidx.make_uuid_urnd" (Staged.stage Uuidx.make_uuid_urnd) - ; Test.make ~name:"Uuidx.make" (Staged.stage Uuidx.make) - ] + [ + Test.make ~name:"Uuidx.make_uuid_urnd" (Staged.stage Uuidx.make_uuid_urnd) + ; Test.make ~name:"Uuidx.make" (Staged.stage Uuidx.make) + ] let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/bench_vdi_allowed_operations.ml b/ocaml/tests/bench/bench_vdi_allowed_operations.ml new file mode 100644 index 00000000000..5b13084370a --- /dev/null +++ b/ocaml/tests/bench/bench_vdi_allowed_operations.ml @@ -0,0 +1,58 @@ +open Bechamel + +module D = Debug.Make (struct let name = __MODULE__ end) + +(* tested configuration limits *) +let max_hosts = 64 + +let max_vms = (*2400*) 240 + +let max_vbds = (* 255 *) 25 + +let () = + (* a minimal harness init *) + Suite_init.harness_init () ; + (* don't spam the logs in [allocate] *) + Debug.set_level Syslog.Info + +let allocate () = + let open Test_common in + let __context = make_test_database () in + let (_sm_ref : API.ref_SM) = make_sm ~__context () in + let sr_ref = make_sr ~__context () in + let (_ : API.ref_PBD array) = + Array.init max_hosts (fun _ -> make_pbd ~__context ~sR:sr_ref ()) + in + let vms = + Array.init max_vms @@ fun _ -> + let vm_ref = make_vm ~__context () in + Array.init (max_vbds / 2) @@ fun _ -> + let vdi_ref = make_vdi ~__context ~sR:sr_ref () in + let vbd_ref = + make_vbd ~__context ~vDI:vdi_ref ~vM:vm_ref ~currently_attached:true + ~mode:`RO () + in + let vdi_ref' = make_vdi ~__context ~sR:sr_ref () in + let vbd_ref' = + make_vbd ~__context ~vDI:vdi_ref' ~vM:vm_ref ~currently_attached:true + ~mode:`RW () + in + (vdi_ref, vbd_ref, vdi_ref', vbd_ref') + in + D.info "Created test database" ; + (__context, vms) + +let test_vdi_update_allowed_operations (__context, vm_disks) = + let _, _, vdi_ref, vbd_ref = 
vm_disks.(0).(0) in + Db.VBD.set_currently_attached ~__context ~self:vbd_ref ~value:true ; + Xapi_vdi.update_allowed_operations ~__context ~self:vdi_ref ; + Db.VBD.set_currently_attached ~__context ~self:vbd_ref ~value:false ; + Xapi_vdi.update_allowed_operations ~__context ~self:vdi_ref + +let benchmarks = + [ + Test.make_with_resource ~name:"VDI" ~allocate ~free:ignore Test.uniq + (Staged.stage test_vdi_update_allowed_operations) + ] + +let () = Bechamel_simple_cli.cli benchmarks diff --git a/ocaml/tests/bench/dune b/ocaml/tests/bench/dune index 0c088389dfe..fe0af458c14 100644 --- a/ocaml/tests/bench/dune +++ b/ocaml/tests/bench/dune @@ -1,4 +1,31 @@ (executables - (names bench_tracing bench_uuid bench_throttle2 bench_cached_reads) - (libraries tracing bechamel bechamel-notty notty.unix tracing_export threads.posix fmt notty uuid xapi_aux tests_common log xapi_internal) -) + (names + bench_tracing + bench_uuid + bench_throttle2 + bench_cached_reads + bench_vdi_allowed_operations + bench_pool_field) + (libraries + dune-build-info + tracing + bechamel + bechamel-notty + clock + cmdliner + notty.unix + tracing_export + threads.posix + rpclib.core + rpclib.json + sexpr + fmt + notty + uuid + xapi_aux + tests_common + log + xapi_database + xapi_datamodel + xapi_internal + xapi-stdext-threads)) diff --git a/ocaml/tests/binpack_test.ml b/ocaml/tests/binpack_test.ml index 27ab15e9f33..4544d7ffcb8 100644 --- a/ocaml/tests/binpack_test.ml +++ b/ocaml/tests/binpack_test.ml @@ -45,10 +45,7 @@ let check_plan config dead_hosts plan = let memory_remaining = account config.hosts config.vms plan in (* List.iter (fun mem -> Printf.printf "%Ld\n" mem) free; *) (* No host should be overcommitted: *) - if - List.fold_left ( || ) false - (List.map (fun x -> x < 0L) (List.map snd memory_remaining)) - then + if List.exists (fun (_, x) -> x < 0L) memory_remaining then raise BadPlan ; (* All failed VMs should be restarted: *) let failed_vms = get_failed_vms config dead_hosts in diff --git 
a/ocaml/tests/common/test_common.ml b/ocaml/tests/common/test_common.ml index 7b5484a02ba..ec058006851 100644 --- a/ocaml/tests/common/test_common.ml +++ b/ocaml/tests/common/test_common.ml @@ -170,13 +170,16 @@ let make_host ~__context ?(uuid = make_uuid ()) ?(name_label = "host") ?(external_auth_service_name = "") ?(external_auth_configuration = []) ?(license_params = []) ?(edition = "free") ?(license_server = []) ?(local_cache_sr = Ref.null) ?(chipset_info = []) ?(ssl_legacy = false) - ?(last_software_update = Date.epoch) ?(last_update_hash = "") () = + ?(last_software_update = Date.epoch) ?(last_update_hash = "") + ?(ssh_enabled = true) ?(ssh_enabled_timeout = 0L) ?(ssh_expiry = Date.epoch) + ?(console_idle_timeout = 0L) ?(ssh_auto_mode = false) () = let host = Xapi_host.create ~__context ~uuid ~name_label ~name_description ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info ~ssl_legacy ~last_software_update - ~last_update_hash + ~last_update_hash ~ssh_enabled ~ssh_enabled_timeout ~ssh_expiry + ~console_idle_timeout ~ssh_auto_mode in Db.Host.set_cpu_info ~__context ~self:host ~value:default_cpu_info ; host @@ -215,7 +218,8 @@ let make_host2 ~__context ?(ref = Ref.make ()) ?(uuid = make_uuid ()) ~last_software_update:(Xapi_host.get_servertime ~__context ~host:ref) ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown ~pending_guidances_recommended:[] ~pending_guidances_full:[] - ~last_update_hash:"" ; + ~last_update_hash:"" ~ssh_enabled:true ~ssh_enabled_timeout:0L + ~ssh_expiry:Date.epoch ~console_idle_timeout:0L ~ssh_auto_mode:false ; ref let make_pif ~__context ~network ~host ?(device = "eth0") @@ -300,7 +304,8 @@ let make_pool ~__context ~master ?(name_label = "") ?(name_description = "") ?(telemetry_next_collection = API.Date.epoch) ?(last_update_sync = API.Date.epoch) ?(update_sync_frequency = `daily) ?(update_sync_day = 0L) 
?(update_sync_enabled = false) - ?(recommendations = []) ?(license_server = []) () = + ?(recommendations = []) ?(license_server = []) + ?(ha_reboot_vm_on_internal_shutdown = true) () = let pool_ref = Ref.make () in Db.Pool.create ~__context ~ref:pool_ref ~uuid:(make_uuid ()) ~name_label ~name_description ~master ~default_SR ~suspend_image_SR ~crash_dump_SR @@ -320,7 +325,8 @@ let make_pool ~__context ~master ?(name_label = "") ?(name_description = "") ~local_auth_max_threads:8L ~ext_auth_max_threads:8L ~ext_auth_cache_enabled:false ~ext_auth_cache_size:50L ~ext_auth_cache_expiry:300L ~update_sync_frequency ~update_sync_day - ~update_sync_enabled ~recommendations ~license_server ; + ~update_sync_enabled ~recommendations ~license_server + ~ha_reboot_vm_on_internal_shutdown ; pool_ref let default_sm_features = @@ -624,12 +630,13 @@ let make_cluster_and_cluster_host ~__context ?(ref = Ref.make ()) ?(token_timeout = Constants.default_token_timeout_s) ?(token_timeout_coefficient = Constants.default_token_timeout_coefficient_s) ?(cluster_config = []) ?(other_config = []) ?(host = Ref.null) - ?(is_quorate = false) ?(quorum = 0L) ?(live_hosts = 0L) () = + ?(is_quorate = false) ?(quorum = 0L) ?(live_hosts = 0L) + ?(expected_hosts = 0L) () = Db.Cluster.create ~__context ~ref ~uuid ~cluster_token ~pending_forget:[] ~cluster_stack ~cluster_stack_version ~allowed_operations ~current_operations ~pool_auto_join ~token_timeout ~token_timeout_coefficient ~cluster_config ~other_config ~is_quorate ~quorum - ~live_hosts ; + ~live_hosts ~expected_hosts ; let cluster_host_ref = make_cluster_host ~__context ~cluster:ref ~host ~pIF () in diff --git a/ocaml/tests/dune b/ocaml/tests/dune index c4b590c6cb8..9f4ad45b9d9 100644 --- a/ocaml/tests/dune +++ b/ocaml/tests/dune @@ -7,7 +7,7 @@ test_cluster_host test_cluster test_pusb test_network_sriov test_client test_valid_ref_list suite_alcotest_server test_vm_placement test_vm_helpers test_repository test_repository_helpers - test_ref 
test_xapi_helpers test_vm_group test_host_driver_helpers + test_ref test_xapi_helpers test_vm_group test_livepatch test_rpm test_updateinfo test_storage_smapiv1_wrapper test_storage_quicktest test_observer test_pool_periodic_update_sync test_pkg_mgr test_tar_ext test_pool_repository)) (libraries @@ -15,7 +15,7 @@ angstrom astring cstruct - + digestif fmt http_lib httpsvr @@ -50,7 +50,6 @@ xapi_xenopsd xapi_cli_server xapi_database - xapi_host_driver_helpers xapi_internal xml-light2 ) @@ -78,19 +77,18 @@ ) ) - (tests (names test_vm_helpers test_vm_placement test_network_sriov test_vdi_cbt test_bounded_psq test_auth_cache test_clustering test_pusb test_daemon_manager test_repository test_repository_helpers test_livepatch test_rpm test_updateinfo test_pool_periodic_update_sync test_pkg_mgr - test_xapi_helpers test_tar_ext test_pool_repository test_host_driver_helpers) + test_xapi_helpers test_tar_ext test_pool_repository) (package xapi) (modes exe) (modules test_vm_helpers test_vm_placement test_network_sriov test_vdi_cbt test_bounded_psq test_auth_cache test_event test_clustering test_cluster_host test_cluster test_pusb test_daemon_manager test_repository test_repository_helpers test_livepatch test_rpm test_updateinfo test_pool_periodic_update_sync test_pkg_mgr - test_xapi_helpers test_tar_ext test_pool_repository test_host_driver_helpers) + test_xapi_helpers test_tar_ext test_pool_repository) (libraries alcotest bos @@ -121,13 +119,13 @@ xapi-types xapi_cli_server xapi_database - xapi_host_driver_helpers xapi_internal xml-light2 yojson ) (preprocess (per_module ((pps ppx_deriving_rpc) Test_cluster_host))) ) + (test (name test_storage_smapiv1_wrapper) (modes exe) @@ -171,27 +169,6 @@ (action (run ./check-no-xenctrl %{x})) ) -(rule - (alias runtest) - (package xapi) - (targets - .note.XenServer - .note.Linux - .note.gnu.build-id - .note.XenServerTwo - ) - (deps - (:asm - test_data/xenserver.s - test_data/xenserver_two_notes.s - test_data/linux.s - test_data/buildid.s 
- ) - (:script test_data/gen_notes.sh) - ) - (action (bash "%{script} %{asm}")) -) - (env (_ (env-vars (XAPI_TEST 1)))) ; disassemble, but without sources diff --git a/ocaml/tests/test_certificates.ml b/ocaml/tests/test_certificates.ml index 96017d3156a..dcd018e0993 100644 --- a/ocaml/tests/test_certificates.ml +++ b/ocaml/tests/test_certificates.ml @@ -13,7 +13,7 @@ let pp_hash_test = (fun (hashable, expected) -> let test_hash () = let digest = - Cstruct.of_string hashable |> Mirage_crypto.Hash.digest `SHA256 + Digestif.SHA256.(digest_string hashable |> to_raw_string) in Alcotest.(check string) "fingerprints must match" expected diff --git a/ocaml/tests/test_cluster.ml b/ocaml/tests/test_cluster.ml index b42621a300f..9c945776cd9 100644 --- a/ocaml/tests/test_cluster.ml +++ b/ocaml/tests/test_cluster.ml @@ -34,9 +34,11 @@ let test_clusterd_rpc ~__context call = | "Observer.init" | "Observer.set_trace_log_dir" | "Observer.set_export_interval" + | "Observer.set_export_chunk_size" | "Observer.set_host_id" | "Observer.set_max_traces" | "Observer.set_max_spans" + | "Observer.set_max_depth" | "Observer.set_max_file_size" | "Observer.set_compress_tracing_files" ) , _ ) -> @@ -69,6 +71,7 @@ let test_clusterd_rpc ~__context call = ; num_times_booted= 1 ; is_quorate= true ; total_votes= 1 + ; expected_votes= 1 ; quorum= 1 ; quorum_members= Some [me] ; is_running= true diff --git a/ocaml/tests/test_cluster_host.ml b/ocaml/tests/test_cluster_host.ml index 0887f72ade5..17673d6c0fd 100644 --- a/ocaml/tests/test_cluster_host.ml +++ b/ocaml/tests/test_cluster_host.ml @@ -24,7 +24,7 @@ let create_cluster ~__context pool_auto_join = ~token_timeout_coefficient:Constants.default_token_timeout_coefficient_s ~allowed_operations:[] ~current_operations:[] ~pool_auto_join ~cluster_config:[] ~other_config:[] ~pending_forget:[] ~is_quorate:false - ~quorum:0L ~live_hosts:0L ; + ~quorum:0L ~live_hosts:0L ~expected_hosts:0L ; cluster_ref let check_cluster_option = diff --git 
a/ocaml/tests/test_clustering.ml b/ocaml/tests/test_clustering.ml index 9be97c5fdb5..edd33cb6025 100644 --- a/ocaml/tests/test_clustering.ml +++ b/ocaml/tests/test_clustering.ml @@ -581,11 +581,21 @@ let test_disallow_unplug_during_cluster_host_create () = let key = Context.get_task_id __context |> Ref.string_of in Db.Cluster.add_to_current_operations ~__context ~self:cluster ~key ~value in - let check_disallow_unplug_false_fails self msg = + let check_disallow_unplug_false_fails self op msg = + let op_ref, _ = + List.hd (Db.Cluster.get_current_operations ~__context ~self:cluster) + in Alcotest.check_raises msg Api_errors.( Server_error - (other_operation_in_progress, ["Cluster"; Ref.string_of cluster]) + ( other_operation_in_progress + , [ + "Cluster" + ; Ref.string_of cluster + ; API.cluster_operation_to_string op + ; op_ref + ] + ) ) (fun () -> Xapi_pif.set_disallow_unplug ~__context ~self ~value:false) in @@ -598,14 +608,14 @@ let test_disallow_unplug_during_cluster_host_create () = let test_with_current op = Xapi_pif.set_disallow_unplug ~__context ~self:pIF ~value:true ; add_op op ; - check_disallow_unplug_false_fails pIF + check_disallow_unplug_false_fails pIF op "disallow_unplug cannot be set to false during cluster_host creation or \ enable on same PIF" ; let other_pif = T.make_pif ~__context ~network ~host () in check_successful_disallow_unplug true other_pif "Should always be able to set disallow_unplug:true regardless of \ clustering operations" ; - check_disallow_unplug_false_fails other_pif + check_disallow_unplug_false_fails other_pif op "disallow_unplug cannot be set to false during cluster_host creation or \ enable on any PIF" ; let key = Context.get_task_id __context |> Ref.string_of in diff --git a/ocaml/tests/test_data/buildid.s b/ocaml/tests/test_data/buildid.s deleted file mode 100644 index 75f77766980..00000000000 --- a/ocaml/tests/test_data/buildid.s +++ /dev/null @@ -1,9 +0,0 @@ -.section ".note.gnu.build-id", "a" - .p2align 2 - .long 1f - 
0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x1 # type -0: .asciz "gnu.build-id" # name -1: .p2align 2 -2: .long 0x000000 # desc -3: .p2align 2 diff --git a/ocaml/tests/test_data/gen_notes.sh b/ocaml/tests/test_data/gen_notes.sh deleted file mode 100755 index 9b173bd31da..00000000000 --- a/ocaml/tests/test_data/gen_notes.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -# -# Copyright (c) Cloud Software Group, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation; version 2.1 only. with the special -# exception on linking described in file LICENSE. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. - -elf_file=test_data/xenserver_elf_file -as "$@" -o $elf_file - -sections=$(readelf -n $elf_file | grep -Po "(?<=Displaying notes found in: ).*") -for dep in $sections; do - objcopy "$elf_file" "$dep" --only-section="$dep" -O binary -done - diff --git a/ocaml/tests/test_data/linux.s b/ocaml/tests/test_data/linux.s deleted file mode 100644 index ca106e94af7..00000000000 --- a/ocaml/tests/test_data/linux.s +++ /dev/null @@ -1,9 +0,0 @@ -.section ".note.Linux", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x257 # type -0: .asciz "Linux" # name -1: .p2align 2 -2: .asciz "4.19.0+1" # desc -3: .p2align 2 diff --git a/ocaml/tests/test_data/xenserver.s b/ocaml/tests/test_data/xenserver.s deleted file mode 100644 index f44575ce5eb..00000000000 --- a/ocaml/tests/test_data/xenserver.s +++ /dev/null @@ -1,9 +0,0 @@ -.section ".note.XenServer", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - 
.long 3f - 2f # desc size (not including padding) - .long 0x1 # type -0: .asciz "XenServer" # name -1: .p2align 2 -2: .asciz "v2.1.3+0.1fix" # desc -3: .p2align 2 diff --git a/ocaml/tests/test_data/xenserver_two_notes.s b/ocaml/tests/test_data/xenserver_two_notes.s deleted file mode 100644 index cbde4916dd5..00000000000 --- a/ocaml/tests/test_data/xenserver_two_notes.s +++ /dev/null @@ -1,20 +0,0 @@ -.section ".note.XenServerTwo", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x2 # type -0: .asciz "XenServer" # name -1: .p2align 2 -2: .asciz "Built on December 25th" # desc -3: .p2align 2 - -.section ".note.XenServerTwo", "a" - .p2align 2 - .long 1f - 0f # name size (not including padding) - .long 3f - 2f # desc size (not including padding) - .long 0x1 # type -0: .asciz "XenServer" # name -1: .p2align 2 -2: .asciz "2.0.0-rc.2" # desc -3: .p2align 2 - diff --git a/ocaml/tests/test_extauth_plugin_ADwinbind.ml b/ocaml/tests/test_extauth_plugin_ADwinbind.ml index a0180ee5e25..6b3e58e3b34 100644 --- a/ocaml/tests/test_extauth_plugin_ADwinbind.ml +++ b/ocaml/tests/test_extauth_plugin_ADwinbind.ml @@ -219,6 +219,27 @@ let test_parse_wbinfo_uid_info = ; gecos= {|ladmin|} } ) + (* XSI-1901: output of customer environment, has `:` in the gecos, + other fields does not likely contain it *) + ; ( {|HVS\udaadmin:*:3000000:3000000:ADMIN: Dalsem, Ulric:/home/HVS/udaadmin:/bin/bash|} + , Ok + { + user_name= {|HVS\udaadmin|} + ; uid= 3000000 + ; gid= 3000000 + ; gecos= {|ADMIN: Dalsem, Ulric|} + } + ) + (* Multiple `:` in gecos *) + ; ( {|HVS\udaadmin:*:3000000:3000000:ADMIN: Dalsem, Ulric, POOL OP: udaadmin:/home/HVS/udaadmin:/bin/bash|} + , Ok + { + user_name= {|HVS\udaadmin|} + ; uid= 3000000 + ; gid= 3000000 + ; gecos= {|ADMIN: Dalsem, Ulric, POOL OP: udaadmin|} + } + ) ; ( {|CONNAPP\locked:*:3000004:3000174::/home/CONNAPP/locked:/bin/bash|} , Ok {user_name= {|CONNAPP\locked|}; uid= 3000004; gid= 
3000174; gecos= ""} @@ -499,6 +520,106 @@ let test_wbinfo_exception_of_stderr = in matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) +let test_add_ipv4_localhost_to_hosts = + let open Extauth_plugin_ADwinbind in + let check inp exp () = + let msg = + Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) + in + let actual = + HostsConfIPv4.join ~name:"hostname" ~domain:"domain" ~lines:inp + in + Alcotest.(check @@ list string) msg exp actual + in + let matrix = + [ + ( [ + "127.0.0.1 localhost localhost.localdomain localhost4 \ + localhost4.localdomain4" + ] + , [ + "127.0.0.1 hostname.domain hostname localhost \ + localhost.localdomain localhost4 localhost4.localdomain4" + ] + ) + ; ( ["127.0.0.1 localhost hostname hostname.domain localhost.localdomain"] + , ["127.0.0.1 hostname.domain hostname localhost localhost.localdomain"] + ) + ; ( ["192.168.0.1 some_host"] + , ["127.0.0.1 hostname.domain hostname"; "192.168.0.1 some_host"] + ) + ; ([], ["127.0.0.1 hostname.domain hostname"]) + ] + in + matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) + +let test_add_ipv4_and_ipv6_localhost_to_hosts = + let open Extauth_plugin_ADwinbind in + let check inp exp () = + let msg = + Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) + in + let actual = + HostsConfIPv6.join ~name:"hostname" ~domain:"domain" ~lines:inp + |> fun lines -> + HostsConfIPv4.join ~name:"hostname" ~domain:"domain" ~lines + in + Alcotest.(check @@ list string) msg exp actual + in + let matrix = + [ + ( ["127.0.0.1 localhost"] + , [ + "::1 hostname.domain hostname" + ; "127.0.0.1 hostname.domain hostname localhost" + ] + ) + ; ( ["127.0.0.1 localhost"; "::1 localhost"] + , [ + "127.0.0.1 hostname.domain hostname localhost" + ; "::1 hostname.domain hostname localhost" + ] + ) + ; ( [] + , ["127.0.0.1 hostname.domain hostname"; "::1 hostname.domain hostname"] + ) + ] + in + matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, 
check inp exp) + +let test_remove_ipv4_localhost_from_hosts = + let open Extauth_plugin_ADwinbind in + let check inp exp () = + let msg = + Printf.sprintf "%s -> %s" (String.concat "\n" inp) (String.concat "\n" exp) + in + let actual = + HostsConfIPv4.leave ~name:"hostname" ~domain:"domain" ~lines:inp + in + Alcotest.(check @@ list string) msg exp actual + in + let matrix = + [ + ( [ + "127.0.0.1 localhost localhost.localdomain localhost4 \ + localhost4.localdomain4" + ] + , [ + "127.0.0.1 localhost localhost.localdomain localhost4 \ + localhost4.localdomain4" + ] + ) + ; ( ["127.0.0.1 localhost hostname hostname.domain localhost.localdomain"] + , ["127.0.0.1 localhost localhost.localdomain"] + ) + ; (["127.0.0.1 hostname hostname.domain"], []) + ; ( ["192.168.0.1 some_host"; "127.0.0.1 localhost hostname"] + , ["192.168.0.1 some_host"; "127.0.0.1 localhost"] + ) + ] + in + matrix |> List.map @@ fun (inp, exp) -> ("", `Quick, check inp exp) + let tests = [ ("ADwinbind:extract_ou_config", ExtractOuConfig.tests) @@ -512,4 +633,13 @@ let tests = ; ( "ADwinbind:test_wbinfo_exception_of_stderr" , test_wbinfo_exception_of_stderr ) + ; ( "ADwinbind:test_add_ipv4_localhost_to_hosts" + , test_add_ipv4_localhost_to_hosts + ) + ; ( "ADwinbind:test_remove_ipv4_localhost_from_hosts" + , test_remove_ipv4_localhost_from_hosts + ) + ; ( "ADwinbind:test_add_ipv4_and_ipv6_localhost_to_hosts" + , test_add_ipv4_and_ipv6_localhost_to_hosts + ) ] diff --git a/ocaml/tests/test_guest_agent.ml b/ocaml/tests/test_guest_agent.ml index 6d9c7d8f40b..6bc0f227c7b 100644 --- a/ocaml/tests/test_guest_agent.ml +++ b/ocaml/tests/test_guest_agent.ml @@ -285,6 +285,7 @@ module Initial_guest_metrics = Generic.MakeStateless (struct Xapi_guest_agent.get_initial_guest_metrics (lookup tree) (list tree) in guest_metrics.Xapi_guest_agent.networks + @ guest_metrics.Xapi_guest_agent.pv_drivers_version let tests = `QuickAndAutoDocumented @@ -465,6 +466,116 @@ module Initial_guest_metrics = Generic.MakeStateless 
(struct ] , [] ) + ; (* windows pv driver versions parsing *) + ( [ + ("drivers/0", "XenServer XENBUS 9.1.9.105 ") + ; ("drivers/1", "XenServer XENVBD 9.1.8.79 ") + ; ("drivers/2", "XenServer XENVIF 9.1.12.101 ") + ; ("drivers/3", "XenServer XENIFACE 9.1.10.87 ") + ; ("drivers/4", "XenServer XENNET 9.1.7.65 ") + ] + , [ + ("micro", "-1") + ; ("xennet", "XenServer 9.1.7.65 ") + ; ("xeniface", "XenServer 9.1.10.87 ") + ; ("xenvif", "XenServer 9.1.12.101 ") + ; ("xenvbd", "XenServer 9.1.8.79 ") + ; ("xenbus", "XenServer 9.1.9.105 ") + ] + ) + ; ( [ + ("drivers/0", "XenServer XENBUS 9.1.9.105 (DEBUG) (MOREDEBUG)") + ; ("drivers/2", "XCP_ng XENVIF 9.1.12.101 ") + ] + , [ + ("micro", "-1") + ; ("xenvif", "XCP_ng 9.1.12.101 ") + ; ("xenbus", "XenServer 9.1.9.105 (DEBUG) (MOREDEBUG)") + ] + ) + ] +end) + +module Services = Generic.MakeStateless (struct + module Io = struct + type input_t = (string * string) list + + type output_t = (string * string) list + + let string_of_input_t = Test_printers.(assoc_list string string) + + let string_of_output_t = Test_printers.(assoc_list string string) + end + + (* prototype funtions lookup and list are in Xapi_xenops.ml::update_vm *) + let lookup state key = List.assoc_opt key state + + let list_subkeys state dir = + if dir = "" then + [] + else + let dir = + if dir.[0] = '/' then + String.sub dir 1 (String.length dir - 1) + else + dir + in + let results = + List.filter_map + (fun (path, _) -> + if String.starts_with ~prefix:dir path then + let rest = + String.sub path (String.length dir) + (String.length path - String.length dir) + in + let is_sep = function '/' -> true | _ -> false in + match Astring.String.fields ~empty:false ~is_sep rest with + | x :: _ -> + Some x + | _ -> + None + else + None + ) + state + |> Xapi_stdext_std.Listext.List.setify + in + results + + let transform input = + Xapi_guest_agent.get_guest_services (lookup input) (list_subkeys input) + + let tests = + `QuickAndAutoDocumented + [ + (* no data/service *) + 
([("data/key1", "v1"); ("data/key2", "v2")], []) + ; (* less than two depth in data/service *) + ([("data/service/key1", "v1"); ("data/service/key2", "v2")], []) + ; (* beyond two depth in data/service *) + ( [ + ("data/service/service-a/sub/key1", "sab-v1") + ; ("data/service/service-a/sub/key2", "sab-v2") + ] + , [("service-a/sub", "")] + ) + ; (* normal case *) + ( [ + ("data/service", "") + ; ("data/service/service-a", "") + ; ("data/service/service-b", "") + ; ("data/service/service-a/key1", "sa-v1") + ; ("data/service/service-a/key2", "sa-v2") + ; ("data/service/service-b/key1", "sb-v1") + ; ("data/service/service-b/key2", "sb-v2") + ] + , [ + ("service-a/key1", "sa-v1") + ; ("service-a/key2", "sa-v2") + ; ("service-b/key1", "sb-v1") + ; ("service-b/key2", "sb-v2") + ] + ) ] end) @@ -473,4 +584,5 @@ let tests = [ ("networks", Networks.tests) ; ("get_initial_guest_metrics", Initial_guest_metrics.tests) + ; ("get_guest_services", Services.tests) ] diff --git a/ocaml/tests/test_helpers.ml b/ocaml/tests/test_helpers.ml index b856bb363e3..aecd8d4b15a 100644 --- a/ocaml/tests/test_helpers.ml +++ b/ocaml/tests/test_helpers.ml @@ -466,6 +466,138 @@ module RunInParallel = Generic.MakeStateless (struct ] end) +module Version = struct + let test_compare_int_list () = + let test_cases = + [ + ("Equal Lists", [1; 2; 3], [1; 2; 3], 0) + ; ("Empty Lists", [], [], 0) + ; ("'a' is smaller (first element)", [1; 10; 100], [2; 0; 0], -1) + ; ("'a' is smaller (later element)", [1; 2; 3], [1; 2; 4], -1) + ; ("'a' is greater (first element)", [5; 1; 1], [2; 10; 10], 1) + ; ("'a' is greater (later element)", [1; 3; 3], [1; 2; 4], 1) + ; ("Lists with negative numbers", [0; -5; 10], [0; -2; -10], -1) + ; ("Single element lists (equal)", [42], [42], 0) + ; ("Single element lists (unequal)", [42], [43], -1) + ; ("Different number of element in lists", [25; 27], [25; 27; 1], -1) + ] + in + let test_compare (description, list1, list2, expected) = + let actual = Helpers.compare_int_lists 
list1 list2 in + let description = Printf.sprintf "compate_int_lists: %s" description in + Alcotest.(check int) description expected actual + in + List.iter test_compare test_cases + + let test_version_numbers_of_string () = + let test_cases = + [ + ( "Standard major.minor.patch version, e.g. xapi build version stored \ + in the database" + , "25.30.0" + , [25; 30; 0] + ) + ; ( "Dev build version, e.g. xapi build version stored in the database" + , "25.30.0.6.gb239bd75a" + , [25; 30; 0; 6] + ) + ; ( "Version with a patch identifier e.g. xen versions stored in the \ + database" + , "25.15.0-13" + , [25; 15; 0; 13] + ) + ; ("Default version", "0.0.0", [0; 0; 0]) + ] + in + let test_version_numbers (description, version_string, expected) = + let actual = Helpers.version_numbers_of_string version_string in + let description = + Printf.sprintf "version_numbers_of_string: %s" description + in + Alcotest.(check @@ list int) description expected actual + in + List.iter test_version_numbers test_cases + + let test_compare_versions () = + let sw_vers_a = + Xapi_globs.[(_platform_version, "2.4.0"); (_xen_version, "4.14.0-13")] + in + let sw_vers_b = Xapi_globs.[(_xen_version, "4.13.0-13")] in + let test_cases = + Xapi_globs. 
+ [ + ( "Software versions 'b' are missing platform version" + , _platform_version + , sw_vers_a + , sw_vers_b + , 1 + ) + ; ( "Software versions 'a' are missing platform version" + , _platform_version + , sw_vers_b + , sw_vers_a + , -1 + ) + ; ( "xen version exists in both (`a` is greater)" + , _xen_version + , sw_vers_a + , sw_vers_b + , 1 + ) + ; ( "xapi build version is missing from both (equal)" + , _xapi_build_version + , sw_vers_a + , sw_vers_b + , 0 + ) + ] + in + let test_compare (description, key, value_a, value_b, expected) = + let actual = Helpers.compare_versions ~version_key:key value_a value_b in + let description = Printf.sprintf "compare_versions: %s" description in + Alcotest.(check int) description expected actual + in + List.iter test_compare test_cases + + let test_compare_all_versions () = + let current = + Xapi_globs.[(_platform_version, "8.1.0"); (_xen_version, "4.13.0-15")] + in + let newer = + Xapi_globs.[(_platform_version, "8.2.0"); (_xen_version, "4.13.0-15")] + in + let mixed = + Xapi_globs.[(_platform_version, "8.2.0"); (_xen_version, "4.12.0-15")] + in + let test_cases = + [ + ("Newer is greater or equal than Current", newer, current, true) + ; ("Current is greater or equal than Current", current, current, true) + ; ("Current is not greater or equal than Newer", current, newer, false) + ; ("Mixed is not greater or equal then Current", mixed, current, false) + ; ("Current is not greater or equal than Mixed", current, mixed, false) + ] + in + let test_compare (description, vers_a, vers_b, expected) = + let actual = + Helpers.compare_all_versions ~is_greater_or_equal:vers_a ~than:vers_b + in + let description = Printf.sprintf "compare_all_versions: %s" description in + Alcotest.(check bool) description expected actual + in + List.iter test_compare test_cases + + let test = + [ + ("Compare int list", `Quick, test_compare_int_list) + ; ("Version numbers from string", `Quick, test_version_numbers_of_string) + ; ("Compare versions", `Quick, 
test_compare_versions) + ; ("Compare all versions", `Quick, test_compare_all_versions) + ] + + let tests = [("Version compare tests", test)] +end + let tests = make_suite "helpers_" [ @@ -476,3 +608,4 @@ let tests = ; ("assert_is_valid_cidr", CIDRCheckers.tests) ; ("run_in_parallel", RunInParallel.tests) ] + @ Version.tests diff --git a/ocaml/tests/test_host.ml b/ocaml/tests/test_host.ml index edca58ac032..1f814b5adf7 100644 --- a/ocaml/tests/test_host.ml +++ b/ocaml/tests/test_host.ml @@ -24,6 +24,8 @@ let add_host __context name = ~license_params:[] ~edition:"" ~license_server:[] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false ~last_software_update:Clock.Date.epoch ~last_update_hash:"" + ~ssh_enabled:true ~ssh_enabled_timeout:0L ~ssh_expiry:Clock.Date.epoch + ~console_idle_timeout:0L ~ssh_auto_mode:false ) (* Creates an unlicensed pool with the maximum number of hosts *) diff --git a/ocaml/tests/test_host_driver_helpers.ml b/ocaml/tests/test_host_driver_helpers.ml deleted file mode 100644 index bb1a49050b1..00000000000 --- a/ocaml/tests/test_host_driver_helpers.ml +++ /dev/null @@ -1,89 +0,0 @@ -(* - Copyright (c) Cloud Software Group, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; version 2.1 only. with the special - exception on linking described in file LICENSE. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. 
- *) - -open Xapi_host_driver_helpers - -let note = - Alcotest.testable - (Fmt.of_to_string (fun n -> - Printf.sprintf "{typ=%d; name=%s; desc=%s}" (Int32.to_int n.typ) n.name - n.desc - ) - ) - ( = ) - -let versions = - [ - (".note.XenServer", Some "v2.1.3+0.1fix") - ; (".note.XenServerTwo", Some "2.0.0-rc.2") - ; (".note.Linux", None) - ; (".note.gnu.build-id", None) - ] - -let get_version_test = - List.map - (fun (filename, expected) -> - let test_version () = - let parsed_ver = Result.to_option (get_version filename) in - Printf.printf "%s\n" filename ; - Alcotest.(check (option string)) - "ELF notes should be parsed properly" expected parsed_ver - in - ( Printf.sprintf {|Validation of ELF note parsing: "%s"|} filename - , `Quick - , test_version - ) - ) - versions - -let notes = - [ - (".note.XenServer", [{typ= 1l; name= "XenServer"; desc= "v2.1.3+0.1fix"}]) - ; ( ".note.XenServerTwo" - , [ - {typ= 2l; name= "XenServer"; desc= "Built on December 25th"} - ; {typ= 1l; name= "XenServer"; desc= "2.0.0-rc.2"} - ] - ) - ; (".note.Linux", [{typ= 599l; name= "Linux"; desc= "4.19.0+1"}]) - ; ( ".note.gnu.build-id" - , [{typ= 1l; name= "gnu.build-id"; desc= "\x00\x00\x00"}] - ) - ] - -let note_parsing_test = - List.map - (fun (filename, expected) -> - let test_note () = - let parsed = - match get_notes filename with Ok res -> res | Error e -> failwith e - in - Printf.printf "%s\n" filename ; - Alcotest.(check (list note)) - "ELF notes should be parsed properly" expected parsed - in - ( Printf.sprintf {|Validation of ELF note parsing: "%s"|} filename - , `Quick - , test_note - ) - ) - notes - -let () = - Suite_init.harness_init () ; - Alcotest.run "Test Host Driver Helpers suite" - [ - ("Test_host_driver_helpers.get_note", note_parsing_test) - ; ("Test_host_driver_helpers.get_version", get_version_test) - ] diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index 2e2f8e6aa29..07d746e81c0 100644 --- a/ocaml/tests/test_observer.ml +++ 
b/ocaml/tests/test_observer.ml @@ -305,6 +305,7 @@ let verify_json_fields_and_values ~json = ; ("xs.host.uuid", `String _) ; ("xs.host.name", `String _) ; ("service.name", `String _) + ; ("span.depth", `String _) ] ) ; ("annotations", `List _) diff --git a/ocaml/tests/test_repository_helpers.ml b/ocaml/tests/test_repository_helpers.ml index c05e7c8a63e..d6c8421afdb 100644 --- a/ocaml/tests/test_repository_helpers.ml +++ b/ocaml/tests/test_repository_helpers.ml @@ -253,6 +253,46 @@ module AssertUrlIsValid = Generic.MakeStateless (struct ] end) +module AssertUrlIsNotBlocked = Generic.MakeStateless (struct + module Io = struct + type input_t = string * string list + + type output_t = (unit, exn) result + + let string_of_input_t = Fmt.(str "%a" Dump.(pair string (list string))) + + let string_of_output_t = + Fmt.(str "%a" Dump.(result ~ok:(any "()") ~error:exn)) + end + + let transform (url, url_blocklist) = + Xapi_globs.repository_url_blocklist := url_blocklist ; + try Ok (assert_url_is_not_blocked ~url) with e -> Error e + + let tests = + `QuickAndAutoDocumented + [ + (* no blocklist *) + (("https://test.com", []), Ok ()) + ; (* Not match in blocklist *) + ( ("https://test.com", ["http://blocked.com"; "http://also/blocked.com"]) + , Ok () + ) + ; (* match in blocklist *) + ( ( "http://blocked.com" + , ["http://blocked.com"; "http://also/blocked.com"] + ) + , Error + Api_errors.(Server_error (blocked_repo_url, ["http://blocked.com"])) + ) + ; (* match keyword in blocklist *) + ( ("http://blocked.com", ["private"; "blocked"]) + , Error + Api_errors.(Server_error (blocked_repo_url, ["http://blocked.com"])) + ) + ] +end) + module WriteYumConfig = Generic.MakeStateless (struct module Io = struct (* ( (source_url, binary_url), (need_gpg_check, gpgkey_path) ) *) @@ -4780,6 +4820,7 @@ let tests = [ ("update_of_json", UpdateOfJsonTest.tests) ; ("assert_url_is_valid", AssertUrlIsValid.tests) + ; ("assert_url_is_not_blocked", AssertUrlIsNotBlocked.tests) ; ("write_yum_config", 
WriteYumConfig.tests) ; ("eval_guidance_for_one_update", EvalGuidanceForOneUpdate.tests) ; ("get_update_in_json", GetUpdateInJson.tests) diff --git a/ocaml/tests/test_sm_features.ml b/ocaml/tests/test_sm_features.ml index 43bce4c3807..6b7ef99502d 100644 --- a/ocaml/tests/test_sm_features.ml +++ b/ocaml/tests/test_sm_features.ml @@ -49,6 +49,7 @@ let test_sequences = ; "SR_CACHING" ; "SR_PROBE" ; "SR_UPDATE" + ; "SR_CACHING" ; "VDI_ATTACH" ; "VDI_CLONE" ; "VDI_CONFIG_CBT" @@ -63,7 +64,8 @@ let test_sequences = ] ; smapiv1_features= [ - (Sr_probe, 1L) + (Sr_caching, 1L) + ; (Sr_probe, 1L) ; (Sr_update, 1L) ; (Vdi_attach, 1L) ; (Vdi_clone, 1L) @@ -79,7 +81,8 @@ let test_sequences = ] ; smapiv2_features= [ - "SR_PROBE/1" + "SR_CACHING/1" + ; "SR_PROBE/1" ; "SR_UPDATE/1" ; "VDI_ATTACH/1" ; "VDI_CLONE/1" @@ -97,7 +100,8 @@ let test_sequences = { capabilities= [ - "SR_PROBE" + "SR_CACHING" + ; "SR_PROBE" ; "SR_UPDATE" ; "VDI_ATTACH" ; "VDI_CLONE" @@ -113,7 +117,8 @@ let test_sequences = ] ; features= [ - ("SR_PROBE", 1L) + ("SR_CACHING", 1L) + ; ("SR_PROBE", 1L) ; ("SR_UPDATE", 1L) ; ("VDI_ATTACH", 1L) ; ("VDI_CLONE", 1L) @@ -244,6 +249,7 @@ module CreateSMObject = Generic.MakeStateful (struct ; features ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } let extract_output __context _ = diff --git a/ocaml/tests/test_storage_migrate_state.ml b/ocaml/tests/test_storage_migrate_state.ml index 42087887995..ea059ae07e2 100644 --- a/ocaml/tests/test_storage_migrate_state.ml +++ b/ocaml/tests/test_storage_migrate_state.ml @@ -17,11 +17,11 @@ open Test_highlevel module StorageMigrateState = struct type state_t = unit - let create_default_state () = Storage_migrate.State.clear () + let create_default_state () = Storage_migrate_helper.State.clear () end let sample_send_state = - Storage_migrate.State.Send_state. + Storage_migrate_helper.State.Send_state. 
{ url= "url" ; dest_sr= Storage_interface.Sr.of_string "dest_sr" @@ -41,11 +41,14 @@ let sample_send_state = ) ; failed= false ; watchdog= None + ; live_vm= Storage_interface.Vm.of_string "0" + ; mirror_key= None + ; vdi= Storage_interface.Vdi.of_string "" } let sample_receive_state = let open Storage_interface in - Storage_migrate.State.Receive_state. + Storage_migrate_helper.State.Receive_state. { sr= Sr.of_string "my_sr" ; dummy_vdi= Vdi.of_string "dummy_vdi" @@ -54,10 +57,12 @@ let sample_receive_state = ; parent_vdi= Vdi.of_string "parent_vdi" ; remote_vdi= Vdi.of_string "remote_vdi" ; mirror_vm= Vm.of_string "mirror_vm" + ; url= "" + ; verify_dest= false } let sample_copy_state = - Storage_migrate.State.Copy_state. + Storage_migrate_helper.State.Copy_state. { base_dp= "base_dp" ; leaf_dp= "leaf_dp" @@ -70,7 +75,7 @@ let sample_copy_state = module MapOf = Generic.MakeStateful (struct module Io = struct - open Storage_migrate.State + open Storage_migrate_helper.State type input_t = (string * osend operation) option @@ -88,7 +93,7 @@ module MapOf = Generic.MakeStateful (struct end module State = StorageMigrateState - open Storage_migrate.State + open Storage_migrate_helper.State let load_input () (send, recv, copy) = Option.iter (fun (id, send) -> add id send) send ; @@ -116,7 +121,7 @@ module MapOf = Generic.MakeStateful (struct end) let test_clear () = - let open Storage_migrate.State in + let open Storage_migrate_helper.State in clear () ; add "foo" (Send_op sample_send_state) ; add "bar" (Recv_op sample_receive_state) ; @@ -130,5 +135,5 @@ let test_clear () = let test = [("clear", `Quick, test_clear)] let tests = - Storage_migrate.State.persist_root := Test_common.working_area ; + Storage_migrate_helper.State.persist_root := Test_common.working_area ; [("storage_migrate_state_map_of", MapOf.tests)] diff --git a/ocaml/tests/test_vdi_cbt.ml b/ocaml/tests/test_vdi_cbt.ml index 3137e0485cb..54ae411ac97 100644 --- a/ocaml/tests/test_vdi_cbt.ml +++ 
b/ocaml/tests/test_vdi_cbt.ml @@ -30,10 +30,11 @@ let register_smapiv2_server (module S : Storage_interface.Server_impl) sr_ref = ; features= [] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } in - Storage_mux.register sr_ref rpc "" dummy_query_result + Storage_mux_reg.register sr_ref rpc "" dummy_query_result let make_smapiv2_storage_server ?vdi_enable_cbt ?vdi_disable_cbt ?vdi_list_changed_blocks ?vdi_data_destroy ?vdi_snapshot ?vdi_clone:_ () = diff --git a/ocaml/tests/test_xapi_xenops.ml b/ocaml/tests/test_xapi_xenops.ml index 551c7d0d90f..e1f1bf048e2 100644 --- a/ocaml/tests/test_xapi_xenops.ml +++ b/ocaml/tests/test_xapi_xenops.ml @@ -62,8 +62,8 @@ let test_xapi_restart_inner () = in let flags = [ - (Xapi_globs.cpu_info_vendor_key, "AuthenticAMD") - ; (Xapi_globs.cpu_info_features_key, "deadbeef-deadbeef") + (Constants.cpu_info_vendor_key, "AuthenticAMD") + ; (Constants.cpu_info_features_key, "deadbeef-deadbeef") ] in let add_flags vm = diff --git a/ocaml/tests/test_xenopsd_metadata.ml b/ocaml/tests/test_xenopsd_metadata.ml index c052de228fa..14362e73b68 100644 --- a/ocaml/tests/test_xenopsd_metadata.ml +++ b/ocaml/tests/test_xenopsd_metadata.ml @@ -38,8 +38,8 @@ let load_vm_config __context conf = in let flags = [ - (Xapi_globs.cpu_info_vendor_key, "AuthenticAMD") - ; (Xapi_globs.cpu_info_features_key, "deadbeef-deadbeef") + (Constants.cpu_info_vendor_key, "AuthenticAMD") + ; (Constants.cpu_info_features_key, "deadbeef-deadbeef") ] in Db.VM.set_last_boot_CPU_flags ~__context ~self ~value:flags ; diff --git a/ocaml/util/dune b/ocaml/util/dune index 488cf4f444f..6bd1ec5719f 100644 --- a/ocaml/util/dune +++ b/ocaml/util/dune @@ -16,10 +16,3 @@ ) (wrapped false) ) - -(library - (name xapi_host_driver_helpers) - (modules xapi_host_driver_helpers) - (libraries yojson angstrom xapi-stdext-unix) - (wrapped false) -) diff --git a/ocaml/util/xapi_host_driver_helpers.ml b/ocaml/util/xapi_host_driver_helpers.ml deleted file mode 100644 
index 4910ed8d11f..00000000000 --- a/ocaml/util/xapi_host_driver_helpers.ml +++ /dev/null @@ -1,131 +0,0 @@ -(* - Copyright (c) Cloud Software Group, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; version 2.1 only. with the special - exception on linking described in file LICENSE. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - *) - -module J = Yojson -open Angstrom - -let int n = Int32.to_int n - -let ( // ) = Filename.concat - -(** Read a (small) file into a string *) -let read path = Xapi_stdext_unix.Unixext.string_of_file path - -type note = {typ: int32; name: string; desc: string} - -module JSON = struct - let note l = - let l = - List.map - (fun d -> - `Assoc - [ - ("type", `Int (int d.typ)) - ; ("name", `String d.name) - ; ("desc", `String d.desc) - ] - ) - l - in - `List l - - let emit json = J.pretty_to_channel stdout json -end - -(** return the smallest k >= n such that k is divisible by 4 *) -let align4 n = - let ( & ) = Int.logand in - n + (-n & 3) - -(** advance the cursor to position n *) -let advance_to n = - let* pos in - advance (max 0 (n - pos)) - -(** align the cursor to a multiple of 4 *) -let align = - let* pos in - advance_to (align4 pos) - -(** parse an ELF note entry; it assumes that name and desc are null - terminated strings. This should be always true for name but desc - depends on the entry. We don't capture the terminating zero for - strings. 
*) -let note = - let* name_length = LE.any_int32 in - let* desc_length = LE.any_int32 in - let* typ = LE.any_int32 in - let* name = take (int name_length - 1) in - (* skip over terminating null and re-align cursor *) - let* _ = char '\000' in - let* () = align in - let* desc = take (int desc_length - 1) in - (* skip over terminating null and re-align cursor *) - let* _ = char '\000' in - let* () = align in - return {typ; name; desc} - -(** parser for a sequence of note entries *) -let notes = many note - -(** parse a sequence of note entries from a string *) -let parse str = - let consume = Consume.Prefix in - parse_string ~consume notes str - -let get_version path = - let version = - read path - |> parse - |> Result.map - @@ List.filter_map (fun note -> - match (note.typ, note.name) with - | 1l, "XenServer" -> - Some note.desc - | _ -> - None - ) - in - match version with - | Ok (v :: _) -> - Ok v - | _ -> - Error - (Format.sprintf - "Failed to parse %s, didn't find a XenServer driver version notes \ - section" - path - ) - -let get_notes path = - let version = read path |> parse in - match version with - | Ok (_ :: _) as v -> - v - | _ -> - Error - (Format.sprintf "Failed to parse %s, didn't find a notes section" path) - -let dump_notes prefix = - let notes_dir = prefix // "notes" in - try - let lst = - Sys.readdir notes_dir - |> Array.to_list - |> List.map (fun n -> read (notes_dir // n)) - |> List.filter_map (fun note_str -> Result.to_option (parse note_str)) - |> List.map (fun note -> (prefix, JSON.note note)) - in - JSON.emit (`Assoc lst) - with _ -> () diff --git a/ocaml/util/xapi_host_driver_helpers.mli b/ocaml/util/xapi_host_driver_helpers.mli deleted file mode 100644 index 6528d6bec94..00000000000 --- a/ocaml/util/xapi_host_driver_helpers.mli +++ /dev/null @@ -1,28 +0,0 @@ -(* - Copyright (c) Cloud Software Group, Inc. 
- - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published - by the Free Software Foundation; version 2.1 only. with the special - exception on linking described in file LICENSE. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. - *) - -type note = {typ: int32; name: string; desc: string} - -(* Parse an ELF notes section, returning the specially-encoded driver version. - - The kernel does not reveal the location from where it loaded an active - driver. Hence the name is not sufficient to observe the currently active - version. For this, XS uses ELF notes, with the kernel presenting a particular - note section in `/sys/module//notes/.note.XenServer` *) -val get_version : string -> (string, string) result - -val get_notes : string -> (note list, string) result - -(* Dumps JSON-formatted parsed ELF notes of a driver *) -val dump_notes : string -> unit diff --git a/ocaml/xapi-aux/dune b/ocaml/xapi-aux/dune index d334769d655..60e27cf5b3b 100644 --- a/ocaml/xapi-aux/dune +++ b/ocaml/xapi-aux/dune @@ -1,6 +1,7 @@ (library (name xapi_aux) (modes best) + (modules :standard \ version_test) (libraries astring clock @@ -21,3 +22,11 @@ (wrapped false) ) +; to run this test: dune exec ./version_test.exe +(tests + (names version_test) + (modes (best exe)) + (modules version_test) + (package xapi) + (libraries + xapi_aux alcotest)) diff --git a/ocaml/xapi-aux/kerberos_encryption_types.ml b/ocaml/xapi-aux/kerberos_encryption_types.ml index fd2f67399f7..d88c0905b7e 100644 --- a/ocaml/xapi-aux/kerberos_encryption_types.ml +++ b/ocaml/xapi-aux/kerberos_encryption_types.ml @@ -15,11 +15,29 @@ (* Kerberos support several different encrytion types * winbind support it as strong, legacy and all * details, 
https://www.samba.org/samba/docs/current/man-html/smb.conf.5.html - * *) + *) module Winbind = struct type t = Strong | Legacy | All + (* + * [X] 0x00000001 DES-CBC-CRC + * [X] 0x00000002 DES-CBC-MD5 + * [X] 0x00000004 RC4-HMAC + * [X] 0x00000008 AES128-CTS-HMAC-SHA1-96 + * [X] 0x00000010 AES256-CTS-HMAC-SHA1-96 + *) + + let des_cbc_crc = 0x1 + + let des_cbc_md5 = 0x2 + + let rc4_hmac = 0x4 + + let aes128_cts_hmac_sha1_96 = 0x8 + + let aes256_cts_hmac_sha1_96 = 0x10 + let to_string = function | Strong -> "strong" @@ -28,6 +46,20 @@ module Winbind = struct | All -> "all" + let ( +++ ) x y = x lor y + + let to_encoding = function + | Strong -> + aes128_cts_hmac_sha1_96 +++ aes256_cts_hmac_sha1_96 + | Legacy -> + rc4_hmac + | All -> + des_cbc_crc + +++ des_cbc_md5 + +++ rc4_hmac + +++ aes128_cts_hmac_sha1_96 + +++ aes256_cts_hmac_sha1_96 + let of_string = function | "all" -> Some All diff --git a/ocaml/xapi-aux/kerberos_encryption_types.mli b/ocaml/xapi-aux/kerberos_encryption_types.mli index 833b6d7bed2..5ef9f833a0e 100644 --- a/ocaml/xapi-aux/kerberos_encryption_types.mli +++ b/ocaml/xapi-aux/kerberos_encryption_types.mli @@ -17,5 +17,7 @@ module Winbind : sig val to_string : t -> string + val to_encoding : t -> int + val of_string : string -> t option end diff --git a/ocaml/xapi-aux/networking_info.ml b/ocaml/xapi-aux/networking_info.ml index 2717338e5da..928ad45322b 100644 --- a/ocaml/xapi-aux/networking_info.ml +++ b/ocaml/xapi-aux/networking_info.ml @@ -17,7 +17,22 @@ module L = Debug.Make (struct let name = __MODULE__ end) let get_hostname () = try Unix.gethostname () with _ -> "" -exception Unexpected_address_type of string +type management_ip_error = + | Interface_missing + | Unexpected_address_type of string + | IP_missing + | Other of exn + +let management_ip_error_to_string = function + | Interface_missing -> + "Management interface is missing" + | IP_missing -> + "Management IP is missing" + | Unexpected_address_type s -> + Printf.sprintf + "Unexpected 
address type. Expected 'ipv4' or 'ipv6', got %s" s + | Other e -> + Printexc.to_string e (* Try to get all FQDNs, avoid localhost *) let dns_names () = @@ -40,38 +55,68 @@ let dns_names () = ) |> Astring.String.uniquify -let ipaddr_to_cstruct = function +let ipaddr_to_octets = function | Ipaddr.V4 addr -> - Cstruct.of_string (Ipaddr.V4.to_octets addr) + Ipaddr.V4.to_octets addr | Ipaddr.V6 addr -> - Cstruct.of_string (Ipaddr.V6.to_octets addr) - -let list_head lst = List.nth_opt lst 0 + Ipaddr.V6.to_octets addr -let get_management_ip_addr ~dbg = +let get_management_ip_addrs ~dbg = let iface = Inventory.lookup Inventory._management_interface in try if iface = "" || (not @@ Net.Interface.exists dbg iface) then - None + Error Interface_missing else - let addrs = + let ( let* ) = Result.bind in + let* addrs = + let ipv4 = Net.Interface.get_ipv4_addr dbg iface in + let ipv6 = Net.Interface.get_ipv6_addr dbg iface in match String.lowercase_ascii (Inventory.lookup Inventory._management_address_type ~default:"ipv4") with | "ipv4" -> - Net.Interface.get_ipv4_addr dbg iface + Ok (ipv4, ipv6) | "ipv6" -> - Net.Interface.get_ipv6_addr dbg iface + Ok (ipv6, ipv4) | s -> - let msg = Printf.sprintf "Expected 'ipv4' or 'ipv6', got %s" s in - L.error "%s: %s" __FUNCTION__ msg ; - raise (Unexpected_address_type msg) + Error (Unexpected_address_type s) in - addrs - |> List.map (fun (addr, _) -> Ipaddr_unix.of_inet_addr addr) (* Filter out link-local addresses *) - |> List.filter (fun addr -> Ipaddr.scope addr <> Ipaddr.Link) - |> List.map (fun ip -> (Ipaddr.to_string ip, ipaddr_to_cstruct ip)) - |> list_head - with _ -> None + let no_local (addr, _) = + let addr = Ipaddr_unix.of_inet_addr addr in + if Ipaddr.scope addr <> Ipaddr.Link then + Some addr + else + None + in + Ok + ( List.filter_map no_local (fst addrs) + , List.filter_map no_local (snd addrs) + ) + with e -> Error (Other e) + +let get_management_ip_addr ~dbg = + match get_management_ip_addrs ~dbg with + | Ok (preferred, 
_) -> + List.nth_opt preferred 0 |> Option.map Ipaddr.to_string + | Error _ -> + None + +let get_host_certificate_subjects ~dbg = + let ( let* ) = Result.bind in + let* ips, preferred_ip = + match get_management_ip_addrs ~dbg with + | Error e -> + Error e + | Ok (preferred, others) -> + let ips = List.(rev_append (rev preferred) others) in + Option.fold ~none:(Error IP_missing) + ~some:(fun ip -> Ok (List.map ipaddr_to_octets ips, ip)) + (List.nth_opt ips 0) + in + let dns_names = dns_names () in + let name = + match dns_names with [] -> Ipaddr.to_string preferred_ip | dns :: _ -> dns + in + Ok (name, dns_names, ips) diff --git a/ocaml/xapi-aux/networking_info.mli b/ocaml/xapi-aux/networking_info.mli index 8774e7aba3d..4c8418443ab 100644 --- a/ocaml/xapi-aux/networking_info.mli +++ b/ocaml/xapi-aux/networking_info.mli @@ -14,15 +14,22 @@ val get_hostname : unit -> string (** [get_hostname ()] returns the hostname as returned by Unix.gethostname. If there is an error "" is returned. *) -exception Unexpected_address_type of string +type management_ip_error = + | Interface_missing + | Unexpected_address_type of string + | IP_missing + | Other of exn -val dns_names : unit -> string list -(** [dns_names ()] returns a list of the hostnames that the host may have. - Ignores empty names as well as "localhost" *) +val management_ip_error_to_string : management_ip_error -> string +(** [management_ip_error err] returns a string representation of [err], useful + only for logging. *) -val get_management_ip_addr : dbg:string -> (string * Cstruct.t) option -(** [get_management_ip_addr ~dbg] returns the IP of the management network. - If the system does not have management address None is return. - [Unexpected_address_type] is raised if there is an unexpected address is - stored. The address is return in two formats: human-readable string and - its bytes representation. 
*) +val get_management_ip_addr : dbg:string -> string option +(** [get_management_ip_addr ~dbg] returns the preferred IP of the management + network, or None. The address is returned in a human-readable string *) + +val get_host_certificate_subjects : + dbg:string + -> (string * string list * string list, management_ip_error) Result.t +(** [get_host_certificate_subjects ~dbg] returns the main, dns names and ip + addresses that identify the host in secure connections. *) diff --git a/ocaml/xapi-aux/version.ml b/ocaml/xapi-aux/version.ml new file mode 100644 index 00000000000..18e7cedc8e1 --- /dev/null +++ b/ocaml/xapi-aux/version.ml @@ -0,0 +1,85 @@ +(* + Copyright (c) Cloud Software Group, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + *) + +(* Simple abstraction for version information that enforces a simple + format and predicatable semantics *) + +exception Format of string + +(** in decreasing oder of sginificance *) +type t = int list + +let of_string str = + let int str = Scanf.sscanf str "%u%!" Fun.id in + try String.split_on_char '.' str |> List.map int with _ -> raise (Format str) + +let to_string t = + let str int = Printf.sprintf "%d" int in + t |> List.map str |> String.concat "." 
+ +(** Total order over versions; 1.2.3 is equal to 1.2.3.0 *) +let rec compare v1 v2 = + match (v1, v2) with + | [], [] -> + 0 + | 0 :: xs, [] -> + compare xs [] + | _, [] -> + 1 + | [], 0 :: ys -> + compare [] ys + | [], _ -> + -1 + | x :: xs, y :: ys when x = y -> + compare xs ys + | x :: _, y :: _ when x < y -> + -1 + | _ -> + 1 + +let ne x y = compare x y <> 0 + +let eq x y = compare x y = 0 + +let le x y = compare x y <= 0 + +let ge x y = compare x y >= 0 + +let gt x y = compare x y > 0 + +let lt x y = compare x y < 0 + +let is_valid str = + try + ignore (of_string str) ; + true + with Format _ -> false + +module String = struct + let wrap f v1 v2 = f (of_string v1) (of_string v2) + + let compare = wrap compare + + let ne = wrap ne + + let eq = wrap eq + + let le = wrap le + + let ge = wrap ge + + let gt = wrap gt + + let lt = wrap lt +end diff --git a/ocaml/xapi-aux/version.mli b/ocaml/xapi-aux/version.mli new file mode 100644 index 00000000000..b0aed1d6369 --- /dev/null +++ b/ocaml/xapi-aux/version.mli @@ -0,0 +1,76 @@ +(* + Copyright (c) Cloud Software Group, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + *) + +(** a version, derived from a string representation - see below *) +type t + +(** A version string violates the supported syntax *) +exception Format of string + +val of_string : string -> t +(** Parse a version; may raise [Format]. A version is a sequence of + unsigned integers separated by a dot; for axample "1.2.3" is a legal + version. Must have at least one component. 
Examples: + - 3 + - 3.10 + - 3.10.4 + - 3.10.4.0.0 + - 3.10.4.0.1 + - 0 + - 0.2 + *) + +val to_string : t -> string +(** represent a version as a string *) + +val compare : t -> t -> int +(** Total order over versions; yields one of -1, 0, 1 as by convention. + - 1.2.3 = 1.2.3.0 + - 1.10.2 > 1.9.1 + - 0.1.0.0 = 0.1 + *) + +(* version equality relations *) +val eq : t -> t -> bool + +val ge : t -> t -> bool + +val gt : t -> t -> bool + +val le : t -> t -> bool + +val lt : t -> t -> bool + +val ne : t -> t -> bool + +(* Validate the format of a version string *) +val is_valid : string -> bool + +(* Operations over version strings for convenience. Each function may + raise [Format] *) +module String : sig + val compare : string -> string -> int + + val ne : string -> string -> bool + + val eq : string -> string -> bool + + val le : string -> string -> bool + + val ge : string -> string -> bool + + val gt : string -> string -> bool + + val lt : string -> string -> bool +end diff --git a/ocaml/xapi-aux/version_test.ml b/ocaml/xapi-aux/version_test.ml new file mode 100644 index 00000000000..4e4351b5392 --- /dev/null +++ b/ocaml/xapi-aux/version_test.ml @@ -0,0 +1,25 @@ +open Alcotest + +let format () = + check bool __LOC__ true (Version.is_valid "3") ; + check bool __LOC__ true (Version.is_valid "0") ; + check bool __LOC__ true (Version.is_valid "3.1") ; + check bool __LOC__ true (Version.is_valid "3.1.4") ; + check bool __LOC__ true (Version.is_valid "3.14") ; + check bool __LOC__ false (Version.is_valid "") ; + check bool __LOC__ false (Version.is_valid "3a") ; + check bool __LOC__ false (Version.is_valid "3.1.4.") ; + check bool __LOC__ false (Version.is_valid "3.1.4.a") ; + check bool __LOC__ false (Version.is_valid "3.1.4a") ; + check bool __LOC__ false (Version.is_valid "3.1:4") ; + check bool __LOC__ false (Version.is_valid "-3.1.4") + +let order () = + check bool __LOC__ true (Version.String.eq "3" "3.0.0") ; + check bool __LOC__ true (Version.String.le "3" 
"3.0.1") ; + check bool __LOC__ true (Version.String.le "3.1" "3.10") ; + check bool __LOC__ true (Version.String.eq "0" "0.0.0") + +let tests = [test_case "format" `Quick format; test_case "order" `Quick order] + +let () = run __MODULE__ [(__MODULE__, tests)] diff --git a/ocaml/xapi-aux/version_test.mli b/ocaml/xapi-aux/version_test.mli new file mode 100644 index 00000000000..e790aeb70f0 --- /dev/null +++ b/ocaml/xapi-aux/version_test.mli @@ -0,0 +1 @@ +(* empty *) diff --git a/ocaml/xapi-cli-server/cli_frontend.ml b/ocaml/xapi-cli-server/cli_frontend.ml index 538905686b1..8f8117fde0d 100644 --- a/ocaml/xapi-cli-server/cli_frontend.ml +++ b/ocaml/xapi-cli-server/cli_frontend.ml @@ -101,6 +101,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; "sr-uuid" ; "network-uuid" ; "pool-uuid" + ; "public" ] ; help= "Create a binary blob to be associated with an API object" ; implementation= No_fd Cli_operations.blob_create @@ -127,14 +128,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) - ; (* "host-introduce", - { - reqd=["name"; "address"; "remote-port"; "remote-username"; "remote-password"]; - optn=["description"]; - help="Introduce a remote host"; - implementation=No_fd Cli_operations.host_introduce - };*) - ( "pool-enable-binary-storage" + ; ( "pool-enable-binary-storage" , { reqd= [] ; optn= [] @@ -535,6 +529,18 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "pool-get-cpu-features" + , { + reqd= [] + ; optn= [] + ; help= + {|Prints a hexadecimal representation of the pool's physical-CPU + features for PV and HVM VMs. These are combinations of all the + hosts' policies and are used when starting new VMs in a pool.|} + ; implementation= No_fd Cli_operations.pool_get_cpu_features + ; flags= [] + } + ) ; ( "host-is-in-emergency-mode" , { reqd= [] @@ -573,8 +579,10 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "host-disable" , { reqd= [] - ; optn= [] - ; help= "Disable the XE host." 
+ ; optn= ["auto-enable"] + ; help= + "Disable the XE host. Setting auto-enable=false will keep the host \ + persistently disabled until manually re-enabled with Host.enable." ; implementation= No_fd Cli_operations.host_disable ; flags= [Host_selectors] } @@ -816,7 +824,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "host-emergency-ha-disable" , { reqd= [] - ; optn= ["force"] + ; optn= ["force"; "soft"] ; help= "Disable HA on the local host. Only to be used to recover a pool \ with a broken HA setup." @@ -958,8 +966,9 @@ let rec cmdtable_data : (string * cmd_spec) list = ; optn= ["args:"] ; help= "Calls the function within the plugin on the given host with \ - optional arguments." - ; implementation= No_fd Cli_operations.host_call_plugin + optional arguments. The syntax args:key:file=/path/file.ext passes \ + the content of /path/file.ext under key to the plugin." + ; implementation= With_fd Cli_operations.host_call_plugin ; flags= [] } ) @@ -1017,8 +1026,10 @@ let rec cmdtable_data : (string * cmd_spec) list = reqd= [] ; optn= ["uuid"] ; help= - "Prints a hexadecimal representation of the host's physical-CPU \ - features." + {|Prints a hexadecimal representation of the host's physical-CPU + features for PV and HVM VMs. features_{hvm,pv} are "maximum" + featuresets the host will accept during migrations, and + features_{hvm,pv}_host will be used to start new VMs.|} ; implementation= No_fd Cli_operations.host_get_cpu_features ; flags= [] } @@ -1050,6 +1061,32 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [Host_selectors] } ) + ; ( "host-enable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Enable SSH access on the host. It will start the service sshd only \ + if it is not running. It will also enable the service sshd only if \ + it is not enabled. A newly joined host in the pool or an ejected \ + host from the pool would keep the original status." 
+ ; implementation= No_fd Cli_operations.host_enable_ssh + ; flags= [Host_selectors] + } + ) + ; ( "host-disable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Disable SSH access on the host. It will stop the service sshd only \ + if it is running. It will also disable the service sshd only if it \ + is enabled. A newly joined host in the pool or an ejected host from \ + the pool would keep the original status." + ; implementation= No_fd Cli_operations.host_disable_ssh + ; flags= [Host_selectors] + } + ) ; ( "host-emergency-clear-mandatory-guidance" , { reqd= [] @@ -1749,6 +1786,8 @@ let rec cmdtable_data : (string * cmd_spec) list = ; "host-password" ; "type" ; "remote-config" + ; "dry-run" + ; "metadata" ; "url" ; "vdi:" ] @@ -1762,7 +1801,8 @@ let rec cmdtable_data : (string * cmd_spec) list = VDIs will be imported into the Pool's default SR unless an override \ is provided. If the force option is given then any disk data \ checksum failures will be ignored. If the parameter 'url' is \ - specified, xapi will attempt to import from that URL." + specified, xapi will attempt to import from that URL. Only metadata \ + will be imported if 'metadata' is true" ; implementation= With_fd Cli_operations.vm_import ; flags= [Standard] } @@ -1776,6 +1816,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; "compress" ; "metadata" ; "excluded-device-types" + ; "include-snapshots" ] ; help= "Export a VM to ." ; implementation= With_fd Cli_operations.vm_export @@ -1815,6 +1856,21 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "vm-call-host-plugin" + , { + reqd= ["vm-uuid"; "plugin"; "fn"] + ; optn= ["args:"] + ; help= + "Calls function fn within the plugin on the host where the VM is \ + running with arguments (args:key=value). To pass a \"value\" string \ + with special characters in it (e.g. 
new line), an alternative \ + syntax args:key:file=local_file can be used in place, where the \ + content of local_file will be retrieved and assigned to \"key\" as \ + a whole." + ; implementation= With_fd Cli_operations.vm_call_host_plugin + ; flags= [] + } + ) ; ( "snapshot-export-to-template" , { reqd= ["filename"; "snapshot-uuid"] @@ -2088,7 +2144,7 @@ let rec cmdtable_data : (string * cmd_spec) list = "Create a VBD. Appropriate values for the device field are listed in \ the parameter 'allowed-VBD-devices' on the VM. If no VDI is \ specified, an empty VBD will be created. The type parameter can be \ - 'Disk' or 'CD', and the mode is 'RO' or 'RW'." + 'Disk', 'CD' or 'Floppy', and the mode is 'RO' or 'RW'." ; implementation= No_fd Cli_operations.vbd_create ; flags= [] } @@ -2351,6 +2407,7 @@ let rec cmdtable_data : (string * cmd_spec) list = "name-description" ; "sharable" ; "read-only" + ; "managed" ; "other-config:" ; "xenstore-data:" ; "sm-config:" @@ -2709,6 +2766,15 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "vm-sysprep" + , { + reqd= ["filename"] + ; optn= ["timeout"] + ; help= "Pass and execute sysprep configuration file" + ; implementation= With_fd Cli_operations.vm_sysprep + ; flags= [Vm_selectors] + } + ) ; ( "diagnostic-vm-status" , { reqd= ["uuid"] @@ -2720,17 +2786,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [Standard] } ) - ; (* - "diagnostic-event-deltas", - { - reqd=["class"]; - optn=[]; - help="Print the changes that are happening to all objects of class specified."; - implementation=With_fd Cli_operations.diagnostic_event_deltas; - flags=[]; - }; -*) - ( "diagnostic-license-status" + ; ( "diagnostic-license-status" , { reqd= [] ; optn= [] @@ -2839,7 +2895,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "host-evacuate" , { reqd= [] - ; optn= ["network-uuid"] + ; optn= ["network-uuid"; "batch-size"] ; help= "Migrate all VMs off a host." 
; implementation= No_fd Cli_operations.host_evacuate ; flags= [Host_selectors] @@ -2950,35 +3006,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) - ; (* - "alert-create", - { - reqd=["message"]; - optn=["alert-level"]; - help="Create a new alert."; - implementation=No_fd Cli_operations.alert_create; - flags=[]; - }; - "alert-destroy", - { - reqd=["uuid"]; - optn=[]; - help="Destroy an Alert."; - implementation=No_fd Cli_operations.alert_destroy; - flags=[]; - }; -*) - (* - "host-fence", - { - reqd=["host-uuid"]; - optn=[]; - help="Fence a host"; - implementation=No_fd_local_session Cli_operations.host_fence; - flags=[]; - }; -*) - ( "pool-vlan-create" + ; ( "pool-vlan-create" , { reqd= ["pif-uuid"; "vlan"; "network-uuid"] ; optn= [] @@ -3107,6 +3135,28 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) + ; ( "pool-enable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Enable SSH access on all hosts in the pool. It's a helper which \ + calls host.enable_ssh for all the hosts in the pool." + ; implementation= No_fd Cli_operations.pool_enable_ssh + ; flags= [] + } + ) + ; ( "pool-disable-ssh" + , { + reqd= [] + ; optn= [] + ; help= + "Disable SSH access on all hosts in the pool. It's a helper which \ + calls host.disable_ssh for all the hosts in the pool." 
+ ; implementation= No_fd Cli_operations.pool_disable_ssh + ; flags= [] + } + ) ; ( "host-ha-xapi-healthcheck" , { reqd= [] @@ -3117,28 +3167,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [Hidden; Neverforward] } ) - ; (* - "host-ha-query", - { - reqd=[]; - optn=[]; - help="Query the HA configuration of the local host."; - implementation=No_fd_local_session Cli_operations.host_ha_query; - flags=[Neverforward]; - }; - -*) - (* - "subject-list", - { - reqd=[]; - optn=[]; - help="Returns a list of subject names that can access the pool"; - implementation=No_fd Cli_operations.subject_list; - flags=[] - }; -*) - ( "subject-add" + ; ( "subject-add" , { reqd= ["subject-name"] ; optn= [] @@ -3184,17 +3213,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; flags= [] } ) - ; (* RBAC 2.0 only - "role-create", - { - reqd=["id";"name"]; - optn=[]; - help="Add a role to the pool"; - implementation=No_fd Cli_operations.role_create; - flags=[] - }; - *) - ( "session-subject-identifier-list" + ; ( "session-subject-identifier-list" , { reqd= [] ; optn= [] @@ -3767,7 +3786,7 @@ let rec cmdtable_data : (string * cmd_spec) list = ; ( "vtpm-create" , { reqd= ["vm-uuid"] - ; optn= [] + ; optn= ["is-unique"] ; help= "Create a VTPM associated with a VM." 
; implementation= No_fd Cli_operations.VTPM.create ; flags= [] @@ -4004,7 +4023,6 @@ let rio_help printer minimal cmd = in printer (Cli_printer.PTable [recs]) | None -> - D.log_backtrace () ; error "Responding with Unknown command %s" cmd ; printer (Cli_printer.PList ["Unknown command '" ^ cmd ^ "'"]) in @@ -4031,6 +4049,26 @@ let rio_help printer minimal cmd = let cmds = List.sort (fun (name1, _) (name2, _) -> compare name1 name2) cmds in + let help = + Printf.sprintf + {|Usage: + %s + [ -s ] XenServer host + [ -p ] XenServer port number + [ -u -pw | -pwf ] + User authentication (password or file) + [ --nossl ] Disable SSL/TLS + [ --debug ] Enable debug output + [ --debug-on-fail ] Enable debug output only on failure + [ --traceparent ] Distributed tracing context + [ ... ] Command-specific options + +To get help on a specific command: + %s help + +|} + cmd.argv0 cmd.argv0 + in if List.mem_assoc "all" cmd.params && List.assoc "all" cmd.params = "true" then let cmds = List.map fst cmds in @@ -4040,20 +4078,9 @@ let rio_help printer minimal cmd = let vm_cmds, other = List.partition (fun n -> Astring.String.is_prefix ~affix:"vm-" n) other in - let h = - "Usage: " - ^ cmd.argv0 - ^ " [-s server] [-pw passwd] [-p port] [-u user] [-pwf \ - password-file]\n" - in - let h = h ^ " [command specific arguments]\n\n" in - let h = - h - ^ "To get help on a specific command: " - ^ cmd.argv0 - ^ " help \n\n" - in - let h = h ^ "Full command list\n-----------------" in + let h = help ^ {|Full command list +----------------- +|} in if minimal then printer (Cli_printer.PList cmds) else ( @@ -4070,25 +4097,16 @@ let rio_help printer minimal cmd = in let cmds = List.map fst cmds in let h = - "Usage: " - ^ cmd.argv0 - ^ " [-s server] [-pw passwd] [-p port] [-u user] [-pwf \ - password-file]\n" - in - let h = h ^ " [command specific arguments]\n\n" in - let h = - h - ^ "To get help on a specific command: " - ^ cmd.argv0 - ^ " help \n" - in - let h = - h - ^ "To get a full listing of 
commands: " - ^ cmd.argv0 - ^ " help --all\n\n" + help + ^ Printf.sprintf + {|To get a full listing of commands: + %s help --all + +Common command list +------------------- +|} + cmd.argv0 in - let h = h ^ "Common command list\n-------------------" in if minimal then printer (Cli_printer.PList cmds) else ( diff --git a/ocaml/xapi-cli-server/cli_operations.ml b/ocaml/xapi-cli-server/cli_operations.ml index af023699f07..ad17ca837dd 100644 --- a/ocaml/xapi-cli-server/cli_operations.ml +++ b/ocaml/xapi-cli-server/cli_operations.ml @@ -2240,6 +2240,9 @@ let print_assert_exception e = "VM requires access to SR: " ^ Cli_util.ref_convert (get_arg 2 params) | Api_errors.Server_error (code, _) when code = Api_errors.host_disabled -> "Host disabled (use 'xe host-enable' to re-enable)" + | Api_errors.Server_error (code, _) + when code = Api_errors.host_disabled_indefinitely -> + "Host disabled indefinitely (use 'xe host-enable' to re-enable)" | Api_errors.Server_error (code, _) when code = Api_errors.host_not_live -> "Host down" | Api_errors.Server_error (code, _) @@ -3490,31 +3493,44 @@ let vm_memory_target_wait printer rpc session_id params = params [] ) +(** This implements the key:file=/path/to/file.txt syntax. 
The value for + key is the content of a file requested from the client *) +let args_file fd ((k, v) as p) = + match Astring.String.cut ~sep:":" k with + | Some (key, "file") -> ( + match get_client_file fd v with + | Some s -> + (key, s) + | None -> + marshal fd + (Command (PrintStderr (Printf.sprintf "Failed to read file %s\n" v))) ; + raise (ExitWithError 1) + ) + | _ -> + p + let vm_call_plugin fd printer rpc session_id params = let vm_uuid = List.assoc "vm-uuid" params in let vm = Client.VM.get_by_uuid ~rpc ~session_id ~uuid:vm_uuid in let plugin = List.assoc "plugin" params in let fn = List.assoc "fn" params in let args = read_map_params "args" params in - (* Syntax interpretation: args:key:file=filename equals args:key=filename_content *) - let convert ((k, v) as p) = - match Astring.String.cut ~sep:":" k with - | Some (key, "file") -> ( - match get_client_file fd v with - | Some s -> - (key, s) - | None -> - marshal fd - (Command (PrintStderr (Printf.sprintf "Failed to read file %s\n" v))) ; - raise (ExitWithError 1) - ) - | _ -> - p - in - let args = List.map convert args in + let args = List.map (args_file fd) args in let result = Client.VM.call_plugin ~rpc ~session_id ~vm ~plugin ~fn ~args in printer (Cli_printer.PList [result]) +let vm_call_host_plugin fd printer rpc session_id params = + let vm_uuid = List.assoc "vm-uuid" params in + let vm = Client.VM.get_by_uuid ~rpc ~session_id ~uuid:vm_uuid in + let plugin = List.assoc "plugin" params in + let fn = List.assoc "fn" params in + let args = read_map_params "args" params in + let args = List.map (args_file fd) args in + let result = + Client.VM.call_host_plugin ~rpc ~session_id ~vm ~plugin ~fn ~args + in + printer (Cli_printer.PList [result]) + let data_source_to_kvs ds = [ ("name_label", ds.API.data_source_name_label) @@ -3575,6 +3591,34 @@ let vm_data_source_forget printer rpc session_id params = params ["data-source"] ) +let vm_sysprep fd printer rpc session_id params = + let filename = List.assoc 
"filename" params in + let timeout = + match List.assoc "timeout" params |> float_of_string with + | exception _ -> + 3.0 *. 60.0 (* default in the CLI, no default in the API *) + | s when s < 0.0 -> + 0.0 + | s -> + s + in + let unattend = + match get_client_file fd filename with + | Some xml -> + xml |> SecretString.of_string + | None -> + marshal fd (Command (PrintStderr "Failed to read file.\n")) ; + raise (ExitWithError 1) + in + ignore + (do_vm_op printer rpc session_id + (fun vm -> + Client.VM.sysprep ~rpc ~session_id ~self:(vm.getref ()) ~unattend + ~timeout + ) + params ["filename"; "timeout"] + ) + (* APIs to collect SR level RRDs *) let sr_data_source_list printer rpc session_id params = ignore @@ -5297,9 +5341,8 @@ let with_license_server_changes printer rpc session_id params hosts f = ) hosts ) ; - let now = Unix.gettimeofday () in try f rpc session_id with - | Api_errors.Server_error (name, _) as e + | Api_errors.Server_error (name, [_; msg]) when name = Api_errors.license_checkout_error -> (* Put back original license_server_details *) List.iter @@ -5308,28 +5351,8 @@ let with_license_server_changes printer rpc session_id params hosts f = ~value:license_server ) current_license_servers ; - let alerts = - Client.Message.get_since ~rpc ~session_id - ~since:(Date.of_unix_time (now -. 
1.)) - in - let print_if_checkout_error (ref, msg) = - if - false - || msg.API.message_name = fst Api_messages.v6_rejected - || msg.API.message_name = fst Api_messages.v6_comm_error - || msg.API.message_name - = fst Api_messages.v6_license_server_version_obsolete - then ( - Client.Message.destroy ~rpc ~session_id ~self:ref ; - printer (Cli_printer.PStderr (msg.API.message_body ^ "\n")) - ) - in - if alerts = [] then - raise e - else ( - List.iter print_if_checkout_error alerts ; - raise (ExitWithError 1) - ) + printer (Cli_printer.PStderr (msg ^ "\n")) ; + raise (ExitWithError 1) | Api_errors.Server_error (name, _) as e when name = Api_errors.invalid_edition -> let host = get_host_from_session rpc session_id in @@ -5368,13 +5391,21 @@ let host_evacuate _printer rpc session_id params = Client.Network.get_by_uuid ~rpc ~session_id ~uuid ) in + let evacuate_batch_size = + match List.assoc_opt "batch-size" params with + | Some x -> + Scanf.sscanf x "%Lu%!" Fun.id + | None -> + 0L + in ignore (do_host_op rpc session_id ~multiple:false (fun _ host -> Client.Host.evacuate ~rpc ~session_id ~host:(host.getref ()) ~network - ~evacuate_batch_size:0L + ~evacuate_batch_size ) - params ["network-uuid"] + params + ["network-uuid"; "batch-size"] ) let host_get_vms_which_prevent_evacuation printer rpc session_id params = @@ -6537,12 +6568,14 @@ let bond_set_mode _printer rpc session_id params = Client.Bond.set_mode ~rpc ~session_id ~self:bond ~value:mode let host_disable _printer rpc session_id params = + let auto_enable = get_bool_param ~default:true params "auto-enable" in ignore (do_host_op rpc session_id (fun _ host -> Client.Host.disable ~rpc ~session_id ~host:(host.getref ()) + ~auto_enable ) - params [] + params ["auto-enable"] ) let host_sync_data _printer rpc session_id params = @@ -6798,6 +6831,28 @@ let pool_get_guest_secureboot_readiness printer rpc session_id params = (Record_util.pool_guest_secureboot_readiness_to_string result) ) +let cpu_info_features_of feature_keys 
cpu_info = + let ( let* ) = Option.bind in + List.filter_map + (fun key -> + let* features = List.assoc_opt key cpu_info in + Some (key, features) + ) + feature_keys + +let pool_get_cpu_features printer rpc session_id params = + let pool = get_pool_with_default rpc session_id params "uuid" in + let cpu_info = Client.Pool.get_cpu_info ~rpc ~session_id ~self:pool in + + let feature_keys = + [ + Constants.cpu_info_features_pv_host_key + ; Constants.cpu_info_features_hvm_host_key + ] + in + let features = cpu_info_features_of feature_keys cpu_info in + printer (Cli_printer.PTable [features]) + let pool_sync_bundle fd _printer rpc session_id params = let filename_opt = List.assoc_opt "filename" params in match filename_opt with @@ -6828,6 +6883,14 @@ let pool_sync_bundle fd _printer rpc session_id params = | None -> failwith "Required parameter not found: filename" +let pool_enable_ssh _printer rpc session_id params = + let pool = get_pool_with_default rpc session_id params "uuid" in + Client.Pool.enable_ssh ~rpc ~session_id ~self:pool + +let pool_disable_ssh _printer rpc session_id params = + let pool = get_pool_with_default rpc session_id params "uuid" in + Client.Pool.disable_ssh ~rpc ~session_id ~self:pool + let host_restore fd _printer rpc session_id params = let filename = List.assoc "file-name" params in let op _ host = @@ -6899,12 +6962,13 @@ let host_set_hostname_live _printer rpc session_id params = let hostname = List.assoc "host-name" params in Client.Host.set_hostname_live ~rpc ~session_id ~host ~hostname -let host_call_plugin printer rpc session_id params = +let host_call_plugin fd printer rpc session_id params = let host_uuid = List.assoc "host-uuid" params in let host = Client.Host.get_by_uuid ~rpc ~session_id ~uuid:host_uuid in let plugin = List.assoc "plugin" params in let fn = List.assoc "fn" params in let args = read_map_params "args" params in + let args = List.map (args_file fd) args in let result = Client.Host.call_plugin ~rpc ~session_id ~host 
~plugin ~fn ~args in @@ -6958,8 +7022,17 @@ let host_get_cpu_features printer rpc session_id params = get_host_from_session rpc session_id in let cpu_info = Client.Host.get_cpu_info ~rpc ~session_id ~self:host in - let features = List.assoc "features" cpu_info in - printer (Cli_printer.PMsg features) + + let feature_keys = + [ + Constants.cpu_info_features_pv_key + ; Constants.cpu_info_features_hvm_key + ; Constants.cpu_info_features_pv_host_key + ; Constants.cpu_info_features_hvm_host_key + ] + in + let features = cpu_info_features_of feature_keys cpu_info in + printer (Cli_printer.PTable [features]) let host_enable_display printer rpc session_id params = let host = @@ -7207,59 +7280,11 @@ let host_send_debug_keys _printer rpc session_id params = let keys = List.assoc "keys" params in Client.Host.send_debug_keys ~rpc ~session_id ~host ~keys -(* - let host_introduce printer rpc session_id params = - let name = List.assoc "name" params in - let descr = if List.mem_assoc "description" params then List.assoc "description" params else "" in - let address = List.assoc "address" params in - let port = List.assoc "remote-port" params in - let remote_username = List.assoc "remote-username" params in - let remote_password = List.assoc "remote-password" params in - ignore(Client.Credential.create_with_password ~rpc ~session_id name descr address (Int64.of_string port) remote_username remote_password) - *) - let task_cancel _printer rpc session_id params = let uuid = List.assoc "uuid" params in let task = Client.Task.get_by_uuid ~rpc ~session_id ~uuid in Client.Task.cancel ~rpc ~session_id ~task -(* - let alert_create printer rpc session_id params = - let string_to_alert_level s = - match s with - | "info" -> `Info - | "warning" | "warn" -> `Warn - | "error" -> `Error - | _ -> `Info - in - let message = List.assoc "message" params in - let level = if List.mem_assoc "level" params then List.assoc "level" params else "info" in - let level = string_to_alert_level level in - let 
alert = Client.Alert.create ~rpc ~session_id message [] level in - let uuid = Client.Alert.get_uuid ~rpc ~session_id alert in - printer (Cli_printer.PList [uuid]) - - let alert_destroy printer rpc session_id params = - let uuid = List.assoc "uuid" params in - let alert = Client.Alert.get_by_uuid ~rpc ~session_id uuid in - Client.Alert.destroy ~rpc ~session_id alert - *) - -(* - let subject_list printer rpc session_id params = -(* we get all subjects from the pool *) - let subjects = Client.Subject.get_all_records ~rpc ~session_id in - let table_of_subject (subject,record) = - [ "subject-uuid", record.API.subject_uuid; - "subject-identifier", record.API.subject_subject_identifier; -(* "subject-name", Client.Subject.get_subject_name ~rpc ~session_id subject;*) - ] @ - record.API.subject_other_config - in - let all = List.map table_of_subject subjects in - printer (Cli_printer.PTable all) - *) - let subject_add printer rpc session_id params = let subject_name = List.assoc "subject-name" params in (* let's try to resolve the subject_name to a subject_id using the external directory *) @@ -7350,13 +7375,6 @@ let audit_log_get fd _printer rpc session_id params = download_file_with_task fd rpc session_id filename Constants.audit_log_uri query label label -(* RBAC 2.0 only - let role_create printer rpc session_id params = - (*let id = List.assoc "id" params in*) - let name = List.assoc "name" params in - ignore (Client.Role.create ~rpc ~session_id ~name ~description:"" ~permissions:[] ~is_basic:false ~is_complete:false) -*) - let session_subject_identifier_list printer rpc session_id _params = let subject_identifiers = Client.Session.get_all_subject_identifiers ~rpc ~session_id @@ -7778,6 +7796,26 @@ let host_apply_updates _printer rpc session_id params = params ["hash"] ) +let host_enable_ssh _printer rpc session_id params = + ignore + (do_host_op rpc session_id + (fun _ host -> + let host = host.getref () in + Client.Host.enable_ssh ~rpc ~session_id ~self:host + ) + 
params [] + ) + +let host_disable_ssh _printer rpc session_id params = + ignore + (do_host_op rpc session_id + (fun _ host -> + let host = host.getref () in + Client.Host.disable_ssh ~rpc ~session_id ~self:host + ) + params [] + ) + module SDN_controller = struct let introduce printer rpc session_id params = let port = @@ -8068,7 +8106,9 @@ module VTPM = struct let create printer rpc session_id params = let vm_uuid = List.assoc "vm-uuid" params in let vM = Client.VM.get_by_uuid ~rpc ~session_id ~uuid:vm_uuid in - let is_unique = get_bool_param params "is_unique" in + let is_unique = + get_bool_param params "is_unique" || get_bool_param params "is-unique" + in let ref = Client.VTPM.create ~rpc ~session_id ~vM ~is_unique in let uuid = Client.VTPM.get_uuid ~rpc ~session_id ~self:ref in printer (Cli_printer.PList [uuid]) diff --git a/ocaml/xapi-cli-server/cli_util.ml b/ocaml/xapi-cli-server/cli_util.ml index 75c4f30360f..b71c9f1f3a3 100644 --- a/ocaml/xapi-cli-server/cli_util.ml +++ b/ocaml/xapi-cli-server/cli_util.ml @@ -91,7 +91,7 @@ let track callback rpc (session_id : API.ref_session) task = | _ -> false in - finished := List.fold_left ( || ) false (List.map matches events) + finished := List.exists matches events done with | Api_errors.Server_error (code, _) diff --git a/ocaml/xapi-cli-server/record_util.ml b/ocaml/xapi-cli-server/record_util.ml index d28b6b5f763..a11b30decb3 100644 --- a/ocaml/xapi-cli-server/record_util.ml +++ b/ocaml/xapi-cli-server/record_util.ml @@ -75,6 +75,7 @@ let vm_operation_table = ; (`csvm, "csvm") ; (`call_plugin, "call_plugin") ; (`create_vtpm, "create_vtpm") + ; (`sysprep, "sysprep") ] (* Intentional shadowing - data_souces_op, assertoperationinvalid, diff --git a/ocaml/xapi-cli-server/records.ml b/ocaml/xapi-cli-server/records.ml index 56e97fbda03..06e9b5e6e47 100644 --- a/ocaml/xapi-cli-server/records.ml +++ b/ocaml/xapi-cli-server/records.ml @@ -20,6 +20,8 @@ let nullref = Ref.string_of Ref.null let nid = "" +let inconsistent = 
"" + let unknown_time = "" let string_of_float f = Printf.sprintf "%.3f" f @@ -204,6 +206,37 @@ let get_pbds_host rpc session_id pbds = let get_sr_host rpc session_id record = get_pbds_host rpc session_id record.API.sR_PBDs +(** Get consistent field from all hosts, or return a default value if the field + is not the same on all hosts. *) +let get_consistent_field_or_default ~rpc ~session_id ~getter ~transform ~default + = + match Client.Host.get_all ~rpc ~session_id with + | [] -> + default + | hosts -> ( + let result = + List.fold_left + (fun acc host -> + match acc with + | `Inconsistent -> + `Inconsistent + | `NotSet -> + `Value (getter ~rpc ~session_id ~self:host |> transform) + | `Value v -> + let current = getter ~rpc ~session_id ~self:host |> transform in + if v = current then `Value v else `Inconsistent + ) + `NotSet hosts + in + match result with + | `Value v -> + v + | `Inconsistent -> + default + | `NotSet -> + default + ) + let bond_record rpc session_id bond = let _ref = ref bond in let empty_record = @@ -1244,6 +1277,15 @@ let pool_record rpc session_id pool = ; make_field ~name:"ha-overcommitted" ~get:(fun () -> string_of_bool (x ()).API.pool_ha_overcommitted) () + ; make_field ~name:"ha-reboot-vm-on-internal-shutdown" + ~get:(fun () -> + string_of_bool (x ()).API.pool_ha_reboot_vm_on_internal_shutdown + ) + ~set:(fun x -> + Client.Pool.set_ha_reboot_vm_on_internal_shutdown ~rpc ~session_id + ~self:pool ~value:(bool_of_string x) + ) + () ; make_field ~name:"blobs" ~get:(fun () -> get_uuid_map_from_ref_map (x ()).API.pool_blobs) () @@ -1506,6 +1548,53 @@ let pool_record rpc session_id pool = ) ~get_map:(fun () -> (x ()).API.pool_license_server) () + ; make_field ~name:"ssh-enabled" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_ssh_enabled ~transform:string_of_bool + ~default:inconsistent + ) + () + ; make_field ~name:"ssh-enabled-timeout" + ~get:(fun () -> + get_consistent_field_or_default ~rpc 
~session_id + ~getter:Client.Host.get_ssh_enabled_timeout + ~transform:Int64.to_string ~default:inconsistent + ) + ~set:(fun value -> + Client.Pool.set_ssh_enabled_timeout ~rpc ~session_id ~self:pool + ~value:(safe_i64_of_string "ssh-enabled-timeout" value) + ) + () + ; make_field ~name:"ssh-expiry" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_ssh_expiry ~transform:Date.to_rfc3339 + ~default:inconsistent + ) + () + ; make_field ~name:"console-idle-timeout" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_console_idle_timeout + ~transform:Int64.to_string ~default:inconsistent + ) + ~set:(fun value -> + Client.Pool.set_console_idle_timeout ~rpc ~session_id ~self:pool + ~value:(safe_i64_of_string "console-idle-timeout" value) + ) + () + ; make_field ~name:"ssh-auto-mode" + ~get:(fun () -> + get_consistent_field_or_default ~rpc ~session_id + ~getter:Client.Host.get_ssh_auto_mode ~transform:string_of_bool + ~default:inconsistent + ) + ~set:(fun value -> + Client.Pool.set_ssh_auto_mode ~rpc ~session_id ~self:pool + ~value:(safe_bool_of_string "ssh-auto-mode" value) + ) + () ] } @@ -2416,6 +2505,18 @@ let vm_record rpc session_id vm = (xgm ()) ) () + ; make_field ~name:"services" + ~get:(fun () -> + Option.fold ~none:nid + ~some:(fun m -> get_from_map m.API.vM_guest_metrics_services) + (xgm ()) + ) + ~get_map:(fun () -> + Option.fold ~none:[] + ~some:(fun m -> m.API.vM_guest_metrics_services) + (xgm ()) + ) + () ; make_field ~name:"PV-drivers-detected" ~get:(fun () -> Option.fold ~none:nid @@ -3265,6 +3366,33 @@ let host_record rpc session_id host = ; make_field ~name:"last-update-hash" ~get:(fun () -> (x ()).API.host_last_update_hash) () + ; make_field ~name:"ssh-enabled" + ~get:(fun () -> string_of_bool (x ()).API.host_ssh_enabled) + () + ; make_field ~name:"ssh-enabled-timeout" + ~get:(fun () -> Int64.to_string (x ()).API.host_ssh_enabled_timeout) + ~set:(fun value -> + 
Client.Host.set_ssh_enabled_timeout ~rpc ~session_id ~self:host + ~value:(safe_i64_of_string "ssh-enabled-timeout" value) + ) + () + ; make_field ~name:"ssh-expiry" + ~get:(fun () -> Date.to_rfc3339 (x ()).API.host_ssh_expiry) + () + ; make_field ~name:"console-idle-timeout" + ~get:(fun () -> Int64.to_string (x ()).API.host_console_idle_timeout) + ~set:(fun value -> + Client.Host.set_console_idle_timeout ~rpc ~session_id ~self:host + ~value:(safe_i64_of_string "console-idle-timeout" value) + ) + () + ; make_field ~name:"ssh-auto-mode" + ~get:(fun () -> string_of_bool (x ()).API.host_ssh_auto_mode) + ~set:(fun value -> + Client.Host.set_ssh_auto_mode ~rpc ~session_id ~self:host + ~value:(safe_bool_of_string "ssh-auto-mode" value) + ) + () ] } @@ -5111,6 +5239,9 @@ let cluster_record rpc session_id cluster = ; make_field ~name:"live-hosts" ~get:(fun () -> Int64.to_string (x ()).API.cluster_live_hosts) () + ; make_field ~name:"expected-hosts" + ~get:(fun () -> Int64.to_string (x ()).API.cluster_expected_hosts) + () ] } diff --git a/ocaml/xapi-client/event_helper.ml b/ocaml/xapi-client/event_helper.ml index 3ec6e7f9236..cbbeb978ba2 100644 --- a/ocaml/xapi-client/event_helper.ml +++ b/ocaml/xapi-client/event_helper.ml @@ -43,96 +43,132 @@ type event_record = | VMPP of [`VMPP] Ref.t * API.vMPP_t option | VMSS of [`VMSS] Ref.t * API.vMSS_t option -let maybe f x = match x with Some x -> Some (f x) | None -> None - let record_of_event ev = let rpc = ev.Event_types.snapshot in match ev.Event_types.ty with | "session" -> Session ( Ref.of_secret_string ev.Event_types.reference - , maybe API.session_t_of_rpc rpc + , Option.map API.session_t_of_rpc rpc ) | "task" -> - Task (Ref.of_string ev.Event_types.reference, maybe API.task_t_of_rpc rpc) + Task + ( Ref.of_string ev.Event_types.reference + , Option.map API.task_t_of_rpc rpc + ) | "event" -> Event - (Ref.of_string ev.Event_types.reference, maybe API.event_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map 
API.event_t_of_rpc rpc + ) | "vm" -> - VM (Ref.of_string ev.Event_types.reference, maybe API.vM_t_of_rpc rpc) + VM (Ref.of_string ev.Event_types.reference, Option.map API.vM_t_of_rpc rpc) | "vm_metrics" -> VM_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vM_metrics_t_of_rpc rpc + , Option.map API.vM_metrics_t_of_rpc rpc ) | "vm_guest_metrics" -> VM_guest_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vM_guest_metrics_t_of_rpc rpc + , Option.map API.vM_guest_metrics_t_of_rpc rpc ) | "host" -> - Host (Ref.of_string ev.Event_types.reference, maybe API.host_t_of_rpc rpc) + Host + ( Ref.of_string ev.Event_types.reference + , Option.map API.host_t_of_rpc rpc + ) | "host_metrics" -> Host_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.host_metrics_t_of_rpc rpc + , Option.map API.host_metrics_t_of_rpc rpc ) | "host_cpu" -> Host_cpu - (Ref.of_string ev.Event_types.reference, maybe API.host_cpu_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.host_cpu_t_of_rpc rpc + ) | "network" -> Network - (Ref.of_string ev.Event_types.reference, maybe API.network_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.network_t_of_rpc rpc + ) | "vif" -> - VIF (Ref.of_string ev.Event_types.reference, maybe API.vIF_t_of_rpc rpc) + VIF + (Ref.of_string ev.Event_types.reference, Option.map API.vIF_t_of_rpc rpc) | "vif_metrics" -> VIF_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vIF_metrics_t_of_rpc rpc + , Option.map API.vIF_metrics_t_of_rpc rpc ) | "pif" -> - PIF (Ref.of_string ev.Event_types.reference, maybe API.pIF_t_of_rpc rpc) + PIF + (Ref.of_string ev.Event_types.reference, Option.map API.pIF_t_of_rpc rpc) | "pif_metrics" -> PIF_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.pIF_metrics_t_of_rpc rpc + , Option.map API.pIF_metrics_t_of_rpc rpc ) | "sr" -> - SR (Ref.of_string ev.Event_types.reference, maybe API.sR_t_of_rpc rpc) + SR (Ref.of_string 
ev.Event_types.reference, Option.map API.sR_t_of_rpc rpc) | "vdi" -> - VDI (Ref.of_string ev.Event_types.reference, maybe API.vDI_t_of_rpc rpc) + VDI + (Ref.of_string ev.Event_types.reference, Option.map API.vDI_t_of_rpc rpc) | "vbd" -> - VBD (Ref.of_string ev.Event_types.reference, maybe API.vBD_t_of_rpc rpc) + VBD + (Ref.of_string ev.Event_types.reference, Option.map API.vBD_t_of_rpc rpc) | "vbd_metrics" -> VBD_metrics ( Ref.of_string ev.Event_types.reference - , maybe API.vBD_metrics_t_of_rpc rpc + , Option.map API.vBD_metrics_t_of_rpc rpc ) | "pbd" -> - PBD (Ref.of_string ev.Event_types.reference, maybe API.pBD_t_of_rpc rpc) + PBD + (Ref.of_string ev.Event_types.reference, Option.map API.pBD_t_of_rpc rpc) | "crashdump" -> Crashdump ( Ref.of_string ev.Event_types.reference - , maybe API.crashdump_t_of_rpc rpc + , Option.map API.crashdump_t_of_rpc rpc ) | "vtpm" -> - VTPM (Ref.of_string ev.Event_types.reference, maybe API.vTPM_t_of_rpc rpc) + VTPM + ( Ref.of_string ev.Event_types.reference + , Option.map API.vTPM_t_of_rpc rpc + ) | "console" -> Console - (Ref.of_string ev.Event_types.reference, maybe API.console_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.console_t_of_rpc rpc + ) | "user" -> - User (Ref.of_string ev.Event_types.reference, maybe API.user_t_of_rpc rpc) + User + ( Ref.of_string ev.Event_types.reference + , Option.map API.user_t_of_rpc rpc + ) | "pool" -> - Pool (Ref.of_string ev.Event_types.reference, maybe API.pool_t_of_rpc rpc) + Pool + ( Ref.of_string ev.Event_types.reference + , Option.map API.pool_t_of_rpc rpc + ) | "message" -> Message - (Ref.of_string ev.Event_types.reference, maybe API.message_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.message_t_of_rpc rpc + ) | "secret" -> Secret - (Ref.of_string ev.Event_types.reference, maybe API.secret_t_of_rpc rpc) + ( Ref.of_string ev.Event_types.reference + , Option.map API.secret_t_of_rpc rpc + ) | "vmpp" -> - VMPP (Ref.of_string 
ev.Event_types.reference, maybe API.vMPP_t_of_rpc rpc) + VMPP + ( Ref.of_string ev.Event_types.reference + , Option.map API.vMPP_t_of_rpc rpc + ) | "vmss" -> - VMSS (Ref.of_string ev.Event_types.reference, maybe API.vMSS_t_of_rpc rpc) + VMSS + ( Ref.of_string ev.Event_types.reference + , Option.map API.vMSS_t_of_rpc rpc + ) | _ -> failwith "unknown event type" diff --git a/ocaml/xapi-client/tasks.ml b/ocaml/xapi-client/tasks.ml index c62f681d602..a9da21890ec 100644 --- a/ocaml/xapi-client/tasks.ml +++ b/ocaml/xapi-client/tasks.ml @@ -23,7 +23,7 @@ module TaskSet = Set.Make (struct end) (* Return once none of the tasks have a `pending status. *) -let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = +let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks ~callback = let classes = List.map (fun task -> Printf.sprintf "task/%s" (Ref.string_of task)) tasks in @@ -36,7 +36,12 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = in let timer = Mtime_clock.counter () in let timeout = 5.0 in - let rec wait ~token ~task_set = + let get_new_classes task_set = + TaskSet.fold + (fun task l -> Printf.sprintf "task/%s" (Ref.string_of task) :: l) + task_set [] + in + let rec wait ~token ~task_set ~completed_task_count ~classes = if TaskSet.is_empty task_set then true else @@ -58,24 +63,39 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = List.map Event_helper.record_of_event event_from.events in (* If any records indicate that a task is no longer pending, remove that task from the set. 
*) - let pending_task_set = + let pending_task_set, completed_task_count, classes = List.fold_left - (fun task_set' record -> + (fun (task_set', completed_task_count, _) record -> match record with | Event_helper.Task (t, Some t_rec) -> if TaskSet.mem t task_set' && t_rec.API.task_status <> `pending then - TaskSet.remove t task_set' + let new_task_set = TaskSet.remove t task_set' in + let completed_task_count = completed_task_count + 1 in + + (* Call the callback function, wait for new tasks if any *) + let tasks_to_add = callback completed_task_count t in + let new_task_set = + List.fold_left + (fun task_set task -> TaskSet.add task task_set) + new_task_set tasks_to_add + in + ( new_task_set + , completed_task_count + , get_new_classes new_task_set + ) else - task_set' + (task_set', completed_task_count, classes) | _ -> - task_set' + (task_set', completed_task_count, classes) ) - task_set records + (task_set, completed_task_count, classes) + records in wait ~token:event_from.Event_types.token ~task_set:pending_task_set + ~completed_task_count ~classes in let token = "" in let task_set = @@ -83,17 +103,27 @@ let wait_for_all_inner ~rpc ~session_id ~all_timeout ~tasks = (fun task_set' task -> TaskSet.add task task_set') TaskSet.empty tasks in - wait ~token ~task_set + wait ~token ~task_set ~completed_task_count:0 ~classes let wait_for_all ~rpc ~session_id ~tasks = - wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks |> ignore + wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks + ~callback:(fun _ _ -> [] + ) + |> ignore + +let wait_for_all_with_callback ~rpc ~session_id ~tasks ~callback = + wait_for_all_inner ~rpc ~session_id ~all_timeout:None ~tasks ~callback + |> ignore let with_tasks_destroy ~rpc ~session_id ~timeout ~tasks = let wait_or_cancel () = D.info "Waiting for %d tasks, timeout: %.3fs" (List.length tasks) timeout ; if not - (wait_for_all_inner ~rpc ~session_id ~all_timeout:(Some timeout) ~tasks) + (wait_for_all_inner ~rpc ~session_id 
~all_timeout:(Some timeout) ~tasks + ~callback:(fun _ _ -> [] + ) + ) then ( D.info "Canceling tasks" ; List.iter @@ -104,6 +134,8 @@ let with_tasks_destroy ~rpc ~session_id ~timeout ~tasks = tasks ; (* cancel is not immediate, give it a reasonable chance to take effect *) wait_for_all_inner ~rpc ~session_id ~all_timeout:(Some 60.) ~tasks + ~callback:(fun _ _ -> [] + ) |> ignore ; false ) else diff --git a/ocaml/xapi-client/tasks.mli b/ocaml/xapi-client/tasks.mli index 8989b01716f..a396c569aef 100644 --- a/ocaml/xapi-client/tasks.mli +++ b/ocaml/xapi-client/tasks.mli @@ -12,6 +12,8 @@ * GNU Lesser General Public License for more details. *) +module TaskSet : Set.S with type elt = API.ref_task + val wait_for_all : rpc:(Rpc.call -> Rpc.response) -> session_id:API.ref_session @@ -20,6 +22,27 @@ val wait_for_all : (** [wait_for_all ~rpc ~session_id ~tasks] returns when all of [tasks] are in some non-pending state. *) +val wait_for_all_with_callback : + rpc:(Rpc.call -> Rpc.response) + -> session_id:API.ref_session + -> tasks:API.ref_task list + -> callback:(int -> API.ref_task -> API.ref_task list) + -> unit +(** [wait_for_all_with_callback ~rpc ~session_id ~tasks ~callback] returns when + all of [tasks] are in some non-pending state. When one of the [tasks] is + completed, [callback overall_completed_task_count] is invoked, which returns + a list of tasks that need to be added to [tasks] and waited on as well. + + This allows, for example, to implement a system where tasks are processed + in batches of *constant* size X, with new tasks being started as soon as at + least one slot in the batch is freed, instead of waiting for the whole batch + to finish (and potentially being slowed down by a single worst performer). + + The callback could instead just perform some side-effect (set the progress + of the overall task representing progress of individual units, for example) + and return an empty list. 
+ *) + val with_tasks_destroy : rpc:(Rpc.call -> Rpc.response) -> session_id:API.ref_session diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 93ee90eb88f..2a1b9b58b72 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -113,6 +113,8 @@ let host_disabled = add_error "HOST_DISABLED" let host_disabled_until_reboot = add_error "HOST_DISABLED_UNTIL_REBOOT" +let host_disabled_indefinitely = add_error "HOST_DISABLED_INDEFINITELY" + let host_not_disabled = add_error "HOST_NOT_DISABLED" let host_not_live = add_error "HOST_NOT_LIVE" @@ -440,14 +442,7 @@ let vm_old_pv_drivers = add_error "VM_OLD_PV_DRIVERS" let vm_lacks_feature = add_error "VM_LACKS_FEATURE" -let vm_lacks_feature_shutdown = add_error "VM_LACKS_FEATURE_SHUTDOWN" - -let vm_lacks_feature_suspend = add_error "VM_LACKS_FEATURE_SUSPEND" - -let vm_lacks_feature_vcpu_hotplug = add_error "VM_LACKS_FEATURE_VCPU_HOTPLUG" - -let vm_lacks_feature_static_ip_setting = - add_error "VM_LACKS_FEATURE_STATIC_IP_SETTING" +let vm_non_suspendable = add_error "VM_NON_SUSPENDABLE" let vm_cannot_delete_default_template = add_error "VM_CANNOT_DELETE_DEFAULT_TEMPLATE" @@ -757,6 +752,12 @@ let pool_joining_host_ca_certificates_conflict = let pool_joining_sm_features_incompatible = add_error "POOL_JOINING_SM_FEATURES_INCOMPATIBLE" +let pool_joining_pool_cannot_enable_clustering_on_vlan_network = + add_error "POOL_JOINING_POOL_CANNOT_ENABLE_CLUSTERING_ON_VLAN_NETWORK" + +let pool_joining_host_must_have_only_one_IP_on_clustering_network = + add_error "POOL_JOINING_HOST_MUST_HAVE_ONLY_ONE_IP_ON_CLUSTERING_NETWORK" + (*workload balancing*) let wlb_not_initialized = add_error "WLB_NOT_INITIALIZED" @@ -1317,6 +1318,8 @@ let configure_repositories_in_progress = let invalid_base_url = add_error "INVALID_BASE_URL" +let blocked_repo_url = add_error "BLOCKED_REPO_URL" + let invalid_gpgkey_path = add_error "INVALID_GPGKEY_PATH" let repository_already_exists = add_error 
"REPOSITORY_ALREADY_EXISTS" @@ -1412,7 +1415,26 @@ let illegal_in_fips_mode = add_error "ILLEGAL_IN_FIPS_MODE" let too_many_groups = add_error "TOO_MANY_GROUPS" +let enable_ssh_failed = add_error "ENABLE_SSH_FAILED" + +let disable_ssh_failed = add_error "DISABLE_SSH_FAILED" + +let enable_ssh_partially_failed = add_error "ENABLE_SSH_PARTIALLY_FAILED" + +let disable_ssh_partially_failed = add_error "DISABLE_SSH_PARTIALLY_FAILED" + +let set_ssh_timeout_partially_failed = + add_error "SET_SSH_TIMEOUT_PARTIALLY_FAILED" + +let set_console_timeout_partially_failed = + add_error "SET_CONSOLE_TIMEOUT_PARTIALLY_FAILED" + +let set_ssh_auto_mode_partially_failed = + add_error "SET_SSH_AUTO_MODE_PARTIALLY_FAILED" + let host_driver_no_hardware = add_error "HOST_DRIVER_NO_HARDWARE" let tls_verification_not_enabled_in_pool = add_error "TLS_VERIFICATION_NOT_ENABLED_IN_POOL" + +let sysprep = add_error "SYSPREP" diff --git a/ocaml/xapi-consts/constants.ml b/ocaml/xapi-consts/constants.ml index 3072a459c00..524d7ab07a5 100644 --- a/ocaml/xapi-consts/constants.ml +++ b/ocaml/xapi-consts/constants.ml @@ -177,6 +177,20 @@ let hvm_boot_params_order = "order" let hvm_default_boot_order = "cd" +(** Keys for different CPUID policies in {Host,Pool}.cpu_info *) + +let cpu_info_vendor_key = "vendor" + +let cpu_info_features_key = "features" + +let cpu_info_features_pv_key = "features_pv" + +let cpu_info_features_hvm_key = "features_hvm" + +let cpu_info_features_pv_host_key = "features_pv_host" + +let cpu_info_features_hvm_host_key = "features_hvm_host" + (* Key we put in VM.other_config when we upgrade a VM from Zurich/Geneva to Rio *) let vm_upgrade_time = "upgraded at" @@ -219,6 +233,11 @@ let master_scripts = "master_scripts" This will prevent anyone from re-enabling the host and starting VMs on it during shutdown. 
*) let host_disabled_until_reboot = "host_disabled_until_reboot" +(* This flag is set to false when the host is forcibly disabled in a + persistent way - it will not be re-enabled on startup (even after reboots) + until manually directed by the user *) +let host_auto_enable = "host_auto_enable" + (* Set when shutting down and rebooting. If we come up and finds no new crashdump and HA is enabled, we assume the host was fenced. *) let host_restarted_cleanly = "host_restarted_cleanly" @@ -422,3 +441,11 @@ let observer_components_all = let tgroups_enabled = ref false let when_tgroups_enabled f = if !tgroups_enabled then f () else () + +let default_ssh_enabled = true + +let default_ssh_enabled_timeout = 0L + +let default_console_idle_timeout = 0L + +let default_ssh_auto_mode = false diff --git a/ocaml/xapi-guard/lib/server_interface.ml b/ocaml/xapi-guard/lib/server_interface.ml index 8a64a576897..6e81ce77f2d 100644 --- a/ocaml/xapi-guard/lib/server_interface.ml +++ b/ocaml/xapi-guard/lib/server_interface.ml @@ -58,7 +58,7 @@ let with_xapi ~cache ?(timeout = 120.) 
f = let serve_forever_lwt path callback = let conn_closed _ = () in let on_exn e = - log_backtrace () ; + log_backtrace e ; warn "Exception: %s" (Printexc.to_string e) in let stop, do_stop = Lwt.task () in diff --git a/ocaml/xapi-guard/src/main.ml b/ocaml/xapi-guard/src/main.ml index 67e0b7f1d0b..c7bbe3a1157 100644 --- a/ocaml/xapi-guard/src/main.ml +++ b/ocaml/xapi-guard/src/main.ml @@ -299,7 +299,7 @@ let make_message_switch_server () = (* best effort resume *) let* () = Lwt.catch (resume ~vtpm_read_write ~uefi_read_write) (fun e -> - D.log_backtrace () ; + D.log_backtrace e ; D.warn "Resume failed: %s" (Printexc.to_string e) ; Lwt.return_unit ) @@ -321,7 +321,7 @@ let main log_level = let old_hook = !Lwt.async_exception_hook in (Lwt.async_exception_hook := fun exn -> - D.log_backtrace () ; + D.log_backtrace exn ; D.error "Lwt caught async exception: %s" (Printexc.to_string exn) ; old_hook exn ) ; diff --git a/ocaml/xapi-guard/test/xapi_guard_test.ml b/ocaml/xapi-guard/test/xapi_guard_test.ml index 280d9f4d627..475a96d3a27 100644 --- a/ocaml/xapi-guard/test/xapi_guard_test.ml +++ b/ocaml/xapi-guard/test/xapi_guard_test.ml @@ -68,7 +68,7 @@ let () = let old_hook = !Lwt.async_exception_hook in Lwt.async_exception_hook := fun exn -> - D.log_backtrace () ; + D.log_backtrace exn ; D.error "Lwt caught async exception: %s" (Printexc.to_string exn) ; old_hook exn diff --git a/ocaml/xapi-idl/cluster/cli-help.t b/ocaml/xapi-idl/cluster/cli-help.t new file mode 100644 index 00000000000..abe729544da --- /dev/null +++ b/ocaml/xapi-idl/cluster/cli-help.t @@ -0,0 +1,119 @@ + $ ./cluster_cli.exe --help=plain + NAME + cluster_cli - A CLI for the cluster API. 
This tool is not intended to + be used as an end user tool + + SYNOPSIS + cluster_cli [COMMAND] … + + COMMANDS + Observer.create [OPTION]… dbg uuid name_label dict endpoints bool + + Observer.destroy [OPTION]… dbg uuid + + Observer.init [OPTION]… dbg + + Observer.set_attributes [OPTION]… dbg uuid dict + + Observer.set_compress_tracing_files [OPTION]… dbg bool + + Observer.set_enabled [OPTION]… dbg uuid bool + + Observer.set_endpoints [OPTION]… dbg uuid endpoints + + Observer.set_export_chunk_size [OPTION]… dbg int + + Observer.set_export_interval [OPTION]… dbg float + + Observer.set_host_id [OPTION]… dbg string + + Observer.set_max_depth [OPTION]… dbg int + + Observer.set_max_file_size [OPTION]… dbg int + + Observer.set_max_spans [OPTION]… dbg int + + Observer.set_max_traces [OPTION]… dbg int + + Observer.set_trace_log_dir [OPTION]… dbg string + + UPDATES.get [OPTION]… dbg timeout + Get updates from corosync-notifyd, this blocking call will return + when there is an update from corosync-notifyd or it is timed out + after timeout_p seconds + + create [OPTION]… dbg init_config + Creates the cluster. The call takes the initial config of the + initial host to add to the cluster. This will be the address on + which the rings will be created. + + declare-changed-addrs [OPTION]… dbg changed_members + Declare that one or more hosts in the cluster have changed + address. Only use this command if unable to rejoin the cluster + using `enable` because the IPv4 addresses of all nodes this node + previously saw are now invalid. If any one of these addresses + remains valid on an enabled node then this action is unnecessary. + + declare-dead [OPTION]… dbg dead_members + Declare that some hosts in the cluster are permanently dead. + Removes the hosts from the cluster. If the hosts do attempt to + rejoin the cluster in future, this may lead to fencing of other + hosts and/or data loss or data corruption. 
+ + destroy [OPTION]… dbg + Destroys a created cluster + + diagnostics [OPTION]… dbg + Returns diagnostic information about the cluster + + disable [OPTION]… dbg + Stop the cluster on this host; leave the rest of the cluster + enabled. The cluster can be reenabled either by restarting the + host, or by calling the `enable` API call. + + enable [OPTION]… dbg init_config + Rejoins the cluster following a call to `disable`. The parameter + passed is the cluster config to use (optional fields set to None + unless updated) in case it changed while the host was disabled. + (Note that changing optional fields isn't yet supported, TODO) + + join [OPTION]… dbg token new_member tls_config existing_members + Adds a node to an initialised cluster. Takes the IPv4 address of + the new member and a list of the addresses of all the existing + members. + + leave [OPTION]… dbg + Causes this host to permanently leave the cluster, but leaves the + rest of the cluster enabled. This is not a temporary removal - if + the admin wants the hosts to rejoin the cluster again, he will + have to call `join` rather than `enable`. + + set-tls-verification [OPTION]… dbg server_pem_path + trusted_bundle_path cn enabled + Enable or disable TLS verification for xapi/clusterd + communication. The trusted_bundle_path is ignored when + verification is disabled and can be empty + + switch-cluster-stack [OPTION]… dbg cluster_stack + Switch cluster stack version to the target + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + cluster_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). 
+ diff --git a/ocaml/xapi-idl/cluster/cluster_interface.ml b/ocaml/xapi-idl/cluster/cluster_interface.ml index e83e69363f2..d537cf0f99e 100644 --- a/ocaml/xapi-idl/cluster/cluster_interface.ml +++ b/ocaml/xapi-idl/cluster/cluster_interface.ml @@ -159,6 +159,9 @@ type optional_path = string option [@@deriving rpcty] type quorum_info = { is_quorate: bool ; total_votes: int + (* number of nodes that the cluster stack thinks are currently in the cluster *) + ; expected_votes: int + (* number of nodes that the cluster stack is expecting to be in the cluster *) ; quorum: int (** number of nodes required to form a quorum *) ; quorum_members: all_members option } @@ -179,6 +182,7 @@ type diagnostics = { ; is_quorate: bool ; is_running: bool ; total_votes: int + ; expected_votes: int ; quorum: int ; quorum_members: all_members option ; startup_finished: bool @@ -380,80 +384,5 @@ module LocalAPI (R : RPC) = struct (debug_info_p @-> timeout_p @-> returning result_p err) end - module Observer = struct - open TypeCombinators - - let endpoints_p = Param.mk ~name:"endpoints" (list Types.string) - - let bool_p = Param.mk ~name:"bool" Types.bool - - let uuid_p = Param.mk ~name:"uuid" Types.string - - let name_label_p = Param.mk ~name:"name_label" Types.string - - let dict_p = Param.mk ~name:"dict" dict - - let string_p = Param.mk ~name:"string" Types.string - - let int_p = Param.mk ~name:"int" Types.int - - let float_p = Param.mk ~name:"float" Types.float - - let create = - declare "Observer.create" [] - (debug_info_p - @-> uuid_p - @-> name_label_p - @-> dict_p - @-> endpoints_p - @-> bool_p - @-> returning unit_p err - ) - - let destroy = - declare "Observer.destroy" [] - (debug_info_p @-> uuid_p @-> returning unit_p err) - - let set_enabled = - declare "Observer.set_enabled" [] - (debug_info_p @-> uuid_p @-> bool_p @-> returning unit_p err) - - let set_attributes = - declare "Observer.set_attributes" [] - (debug_info_p @-> uuid_p @-> dict_p @-> returning unit_p err) - - let 
set_endpoints = - declare "Observer.set_endpoints" [] - (debug_info_p @-> uuid_p @-> endpoints_p @-> returning unit_p err) - - let init = declare "Observer.init" [] (debug_info_p @-> returning unit_p err) - - let set_trace_log_dir = - declare "Observer.set_trace_log_dir" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_export_interval = - declare "Observer.set_export_interval" [] - (debug_info_p @-> float_p @-> returning unit_p err) - - let set_host_id = - declare "Observer.set_host_id" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_max_traces = - declare "Observer.set_max_traces" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_spans = - declare "Observer.set_max_spans" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_file_size = - declare "Observer.set_max_file_size" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_compress_tracing_files = - declare "Observer.set_compress_tracing_files" [] - (debug_info_p @-> bool_p @-> returning unit_p err) - end + module Observer = Observer_helpers.ObserverAPI (R) end diff --git a/ocaml/xapi-idl/cluster/dune b/ocaml/xapi-idl/cluster/dune index 50777aeb2b3..f1ec6e321de 100644 --- a/ocaml/xapi-idl/cluster/dune +++ b/ocaml/xapi-idl/cluster/dune @@ -27,8 +27,6 @@ xapi-idl xapi-idl.cluster)) -(rule - (alias runtest) - (deps (:x cluster_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps cluster_cli.exe)) diff --git a/ocaml/xapi-idl/example/cli-help.t b/ocaml/xapi-idl/example/cli-help.t new file mode 100644 index 00000000000..c38ea73040c --- /dev/null +++ b/ocaml/xapi-idl/example/cli-help.t @@ -0,0 +1,90 @@ + $ ./example.exe --help=plain + NAME + Example-service + + SYNOPSIS + Example-service [OPTION]… + + DESCRIPTION + This is an example service which demonstrates the configuration + mechanism. 
+ + OPTIONS + --config=VAL (absent=/etc/example.exe.conf) + Location of configuration file + + --config-dir=VAL (absent=/etc/example.exe.conf.d) + Location of directory containing configuration file fragments + + --default-format=VAL (absent=vhd) + Default format for disk files + + --disable-logging-for=VAL + A space-separated list of debug modules to suppress logging from + + --inventory=VAL (absent=/etc/xensource-inventory) + Location of the inventory file + + --log=VAL (absent=syslog:daemon) + Where to write log messages + + --loglevel=VAL (absent=debug) + Log level + + --ls=VAL (absent=/bin/ls) + program used to list things + + --pidfile=VAL (absent=/var/run/example.exe.pid) + Filename to write process PID + + --queue-name=VAL (absent=org.xen.xapi.ffs) + Comma-separated list of queue names to listen on + + --search-path=VAL + Search path for resources + + --sh=VAL (absent=/bin/sh) + interpreter for arcane programming language + + --socket-path=VAL (absent=/var/xapi/socket) + Path of listening socket + + --sr-mount-path=VAL (absent=/mnt) + Default mountpoint for mounting remote filesystems + + --switch-path=VAL (absent=/var/run/message-switch/sock) + Unix domain socket path on localhost where the message switch is + listening + + --timeslice=VAL (absent=0.05) + timeslice in seconds + + --use-switch=VAL (absent=true) + true if the message switch is to be enabled + + COMMON OPTIONS + These options are common to all services. + + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + Example-service exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). 
+ + BUGS + Check bug reports at http://github.com/xapi-project/xen-api + + diff --git a/ocaml/xapi-idl/example/dune b/ocaml/xapi-idl/example/dune index cf27e69dcf3..db360ff8030 100644 --- a/ocaml/xapi-idl/example/dune +++ b/ocaml/xapi-idl/example/dune @@ -9,8 +9,6 @@ ) (preprocess (pps ppx_deriving_rpc))) -(rule - (alias runtest) - (deps (:x example.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps example.exe)) diff --git a/ocaml/xapi-idl/gpumon/cli-help.t b/ocaml/xapi-idl/gpumon/cli-help.t new file mode 100644 index 00000000000..afe309b014f --- /dev/null +++ b/ocaml/xapi-idl/gpumon/cli-help.t @@ -0,0 +1,58 @@ + $ ./gpumon_cli.exe --help=plain + NAME + gpumon_cli - A CLI for the GPU monitoring API. This allows scripting + of the gpumon daemon for testing and debugging. This tool is not + intended to be used as an end user tool + + SYNOPSIS + gpumon_cli [COMMAND] … + + COMMANDS + get_pgpu_metadata [OPTION]… debug_info pgpu_address + Gets the metadata for a pGPU, given its address (PCI bus ID). + + get_pgpu_vgpu_compatibility [OPTION]… debug_info + nvidia_pgpu_metadata nvidia_vgpu_metadata_list + Checks compatibility between a pGPU (on a host) and a list of + vGPUs (assigned to a VM). Note: A VM may use several vGPUs. The + use case is VM.suspend/VM.resume: before VM.resume + [nvidia_vgpu_metadata] of the suspended VM is checked against the + [nvidia_pgpu_metadata] on the host where the VM is resumed. + + get_pgpu_vm_compatibility [OPTION]… debug_info pgpu_address domid + nvidia_pgpu_metadata + Checks compatibility between a VM's vGPU(s) and another pGPU. + + get_vgpu_metadata [OPTION]… debug_info domid pgpu_address vgpu_uuid + Obtains metadata for all vGPUs running in a domain. + + nvml_attach [OPTION]… debug_info + Attach nVidia cards to Gpumon for metrics and compatibility + checking. 
+ + nvml_detach [OPTION]… debug_info + Detach nVidia cards from Gpumon + + nvml_is_attached [OPTION]… debug_info + Return true if nVidia cards are currently attached. + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + gpumon_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/gpumon/dune b/ocaml/xapi-idl/gpumon/dune index de10e06dae6..269a6690eeb 100644 --- a/ocaml/xapi-idl/gpumon/dune +++ b/ocaml/xapi-idl/gpumon/dune @@ -27,8 +27,6 @@ xapi-idl xapi-idl.gpumon)) -(rule - (alias runtest) - (deps (:x gpumon_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps gpumon_cli.exe)) diff --git a/ocaml/xapi-idl/gpumon/gpumon_interface.ml b/ocaml/xapi-idl/gpumon/gpumon_interface.ml index 465acf91f6d..ab02b38260c 100644 --- a/ocaml/xapi-idl/gpumon/gpumon_interface.ml +++ b/ocaml/xapi-idl/gpumon/gpumon_interface.ml @@ -87,7 +87,6 @@ let gpu_err = def= gpu_errors ; raiser= (fun e -> - log_backtrace () ; let exn = Gpumon_error e in error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn diff --git a/ocaml/xapi-idl/guard/privileged/cli-help.t b/ocaml/xapi-idl/guard/privileged/cli-help.t new file mode 100644 index 00000000000..0e990ca9490 --- /dev/null +++ b/ocaml/xapi-idl/guard/privileged/cli-help.t @@ -0,0 +1,53 @@ + $ ./xapiguard_cli.exe --help=plain + NAME + xapiguard_cli - A CLI for the deprivileged socket spawning API. This + allows scripting of the varstored and SWTPM deprivileging daemon for + testing and debugging. 
This tool is not intended to be used as an end + user tool + + SYNOPSIS + xapiguard_cli [COMMAND] … + + COMMANDS + varstore_create [OPTION]… dbg vm_uuid gid path + Create a deprivileged varstore socket that only accepts API calls + for a specific VM. The socket will be writable only to the + specified group. + + varstore_destroy [OPTION]… dbg gid path + Stop listening on varstore sockets for the specified group + + vtpm_create [OPTION]… dbg vm_uuid gid path + Create a deprivileged vtpm socket that only accepts API calls for + a specific VM. The socket will be writable only to the specified + group. + + vtpm_destroy [OPTION]… dbg gid path + Stop listening on vtpm sockets for the specified group + + vtpm_get_contents [OPTION]… dbg vtpm_uuid + Get vTPM contents blob + + vtpm_set_contents [OPTION]… dbg vtpm_uuid string + Set vTPM contents blob + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + xapiguard_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). 
+ diff --git a/ocaml/xapi-idl/guard/privileged/dune b/ocaml/xapi-idl/guard/privileged/dune index cdb888692d1..b5de6b38b8a 100644 --- a/ocaml/xapi-idl/guard/privileged/dune +++ b/ocaml/xapi-idl/guard/privileged/dune @@ -18,7 +18,7 @@ (package varstored-guard) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -26,8 +26,6 @@ xapi-idl.guard.privileged )) -(rule - (alias runtest) - (deps xapiguard_cli.exe) +(cram (package varstored-guard) - (action (run %{deps} --help=plain))) + (deps xapiguard_cli.exe)) diff --git a/ocaml/xapi-idl/guard/varstored/cli-help.t b/ocaml/xapi-idl/guard/varstored/cli-help.t new file mode 100644 index 00000000000..6f36f4bf5bd --- /dev/null +++ b/ocaml/xapi-idl/guard/varstored/cli-help.t @@ -0,0 +1,49 @@ + $ ./varstored_cli.exe --help=plain + NAME + varstored_cli - debug CLI + + SYNOPSIS + varstored_cli [COMMAND] … + + COMMANDS + VM.get_NVRAM [--socket=SOCKET] [OPTION]… string string + Get the current VM's NVRAM contents + + VM.get_by_uuid [--socket=SOCKET] [OPTION]… string string + Dummy, for wire compatibility with XAPI + + VM.set_NVRAM_EFI_variables [--socket=SOCKET] [OPTION]… string string + string + Set the current VM's NVRAM contents + + message.create [--socket=SOCKET] [OPTION]… string string int64 + string string string + Send an alert when booting a UEFI guest fails + + session.login_with_password [--socket=SOCKET] [OPTION]… string + string string string + Dummy, for wire compatibility with XAPI + + session.logout [--socket=SOCKET] [OPTION]… string + Dummy, for wire compatibility with XAPI + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + varstored_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. 
+ + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/guard/varstored/dune b/ocaml/xapi-idl/guard/varstored/dune index abded2e1c17..6957b6c7a78 100644 --- a/ocaml/xapi-idl/guard/varstored/dune +++ b/ocaml/xapi-idl/guard/varstored/dune @@ -17,7 +17,7 @@ (modules varstored_cli) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -25,8 +25,6 @@ xapi-idl.guard.varstored )) -(rule - (alias runtest) - (deps varstored_cli.exe) +(cram (package xapi-idl) - (action (run %{deps} --help=plain))) + (deps varstored_cli.exe)) diff --git a/ocaml/xapi-idl/lib/coverage/enabled.ml b/ocaml/xapi-idl/lib/coverage/enabled.ml index 461221db512..11ac510f2d3 100644 --- a/ocaml/xapi-idl/lib/coverage/enabled.ml +++ b/ocaml/xapi-idl/lib/coverage/enabled.ml @@ -103,12 +103,12 @@ module Dispatcher = struct |> (* filter out ourselves *) List.filter (fun q -> self <> q) |> (* best-effort: collect and return all non-failed results, log - errors *) + errors *) List.rev_map (rpc_ignore_err ~t ~body) |> (* multiple return values converted to a single string, suitable - for use in a command like: mv $(message-cli call - org.xen.xapi.coverage.dispatch --timeout 60 --body 'dump - {jobid}') /tmp/coverage/ *) + for use in a command like: mv $(message-cli call + org.xen.xapi.coverage.dispatch --timeout 60 --body 'dump + {jobid}') /tmp/coverage/ *) String.concat " " |> ok ) diff --git a/ocaml/xapi-idl/lib/debug_info.ml b/ocaml/xapi-idl/lib/debug_info.ml index 5483d6bc451..e3845fa080d 100644 --- a/ocaml/xapi-idl/lib/debug_info.ml +++ b/ocaml/xapi-idl/lib/debug_info.ml @@ -76,13 +76,14 @@ let to_log_string t = t.log (* Sets the logging context based on `dbg`. Also adds a new tracing span, linked to the parent span from `dbg`, if available. 
*) -let with_dbg ?(with_thread = false) ~module_name ~name ~dbg f = +let with_dbg ?attributes ?(with_thread = false) ?(module_name = "") ~name ~dbg f + = let di = of_string dbg in let f_with_trace () = let name = match module_name with "" -> name | _ -> module_name ^ "." ^ name in - Tracing.with_tracing ~parent:di.tracing ~name (fun span -> + Tracing.with_tracing ?attributes ~parent:di.tracing ~name (fun span -> match span with Some _ -> f {di with tracing= span} | None -> f di ) in diff --git a/ocaml/xapi-idl/lib/debug_info.mli b/ocaml/xapi-idl/lib/debug_info.mli index fa2f6ff5d6a..2b0244ac94a 100644 --- a/ocaml/xapi-idl/lib/debug_info.mli +++ b/ocaml/xapi-idl/lib/debug_info.mli @@ -23,8 +23,9 @@ val to_string : t -> string val to_log_string : t -> string val with_dbg : - ?with_thread:bool - -> module_name:string + ?attributes:(string * string) list + -> ?with_thread:bool + -> ?module_name:string -> name:string -> dbg:string -> (t -> 'a) diff --git a/ocaml/xapi-idl/lib/dune b/ocaml/xapi-idl/lib/dune index 8f0d7ca27de..4f29504a97a 100644 --- a/ocaml/xapi-idl/lib/dune +++ b/ocaml/xapi-idl/lib/dune @@ -40,7 +40,7 @@ (wrapped false) (preprocess (per_module - ((pps ppx_deriving_rpc) Xcp_channel Xcp_channel_protocol TypeCombinators) + ((pps ppx_deriving_rpc) Xcp_channel Xcp_channel_protocol TypeCombinators Observer_helpers Observer_skeleton) ((pps ppx_sexp_conv ppx_deriving_rpc) Xcp_pci)))) (library diff --git a/ocaml/xapi-idl/lib/observer_helpers.ml b/ocaml/xapi-idl/lib/observer_helpers.ml new file mode 100644 index 00000000000..c2ea58bb8d3 --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_helpers.ml @@ -0,0 +1,266 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Rpc +open Idl + +module D = Debug.Make (struct let name = "observer_interface" end) + +open D + +let service_name = "observer" + +let queue_name = Xcp_service.common_prefix ^ service_name + +let default_sockets_dir = "/var/lib/xcp" + +let default_path = Filename.concat default_sockets_dir service_name + +let uri () = "file:" ^ default_path + +module Errors = struct + type error = + | Internal_error of string + | Unimplemented of string + | Unknown_error + [@@default Unknown_error] [@@deriving rpcty] +end + +exception Observer_error of Errors.error + +let err = + let open Error in + { + def= Errors.error + ; raiser= + (fun e -> + let exn = Observer_error e in + error "%s (%s)" (Printexc.to_string exn) __LOC__ ; + raise exn + ) + ; matcher= + (function + | Observer_error e as exn -> + error "%s (%s)" (Printexc.to_string exn) __LOC__ ; + Some e + | exn -> + error "%s (%s)" (Printexc.to_string exn) __LOC__ ; + Some (Internal_error (Printexc.to_string exn)) + ) + } + +(** An uninterpreted string associated with the operation. 
*) +type debug_info = string [@@deriving rpcty] + +module ObserverAPI (R : RPC) = struct + open R + open TypeCombinators + + let description = + let open Interface in + { + name= "Observer" + ; namespace= None + ; description= + [ + "This interface is used to create, update and destroy Observers to \ + control the use of tracing in different xapi components" + ] + ; version= (1, 0, 0) + } + + let implementation = implement description + + let dbg_p = Param.mk ~name:"dbg" Types.string + + let unit_p = Param.mk ~name:"unit" Types.unit + + let endpoints_p = Param.mk ~name:"endpoints" (list Types.string) + + let bool_p = Param.mk ~name:"bool" Types.bool + + let uuid_p = Param.mk ~name:"uuid" Types.string + + let name_label_p = Param.mk ~name:"name_label" Types.string + + let dict_p = Param.mk ~name:"dict" dict + + let string_p = Param.mk ~name:"string" Types.string + + let int_p = Param.mk ~name:"int" Types.int + + let float_p = Param.mk ~name:"float" Types.float + + let create = + declare "Observer.create" [] + (dbg_p + @-> uuid_p + @-> name_label_p + @-> dict_p + @-> endpoints_p + @-> bool_p + @-> returning unit_p err + ) + + let destroy = + declare "Observer.destroy" [] (dbg_p @-> uuid_p @-> returning unit_p err) + + let set_enabled = + declare "Observer.set_enabled" [] + (dbg_p @-> uuid_p @-> bool_p @-> returning unit_p err) + + let set_attributes = + declare "Observer.set_attributes" [] + (dbg_p @-> uuid_p @-> dict_p @-> returning unit_p err) + + let set_endpoints = + declare "Observer.set_endpoints" [] + (dbg_p @-> uuid_p @-> endpoints_p @-> returning unit_p err) + + let init = declare "Observer.init" [] (dbg_p @-> returning unit_p err) + + let set_trace_log_dir = + declare "Observer.set_trace_log_dir" [] + (dbg_p @-> string_p @-> returning unit_p err) + + let set_export_interval = + declare "Observer.set_export_interval" [] + (dbg_p @-> float_p @-> returning unit_p err) + + let set_export_chunk_size = + declare "Observer.set_export_chunk_size" [] + (dbg_p @-> 
int_p @-> returning unit_p err) + + let set_max_spans = + declare "Observer.set_max_spans" [] + (dbg_p @-> int_p @-> returning unit_p err) + + let set_max_traces = + declare "Observer.set_max_traces" [] + (dbg_p @-> int_p @-> returning unit_p err) + + let set_max_depth = + declare "Observer.set_max_depth" [] + (dbg_p @-> int_p @-> returning unit_p err) + + let set_max_file_size = + declare "Observer.set_max_file_size" [] + (dbg_p @-> int_p @-> returning unit_p err) + + let set_host_id = + declare "Observer.set_host_id" [] + (dbg_p @-> string_p @-> returning unit_p err) + + let set_compress_tracing_files = + declare "Observer.set_compress_tracing_files" [] + (dbg_p @-> bool_p @-> returning unit_p err) +end + +module type Server_impl = sig + type context = unit + + val create : + context + -> dbg:debug_info + -> uuid:string + -> name_label:string + -> attributes:(string * string) list + -> endpoints:string list + -> enabled:bool + -> unit + + val destroy : context -> dbg:debug_info -> uuid:string -> unit + + val set_enabled : + context -> dbg:debug_info -> uuid:string -> enabled:bool -> unit + + val set_attributes : + context + -> dbg:debug_info + -> uuid:string + -> attributes:(string * string) list + -> unit + + val set_endpoints : + context -> dbg:debug_info -> uuid:string -> endpoints:string list -> unit + + val init : context -> dbg:debug_info -> unit + + val set_trace_log_dir : context -> dbg:debug_info -> dir:string -> unit + + val set_export_interval : context -> dbg:debug_info -> interval:float -> unit + + val set_export_chunk_size : context -> dbg:debug_info -> size:int -> unit + + val set_max_spans : context -> dbg:debug_info -> spans:int -> unit + + val set_max_traces : context -> dbg:debug_info -> traces:int -> unit + + val set_max_depth : context -> dbg:debug_info -> depth:int -> unit + + val set_max_file_size : context -> dbg:debug_info -> file_size:int -> unit + + val set_host_id : context -> dbg:debug_info -> host_id:string -> unit + + val 
set_compress_tracing_files : + context -> dbg:debug_info -> enabled:bool -> unit +end + +module Server (Impl : Server_impl) () = struct + module S = ObserverAPI (Idl.Exn.GenServer ()) + + let _ = + S.create (fun dbg uuid name_label attributes endpoints enabled -> + Impl.create () ~dbg ~uuid ~name_label ~attributes ~endpoints ~enabled + ) ; + S.destroy (fun dbg uuid -> Impl.destroy () ~dbg ~uuid) ; + S.set_enabled (fun dbg uuid enabled -> + Impl.set_enabled () ~dbg ~uuid ~enabled + ) ; + S.set_attributes (fun dbg uuid attributes -> + Impl.set_attributes () ~dbg ~uuid ~attributes + ) ; + S.set_endpoints (fun dbg uuid endpoints -> + Impl.set_endpoints () ~dbg ~uuid ~endpoints + ) ; + S.init (fun dbg -> Impl.init () ~dbg) ; + S.set_trace_log_dir (fun dbg dir -> Impl.set_trace_log_dir () ~dbg ~dir) ; + S.set_export_interval (fun dbg interval -> + Impl.set_export_interval () ~dbg ~interval + ) ; + S.set_export_chunk_size (fun dbg size -> + Impl.set_export_chunk_size () ~dbg ~size + ) ; + S.set_max_spans (fun dbg spans -> Impl.set_max_spans () ~dbg ~spans) ; + S.set_max_traces (fun dbg traces -> Impl.set_max_traces () ~dbg ~traces) ; + S.set_max_depth (fun dbg depth -> Impl.set_max_depth () ~dbg ~depth) ; + S.set_max_file_size (fun dbg file_size -> + Impl.set_max_file_size () ~dbg ~file_size + ) ; + S.set_host_id (fun dbg host_id -> Impl.set_host_id () ~dbg ~host_id) ; + S.set_compress_tracing_files (fun dbg enabled -> + Impl.set_compress_tracing_files () ~dbg ~enabled + ) + + (* Bind all *) + let process call = Idl.Exn.server S.implementation call +end + +module Client = ObserverAPI (Idl.Exn.GenClient (struct + let rpc call = + Xcp_client.( + retry_and_switch_rpc call ~use_switch:!use_switch ~queue_name + ~dststr:queue_name ~uri + ) +end)) diff --git a/ocaml/xapi-idl/lib/observer_helpers.mli b/ocaml/xapi-idl/lib/observer_helpers.mli new file mode 100644 index 00000000000..489310a0847 --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_helpers.mli @@ -0,0 +1,248 @@ +val 
queue_name : string + +val default_path : string + +module Errors : sig + type error = + | Internal_error of string + | Unimplemented of string + | Unknown_error + + val typ_of_error : error Rpc.Types.typ + + val error : error Rpc.Types.def +end + +exception Observer_error of Errors.error + +type debug_info = string + +(** ObserverAPI contains the declarations for the RPCs which are sent to + Observer modules when the corresponding function is called on the Observer + see ocaml/libs/tracing/ and ocaml/xapi/xapi_observer.ml *) +module ObserverAPI : functor (R : Idl.RPC) -> sig + val description : Idl.Interface.description + + val implementation : R.implementation + + val create : + ( debug_info + -> string + -> string + -> (string * string) list + -> string list + -> bool + -> (unit, Errors.error) R.comp + ) + R.res + (** [create dbg uuid name attributes endpoints enabled] notifies the + forwarder that an Observer with [uuid] has been created. The subsequent + parameters are the fields the Observer was created with. *) + + val destroy : (debug_info -> string -> (unit, Errors.error) R.comp) R.res + (** [destroy dbg uuid] notifies the forwarder that an Observer with [uuid] + has been destroyed. *) + + val set_enabled : + (debug_info -> string -> bool -> (unit, Errors.error) R.comp) R.res + (** [set_enabled dbg uuid enabled] notifies the fowarder that the Observer + with [uuid] has had its enabled field set to [enabled]. *) + + val set_attributes : + ( debug_info + -> string + -> (string * string) list + -> (unit, Errors.error) R.comp + ) + R.res + (** [set_attributes dbg uuid attributes] notifies the fowarder that the + Observer with [uuid] has had its attributes field set to [attributes]. *) + + val set_endpoints : + (debug_info -> string -> string list -> (unit, Errors.error) R.comp) R.res + (** [set_endpoints dbg uuid endpoints] notifies the fowarder that the Observer + with [uuid] has had its endpoints field set to [endpoints]. 
*) + + val init : (debug_info -> (unit, Errors.error) R.comp) R.res + (** [init dbg] notifies the forwarder that it should perform any tracing + initialisation. *) + + val set_trace_log_dir : + (debug_info -> string -> (unit, Errors.error) R.comp) R.res + (** [set_trace_log_dir dbg dir] notifies the fowarder that the trace_log_dir + has been set to [dir]. *) + + val set_export_interval : + (debug_info -> float -> (unit, Errors.error) R.comp) R.res + (** [set_export_interval dbg interval] notifies the fowarder that the interval + between trace exports has been set to [interval]. *) + + val set_export_chunk_size : + (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_export_chunk_size dbg size] notifies the fowarder that the max size + of each chunk of finished spans exported has been set to [size]. *) + + val set_max_spans : (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_spans dbg spans] notifies the fowarder that the max number of + spans has been set to [spans]. *) + + val set_max_traces : (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_traces dbg traces] notifies the fowarder that the max number of + traces has been set to [traces]. *) + + val set_max_depth : (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_depth dbg depth] notifies the fowarder that the max depth of + a span in a trace has been set to [depth]. *) + + val set_max_file_size : + (debug_info -> int -> (unit, Errors.error) R.comp) R.res + (** [set_max_file_size dbg file_size] notifies the fowarder that the max file + size has been set to [file_size]. *) + + val set_host_id : (debug_info -> string -> (unit, Errors.error) R.comp) R.res + (** [set_host_id dbg host_id] notifies the fowarder that the host to be traced + has been set to [host_id]. 
*) + + val set_compress_tracing_files : + (debug_info -> bool -> (unit, Errors.error) R.comp) R.res + (** [set_compress_tracing_files dbg enabled] notifies the fowarder that the + compression of tracing files has been set to [enabled]. *) +end + +(** A Server_impl module will define how the Server responds to ObserverAPI calls *) +module type Server_impl = sig + type context = unit + + val create : + context + -> dbg:debug_info + -> uuid:string + -> name_label:string + -> attributes:(string * string) list + -> endpoints:string list + -> enabled:bool + -> unit + + val destroy : context -> dbg:debug_info -> uuid:string -> unit + + val set_enabled : + context -> dbg:debug_info -> uuid:string -> enabled:bool -> unit + + val set_attributes : + context + -> dbg:debug_info + -> uuid:string + -> attributes:(string * string) list + -> unit + + val set_endpoints : + context -> dbg:debug_info -> uuid:string -> endpoints:string list -> unit + + val init : context -> dbg:debug_info -> unit + + val set_trace_log_dir : context -> dbg:debug_info -> dir:string -> unit + + val set_export_interval : context -> dbg:debug_info -> interval:float -> unit + + val set_export_chunk_size : context -> dbg:debug_info -> size:int -> unit + + val set_max_spans : context -> dbg:debug_info -> spans:int -> unit + + val set_max_traces : context -> dbg:debug_info -> traces:int -> unit + + val set_max_depth : context -> dbg:debug_info -> depth:int -> unit + + val set_max_file_size : context -> dbg:debug_info -> file_size:int -> unit + + val set_host_id : context -> dbg:debug_info -> host_id:string -> unit + + val set_compress_tracing_files : + context -> dbg:debug_info -> enabled:bool -> unit +end + +(** A Server for receiving ObserverAPI calls *) +module Server : functor (_ : Server_impl) () -> sig + module S : sig + val create : + ( debug_info + -> string + -> string + -> (string * string) list + -> string list + -> bool + -> unit + ) + -> unit + + val destroy : (debug_info -> string -> unit) -> 
unit + + val set_enabled : (debug_info -> string -> bool -> unit) -> unit + + val set_attributes : + (debug_info -> string -> (string * string) list -> unit) -> unit + + val set_endpoints : (debug_info -> string -> string list -> unit) -> unit + + val init : (debug_info -> unit) -> unit + + val set_trace_log_dir : (debug_info -> string -> unit) -> unit + + val set_export_interval : (debug_info -> float -> unit) -> unit + + val set_export_chunk_size : (debug_info -> int -> unit) -> unit + + val set_max_spans : (debug_info -> int -> unit) -> unit + + val set_max_traces : (debug_info -> int -> unit) -> unit + + val set_max_depth : (debug_info -> int -> unit) -> unit + + val set_max_file_size : (debug_info -> int -> unit) -> unit + + val set_host_id : (debug_info -> string -> unit) -> unit + + val set_compress_tracing_files : (debug_info -> bool -> unit) -> unit + end + + val process : Rpc.call -> Rpc.response +end + +(** A client for sending ObserverAPI calls to the above queue_name *) +module Client : sig + val create : + debug_info + -> string + -> string + -> (string * string) list + -> string list + -> bool + -> unit + + val destroy : debug_info -> string -> unit + + val set_enabled : debug_info -> string -> bool -> unit + + val set_attributes : debug_info -> string -> (string * string) list -> unit + + val set_endpoints : debug_info -> string -> string list -> unit + + val init : debug_info -> unit + + val set_trace_log_dir : debug_info -> string -> unit + + val set_export_interval : debug_info -> float -> unit + + val set_export_chunk_size : debug_info -> int -> unit + + val set_max_spans : debug_info -> int -> unit + + val set_max_traces : debug_info -> int -> unit + + val set_max_depth : debug_info -> int -> unit + + val set_max_file_size : debug_info -> int -> unit + + val set_host_id : debug_info -> string -> unit + + val set_compress_tracing_files : debug_info -> bool -> unit +end diff --git a/ocaml/xapi-idl/lib/observer_skeleton.ml 
b/ocaml/xapi-idl/lib/observer_skeleton.ml new file mode 100644 index 00000000000..59df66d246e --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_skeleton.ml @@ -0,0 +1,52 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) +[@@@ocaml.warning "-27"] + +let unimplemented x = + raise Observer_helpers.(Observer_error (Errors.Unimplemented x)) + +module Observer = struct + type context = unit + + let create ctx ~dbg ~uuid ~name_label ~attributes ~endpoints ~enabled = + unimplemented __FUNCTION__ + + let destroy ctx ~dbg ~uuid = unimplemented __FUNCTION__ + + let set_enabled ctx ~dbg ~uuid ~enabled = unimplemented __FUNCTION__ + + let set_attributes ctx ~dbg ~uuid ~attributes = unimplemented __FUNCTION__ + + let set_endpoints ctx ~dbg ~uuid ~endpoints = unimplemented __FUNCTION__ + + let init ctx ~dbg = unimplemented __FUNCTION__ + + let set_trace_log_dir ctx ~dbg ~dir = unimplemented __FUNCTION__ + + let set_export_interval ctx ~dbg ~interval = unimplemented __FUNCTION__ + + let set_export_chunk_size ctx ~dbg ~size = unimplemented __FUNCTION__ + + let set_max_spans ctx ~dbg ~spans = unimplemented __FUNCTION__ + + let set_max_traces ctx ~dbg ~traces = unimplemented __FUNCTION__ + + let set_max_depth ctx ~dbg ~depth = unimplemented __FUNCTION__ + + let set_max_file_size ctx ~dbg ~file_size = unimplemented __FUNCTION__ + + let set_host_id ctx ~dbg ~host_id = unimplemented __FUNCTION__ + + let set_compress_tracing_files ctx ~dbg ~enabled = 
unimplemented __FUNCTION__ +end diff --git a/ocaml/xapi-idl/lib/observer_skeleton.mli b/ocaml/xapi-idl/lib/observer_skeleton.mli new file mode 100644 index 00000000000..2b914ada71d --- /dev/null +++ b/ocaml/xapi-idl/lib/observer_skeleton.mli @@ -0,0 +1,50 @@ +(** This module provides dummy implementations for each Observer function. + These are intended to be used to fill in the functions that the module will + not ever use, as they will raise an Unimplemented error if called *) +module Observer : sig + type context = unit + + val create : + context + -> dbg:string + -> uuid:string + -> name_label:string + -> attributes:(string * string) list + -> endpoints:string list + -> enabled:bool + -> unit + + val destroy : context -> dbg:string -> uuid:string -> unit + + val set_enabled : context -> dbg:string -> uuid:string -> enabled:bool -> unit + + val set_attributes : + context + -> dbg:string + -> uuid:string + -> attributes:(string * string) list + -> unit + + val set_endpoints : + context -> dbg:string -> uuid:string -> endpoints:string list -> unit + + val init : context -> dbg:string -> unit + + val set_trace_log_dir : context -> dbg:string -> dir:string -> unit + + val set_export_interval : context -> dbg:string -> interval:float -> unit + + val set_export_chunk_size : context -> dbg:string -> size:int -> unit + + val set_max_spans : context -> dbg:string -> spans:int -> unit + + val set_max_traces : context -> dbg:string -> traces:int -> unit + + val set_max_depth : context -> dbg:string -> depth:int -> unit + + val set_max_file_size : context -> dbg:string -> file_size:int -> unit + + val set_host_id : context -> dbg:string -> host_id:string -> unit + + val set_compress_tracing_files : context -> dbg:string -> enabled:bool -> unit +end diff --git a/ocaml/xapi-idl/lib/updates.ml b/ocaml/xapi-idl/lib/updates.ml index 93904f2b65b..f6420da6834 100644 --- a/ocaml/xapi-idl/lib/updates.ml +++ b/ocaml/xapi-idl/lib/updates.ml @@ -66,6 +66,7 @@ functor ) let 
inject_barrier id filterfn t = + let filterfn key _ = filterfn key in ( { map= t.map ; barriers= diff --git a/ocaml/xapi-idl/lib/updates.mli b/ocaml/xapi-idl/lib/updates.mli index a054c5581d8..9b678a28839 100644 --- a/ocaml/xapi-idl/lib/updates.mli +++ b/ocaml/xapi-idl/lib/updates.mli @@ -64,7 +64,7 @@ module Updates : functor (Interface : INTERFACE) -> sig (* [inject_barrier n p t] Inject a barrier identified by [n] into [t]. The barrier will contain a snapshot of all current updates that match the predicate [p]. *) - val inject_barrier : int -> (Interface.Dynamic.id -> int -> bool) -> t -> unit + val inject_barrier : int -> (Interface.Dynamic.id -> bool) -> t -> unit (* Removes a barrier *) val remove_barrier : int -> t -> unit diff --git a/ocaml/xapi-idl/lib/xcp_client.ml b/ocaml/xapi-idl/lib/xcp_client.ml index 3ea0006b59c..a7ebd1f996a 100644 --- a/ocaml/xapi-idl/lib/xcp_client.ml +++ b/ocaml/xapi-idl/lib/xcp_client.ml @@ -38,10 +38,35 @@ let switch_rpc ?timeout queue_name string_of_call response_of_string = get_ok (Message_switch_unix.Protocol_unix.Client.connect ~switch:!switch_path ()) in - fun call -> + fun (call : Rpc.call) -> + let _span_parent = + call.params + |> List.find_map (function Rpc.Dict kv_list -> Some kv_list | _ -> None) + |> Fun.flip Option.bind + (List.find_map (function + | "debug_info", Rpc.String debug_info -> + let di = debug_info |> Debug_info.of_string in + di.tracing + | _ -> + None + ) + ) + in + let rpc_service = "message_switch" in + Tracing.with_tracing + ~attributes: + [ + ("rpc.system", "ocaml-rpc") + ; ("rpc.service", rpc_service) + ; ("server.address", queue_name) + ; ("rpc.method", call.name) + ] + ~parent:_span_parent + ~name:(rpc_service ^ "/" ^ call.name) + @@ fun _span_parent -> response_of_string (get_ok - (Message_switch_unix.Protocol_unix.Client.rpc ~t ?timeout + (Message_switch_unix.Protocol_unix.Client.rpc ?_span_parent ~t ?timeout ~queue:queue_name ~body:(string_of_call call) () ) ) @@ -165,3 +190,21 @@ let 
binary_rpc string_of_call response_of_string ?(srcstr = "unset") let json_binary_rpc = binary_rpc Jsonrpc.string_of_call Jsonrpc.response_of_string + +let rec retry_econnrefused f = + try f () with + | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> + (* debug "Caught ECONNREFUSED; retrying in 5s"; *) + Thread.delay 5. ; retry_econnrefused f + | e -> + (* error "Caught %s: does the service need restarting?" + (Printexc.to_string e); *) + raise e + +let retry_and_switch_rpc call ~use_switch ~queue_name ~dststr ~uri = + retry_econnrefused (fun () -> + if use_switch then + json_switch_rpc queue_name call + else + xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr uri call + ) diff --git a/ocaml/xapi-idl/lib/xcp_service.ml b/ocaml/xapi-idl/lib/xcp_service.ml index 817825c44fe..8250842689b 100644 --- a/ocaml/xapi-idl/lib/xcp_service.ml +++ b/ocaml/xapi-idl/lib/xcp_service.ml @@ -168,7 +168,7 @@ let setify = This needs to be as small as possible to reduce latency. Too small values reduce performance due to context switching overheads - + 4ms = 1/HZ in Dom0 seems like a good default, a better value will be written by a boot time service. *) @@ -357,7 +357,7 @@ let command_of ?(name = Sys.argv.(0)) ?(version = "unknown") ; `S _common_options ; `P "These options are common to all services." 
; `S "BUGS" - ; `P "Check bug reports at http://github.com/xapi-project/xcp-idl" + ; `P "Check bug reports at http://github.com/xapi-project/xen-api" ] in Cmd.v diff --git a/ocaml/xapi-idl/lib_test/updates_test.ml b/ocaml/xapi-idl/lib_test/updates_test.ml index 66c5f09450e..790e72854c1 100644 --- a/ocaml/xapi-idl/lib_test/updates_test.ml +++ b/ocaml/xapi-idl/lib_test/updates_test.ml @@ -84,7 +84,7 @@ let test_inject_barrier () = let u = M.empty scheduler in M.add update_a u ; M.add update_b u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; M.add update_a u ; M.add update_c u ; let barriers, updates, _id = M.get "dbg" None (Some 1) u in @@ -107,7 +107,7 @@ let test_remove_barrier () = let u = M.empty scheduler in M.add update_a u ; M.add update_b u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; M.add update_a u ; M.add update_c u ; M.remove_barrier 1 u ; @@ -125,7 +125,7 @@ let test_inject_barrier_rpc () = let u = M.empty scheduler in M.add update_a u ; M.add update_b u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; M.add update_a u ; M.add update_c u ; let barriers, updates, _id = M.get "dbg" None (Some 1) u in @@ -175,7 +175,7 @@ let test_filter () = let test_dump () = let u = M.empty scheduler in M.add update_a u ; - M.inject_barrier 1 (fun _ _ -> true) u ; + M.inject_barrier 1 (fun _ -> true) u ; let dump = M.Dump.make u in let dumped_rpc = M.Dump.rpc_of_dump dump in let expected_rpc = diff --git a/ocaml/xapi-idl/memory/cli-help.t b/ocaml/xapi-idl/memory/cli-help.t new file mode 100644 index 00000000000..ff85cda4f0d --- /dev/null +++ b/ocaml/xapi-idl/memory/cli-help.t @@ -0,0 +1,80 @@ + $ ./memory_cli.exe --help=plain + NAME + memory_cli - A CLI for the memory API. This allows scripting of the + squeeze daemon for testing and debugging. 
This tool is not intended to + be used as an end user tool + + SYNOPSIS + memory_cli [COMMAND] … + + COMMANDS + balance_memory [OPTION]… string + Forces a rebalance of the hosts memory. Blocks until the system is + in a stable state. + + delete_reservation [OPTION]… string string reservation_id + Deletes a reservation. Note that memory rebalancing is not done + synchronously after the operation has completed. + + get_diagnostics [OPTION]… string + Gets diagnostic information from the server + + get_domain_zero_policy [OPTION]… string + Gets the ballooning policy for domain zero. + + get_host_initial_free_memory [OPTION]… string + Gets the amount of initial free memory in a host + + get_host_reserved_memory [OPTION]… string + Gets the amount of reserved memory in a host. This is the lower + limit of memory that squeezed will ensure remains unused by any + domain or reservation. + + login [OPTION]… string string + Logs into the squeeze daemon. Any reservations previously made + with the specified service name not yet associated with a domain + will be removed. + + query_reservation_of_domain [OPTION]… string string int + Queries the reservation_id associated with a domain + + reserve_memory [OPTION]… string string int64 + [reserve_memory dbg session size] reserves memory for a domain. If + necessary, other domains will be ballooned down to ensure [size] + is available. The call returns a reservation_id that can later be + transferred to a domain. + + reserve_memory_range [OPTION]… string string int64 int64 + [reserve_memory_range dbg session min max] reserves memory for a + domain. If necessary, other domains will be ballooned down to + ensure enough memory is available. The amount of memory will be + between [min] and [max] according to the policy in operation. The + call returns a reservation_id and the actual memory amount that + can later be transferred to a domain. 
+ + transfer_reservation_to_domain [OPTION]… string string + reservation_id int + Transfers a reservation to a domain. This is called when the + domain has been created for the VM for which the reservation was + initially made. + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + memory_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/memory/dune b/ocaml/xapi-idl/memory/dune index f0f70e0a69e..7df6724a299 100644 --- a/ocaml/xapi-idl/memory/dune +++ b/ocaml/xapi-idl/memory/dune @@ -25,8 +25,6 @@ xapi-idl.memory )) -(rule - (alias runtest) - (deps (:x memory_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps memory_cli.exe)) diff --git a/ocaml/xapi-idl/memory/memory_interface.ml b/ocaml/xapi-idl/memory/memory_interface.ml index 7f31f13eeb0..f8e39496ef5 100644 --- a/ocaml/xapi-idl/memory/memory_interface.ml +++ b/ocaml/xapi-idl/memory/memory_interface.ml @@ -87,7 +87,6 @@ let err = def= errors ; raiser= (fun e -> - log_backtrace () ; let exn = MemoryError e in error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn diff --git a/ocaml/xapi-idl/network/cli-help.t b/ocaml/xapi-idl/network/cli-help.t new file mode 100644 index 00000000000..b8878a9040d --- /dev/null +++ b/ocaml/xapi-idl/network/cli-help.t @@ -0,0 +1,156 @@ + $ ./network_cli.exe --help=plain + NAME + network_cli - A CLI for the network API. This allows scripting of the + xcp-networkd daemon for testing and debugging. 
This tool is not + intended to be used as an end user tool + + SYNOPSIS + network_cli [COMMAND] … + + COMMANDS + Network.Bridge.add_port [OPTION]… string bridge name interfaces + Add port + + Network.Bridge.create [OPTION]… string name + Create bridge + + Network.Bridge.destroy [OPTION]… string force name + Destroy bridge + + Network.Bridge.get_all [OPTION]… string + Get all bridges + + Network.Bridge.get_all_bonds [OPTION]… string from_cache + get all bonds + + Network.Bridge.get_all_ports [OPTION]… string from_cache + Get all ports + + Network.Bridge.get_interfaces [OPTION]… string name + Get interfaces + + Network.Bridge.get_kind [OPTION]… string + Get backend kind + + Network.Bridge.get_physical_interfaces [OPTION]… string name + Get physical interfaces + + Network.Bridge.make_config [OPTION]… string conservative config + Make bridge configuration + + Network.Bridge.remove_port [OPTION]… string bridge name + Remove port + + Network.Bridge.set_persistent [OPTION]… string name value + Make bridge to persistent or not + + Network.Interface.bring_down [OPTION]… string name + Bring PIF down + + Network.Interface.exists [OPTION]… string name + Check interface existence + + Network.Interface.get_all [OPTION]… string + Get list of all interface names + + Network.Interface.get_capabilities [OPTION]… string name + Get capabilities on the interface + + Network.Interface.get_dns [OPTION]… string name + Get DNS + + Network.Interface.get_ipv4_addr [OPTION]… string name + Get list of IPv4 addresses of the interface + + Network.Interface.get_ipv4_gateway [OPTION]… string name + Get IPv4 gateway + + Network.Interface.get_ipv6_addr [OPTION]… string name + Get IPv6 address + + Network.Interface.get_ipv6_gateway [OPTION]… string name + Get IPv6 gateway + + Network.Interface.get_mac [OPTION]… string name + Get Mac address of the interface + + Network.Interface.get_mtu [OPTION]… string name + Get MTU + + Network.Interface.get_pci_bus_path [OPTION]… string name + Get PCI bus path of 
the interface + + Network.Interface.has_vlan [OPTION]… string name vlan + Check whether interface has vlan + + Network.Interface.is_connected [OPTION]… string name + Check whether interface is connected + + Network.Interface.is_physical [OPTION]… string name + Check whether interface is physical + + Network.Interface.is_up [OPTION]… string name + Check whether the interface is up + + Network.Interface.make_config [OPTION]… string conservative config + Make interface configuration + + Network.Interface.set_ipv4_conf [OPTION]… string name ipv4 + Set IPv4 configuration + + Network.Interface.set_persistent [OPTION]… string name value + Make PIF to persistent or not + + Network.PVS_proxy.configure_site [OPTION]… string t + Configure site + + Network.PVS_proxy.remove_site [OPTION]… string string + Remove site + + Network.Sriov.disable [OPTION]… string name + Disable SR-IOV + + Network.Sriov.enable [OPTION]… string name + Enable SR-IOV + + Network.Sriov.make_vf_config [OPTION]… string address sriov_pci_t + Make SR-IOV vf config + + Network.clear_state [OPTION]… + Clear configuration state then lock the writing of the state to + disk + + Network.reset_state [OPTION]… + Reset configuration state + + Network.set_dns_interface [OPTION]… string name + Set dns interface + + Network.set_gateway_interface [OPTION]… string name + Set gateway interface + + Network.sync_state [OPTION]… + Allow for the config state to be written to disk then perform a + write + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + network_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). 
+ + diff --git a/ocaml/xapi-idl/network/dune b/ocaml/xapi-idl/network/dune index a9a4869945d..d1016ae8821 100644 --- a/ocaml/xapi-idl/network/dune +++ b/ocaml/xapi-idl/network/dune @@ -21,7 +21,7 @@ (modules network_cli) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -29,8 +29,6 @@ xapi-idl.network )) -(rule - (alias runtest) - (deps (:x network_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps network_cli.exe)) diff --git a/ocaml/xapi-idl/network/network_interface.ml b/ocaml/xapi-idl/network/network_interface.ml index 6b27e31f5bf..06d38ff1a87 100644 --- a/ocaml/xapi-idl/network/network_interface.ml +++ b/ocaml/xapi-idl/network/network_interface.ml @@ -158,7 +158,10 @@ type interface_config_t = { ; ipv6_conf: ipv6 [@default None6] ; ipv6_gateway: Unix.inet_addr option [@default None] ; ipv4_routes: ipv4_route_t list [@default []] - ; dns: Unix.inet_addr list * string list [@default [], []] + ; dns: (Unix.inet_addr list * string list) option [@default None] + (** the list + of nameservers and domains to persist in /etc/resolv.conf. Must be None when + using a DHCP mode *) ; mtu: int [@default 1500] ; ethtool_settings: (string * string) list [@default []] ; ethtool_offload: (string * string) list [@default [("lro", "off")]] @@ -200,7 +203,7 @@ let default_interface = ; ipv6_conf= None6 ; ipv6_gateway= None ; ipv4_routes= [] - ; dns= ([], []) + ; dns= None ; mtu= 1500 ; ethtool_settings= [] ; ethtool_offload= [("lro", "off")] @@ -294,7 +297,6 @@ let err = def= errors ; raiser= (fun e -> - log_backtrace () ; let exn = Network_error e in error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn diff --git a/ocaml/xapi-idl/rrd/cli-help.t b/ocaml/xapi-idl/rrd/cli-help.t new file mode 100644 index 00000000000..1a15779d7f7 --- /dev/null +++ b/ocaml/xapi-idl/rrd/cli-help.t @@ -0,0 +1,188 @@ + $ ./rrd_cli.exe --help=plain + NAME + rrd-cli - A CLI for the Db monitoring API. 
This allows scripting of + the Rrd daemon for testing and debugging. This tool is not intended to + be used as an end user tool + + SYNOPSIS + rrd-cli [COMMAND] … + + COMMANDS + Deprecated.load_rrd [OPTION]… uuid timescale + Deprecated call. + + HA.disable [OPTION]… + Disables the HA metrics. + + HA.enable_and_update [OPTION]… statefile_latencies heartbeat_latency + xapi_latency + Enables the gathering of HA metrics, a built-in function of + xcp-rrdd. + + Plugin.Local.deregister [OPTION]… uid + Deregisters a plugin by uid + + Plugin.Local.next_reading [OPTION]… uid + Returns the number of seconds until the next reading will be + taken. + + Plugin.Local.register [OPTION]… uid info protocol + [Plugin.Local.register uid info protocol] registers a plugin as a + source of a set of data-sources. [uid] is a unique identifier for + the plugin, often the name of the plugin. [info] is the RRD + frequency, and [protocol] specifies whether the plugin will be + using V1 or V2 of the protocol. + + Plugin.deregister [OPTION]… uid + Preserved for backwards compatibility. Deregesters a local plugin. + + Plugin.get_header [OPTION]… + Returns header string. This string should be copied exactly to the + start of the shared memory containing the data source + + Plugin.get_path [OPTION]… uid + Returns path in the local filesystem to place the data source file + + Plugin.next_reading [OPTION]… uid + Returns the time until the next reading. + + Plugin.register [OPTION]… uid frequency + Preserved for backwards compatibility. Equivalent to a Local + plugin registration with V1 protocol. + + add_host_ds [OPTION]… ds_name + Adds a host data source to the host RRD. This causes the data + source to be recorded if it wasn't a default data source. + + add_sr_ds [OPTION]… sr_uuid ds_name + Adds an SR data source to the SR RRD. This causes the data source + to be recorded if it wasn't a default data source. + + add_vm_ds [OPTION]… vm_uuid domid ds_name + Adds a VM data source to the VM RRD. 
This causes the data source + to be recorded if it wasn't a default data source. + + archive_rrd [OPTION]… vm_uuid + Sends the VM RRD either to local disk or the remote address if + specified, and removes it from memory. Called on VM + shutdown/suspend. + + archive_sr_rrd [OPTION]… sr_uuid + Saves the SR RRD to the local disk. Returns the path to the saved + RRD so it can be copied onto the SR before it is detached. + + backup_rrds [OPTION]… + Backs up RRD data to disk. This should be done periodically to + ensure that if the host crashes we don't lose too much data. + + forget_host_ds [OPTION]… ds_name + Forgets the recorded archives for the named data source. Note that + if the data source is marked as default, new data coming in will + cause the archive to be recreated. + + forget_sr_ds [OPTION]… sr_uuid ds_name + Forgets the recorded archives for the named SR data source. Note + that if the data source is marked as default, new data coming in + will cause the archive to be recreated. + + forget_vm_ds [OPTION]… vm_uuid ds_name + Forgets the recorded archives for the named VM data source. Note + that if the data source is marked as default, new data coming in + will cause the archive to be recreated. + + has_vm_rrd [OPTION]… vm_uuid + Returns `true` if xcp-rrdd has an RRD for the specified VM in + memory + + migrate_rrd [OPTION]… remote_address vm_uuid host_uuid + Migrate_push - used by the migrate code to push an RRD directly to + a remote host without going via the master. If the host is on a + different pool, you must pass both the remote_address and + session_id parameters. 
+ + push_rrd_local [OPTION]… vm_uuid domid + Loads a VM RRD from local storage, associates it with the + specified domid, and starts recording all data sources related to + the VM to that RRD + + push_rrd_remote [OPTION]… vm_uuid remote_address + Loads a VM RRD from local storage and pushes it to a remote host + + push_sr_rrd [OPTION]… sr_uuid path + Loads the RRD from the path specified on the local disk. + Overwrites any RRD already in memory for the SR. Data sources will + subsequently be recorded to this RRD. + + query_host_ds [OPTION]… ds_name + Returns the current value of the named host data source. Note this + returns the raw data source value, not the smoothed last value of + the RRA. + + query_possible_host_dss [OPTION]… + Returns list of possible host DSs. This will include data sources + not currently being recorded into archives. + + query_possible_sr_dss [OPTION]… sr_uuid + Returns list of possible SR DSs. This will include data sources + not currently being recorded into archives. + + query_possible_vm_dss [OPTION]… vm_uuid + Returns list of possible VM DSs. This will include data sources + not currently being recorded into archives. + + query_sr_ds [OPTION]… sr_uuid ds_name + Returns the current value of the named VM data source. Note this + returns the raw data source value, not the smoothed last value of + the RRA. + + query_vm_ds [OPTION]… vm_uuid ds_name + Returns the current value of the named VM data source. Note this + returns the raw data source value, not the smoothed last value of + the RRA. + + remove_rrd [OPTION]… uuid + Removes a VM RRD from the local filesystem, if it exists. + + save_rrds [OPTION]… + Backs up RRD data to disk on localhost. This should be done + periodically to ensure that if the host crashes we don't lose too + much data. + + send_host_rrd_to_master [OPTION]… master_address + Called on host shutdown/reboot to send the Host RRD to the master + for backup. 
+ + set_cache_sr [OPTION]… sr_uuid + Sets the uuid of the cache SR. If this is set, statistics about + the usage of the cache will be recorded into the host SR. + + unset_cache_sr [OPTION]… + Unsets the cache_sr. No futher data will be gathered about cache + usage, but existing archive data will not be deleted. + + update_use_min_max [OPTION]… value + Set the value of the `use_min_max` variable. If this is `true`, + when creating a new RRD, archives for the minimum and maximum + observed values will be created alongside the standard archive of + average values + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + rrd-cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). + diff --git a/ocaml/xapi-idl/rrd/dune b/ocaml/xapi-idl/rrd/dune index f7b2a8e7b70..e0e8693c13f 100644 --- a/ocaml/xapi-idl/rrd/dune +++ b/ocaml/xapi-idl/rrd/dune @@ -49,7 +49,7 @@ (modes exe) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -57,9 +57,6 @@ xapi-idl.rrd )) -(rule - (alias runtest) - (deps (:x rrd_cli.exe)) +(cram (package xapi-tools) - (action (run %{x} --help=plain))) - + (deps rrd_cli.exe)) diff --git a/ocaml/xapi-idl/rrd/rrd_client.ml b/ocaml/xapi-idl/rrd/rrd_client.ml index abb12a118de..08a9b731f71 100644 --- a/ocaml/xapi-idl/rrd/rrd_client.ml +++ b/ocaml/xapi-idl/rrd/rrd_client.ml @@ -13,26 +13,12 @@ *) open Rrd_interface -open Xcp_client - -let rec retry_econnrefused f = - try f () with - | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> - (* debug "Caught ECONNREFUSED; retrying in 5s"; *) - Thread.delay 5. 
; retry_econnrefused f - | e -> - (* error "Caught %s: does the rrd service need restarting?" - (Printexc.to_string e); *) - raise e +(* TODO: use_switch=false as the message switch doesn't handle raw HTTP very well *) let rpc call = - retry_econnrefused (fun () -> - (* TODO: the message switch doesn't handle raw HTTP very well *) - if (* !use_switch *) false then - json_switch_rpc !queue_name call - else - xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr:"rrd" Rrd_interface.uri - call + Xcp_client.( + retry_and_switch_rpc call ~use_switch:false ~queue_name:!queue_name + ~dststr:"rrd" ~uri ) module Client = RPC_API (Idl.Exn.GenClient (struct let rpc = rpc end)) diff --git a/ocaml/xapi-idl/rrd/rrd_interface.ml b/ocaml/xapi-idl/rrd/rrd_interface.ml index bee3c646d34..066912eacf2 100644 --- a/ocaml/xapi-idl/rrd/rrd_interface.ml +++ b/ocaml/xapi-idl/rrd/rrd_interface.ml @@ -29,6 +29,8 @@ let default_sockets_dir = "/var/lib/xcp" let daemon_name = "xcp-rrdd" +let max_supported_vms = 1024 + let default_path = ref (Filename.concat default_sockets_dir daemon_name) let forwarded_path = @@ -410,18 +412,6 @@ module RPC_API (R : RPC) = struct ] (value_p @-> returning unit_p rrd_err) - let update_vm_memory_target = - let target_p = - Param.mk ~name:"target" ~description:["VM memory target"] Types.int64 - in - declare "update_vm_memory_target" - [ - "Sets the `memory_target` value for a VM. This is called by xapi when \ - it is told by" - ; "xenopsd that squeezed has changed the target for a VM." 
- ] - (domid_p @-> target_p @-> returning unit_p rrd_err) - let set_cache_sr = declare "set_cache_sr" [ diff --git a/ocaml/xapi-idl/storage/storage_client.ml b/ocaml/xapi-idl/storage/storage_client.ml index b66636daf6a..eeb0e765170 100644 --- a/ocaml/xapi-idl/storage/storage_client.ml +++ b/ocaml/xapi-idl/storage/storage_client.ml @@ -13,25 +13,11 @@ *) open Storage_interface -open Xcp_client -let rec retry_econnrefused f = - try f () with - | Unix.Unix_error (Unix.ECONNREFUSED, "connect", _) -> - (* debug "Caught ECONNREFUSED; retrying in 5s"; *) - Thread.delay 5. ; retry_econnrefused f - | e -> - (* error "Caught %s: does the storage service need restarting?" - (Printexc.to_string e); *) - raise e - -module Client = Storage_interface.StorageAPI (Idl.Exn.GenClient (struct +module Client = StorageAPI (Idl.Exn.GenClient (struct let rpc call = - retry_econnrefused (fun () -> - if !use_switch then - json_switch_rpc !queue_name call - else - xml_http_rpc ~srcstr:(get_user_agent ()) ~dststr:"storage" - Storage_interface.uri call + Xcp_client.( + retry_and_switch_rpc call ~use_switch:!use_switch ~queue_name:!queue_name + ~dststr:"storage" ~uri ) end)) diff --git a/ocaml/xapi-idl/storage/storage_interface.ml b/ocaml/xapi-idl/storage/storage_interface.ml index 542312c6448..eaabacc9e8f 100644 --- a/ocaml/xapi-idl/storage/storage_interface.ml +++ b/ocaml/xapi-idl/storage/storage_interface.ml @@ -175,6 +175,9 @@ let parse_nbd_uri nbd = | _ -> fail () +let parse_nbd_uri_opt nbd = + try Some (parse_nbd_uri nbd) with Failure _e -> None + (** Separates the implementations of the given backend returned from the VDI.attach2 SMAPIv2 call based on their type *) let implementations_of_backend backend = @@ -192,6 +195,16 @@ let implementations_of_backend backend = ) ([], [], [], []) backend.implementations +let nbd_export_of_attach_info (backend : backend) = + let _, _, _, nbds = implementations_of_backend backend in + match nbds with + | [] -> + debug "%s no nbd uri found" 
__FUNCTION__ ; + None + | uri :: _ -> + debug "%s found nbd uri %s" __FUNCTION__ uri.uri ; + parse_nbd_uri_opt uri |> Option.map snd + (** Uniquely identifies the contents of a VDI *) type content_id = string [@@deriving rpcty] @@ -291,12 +304,42 @@ module Mirror = struct } [@@deriving rpcty] - type mirror_receive_result = Vhd_mirror of mirror_receive_result_vhd_t + type mirror_receive_result_smapiv3_t = { + mirror_vdi: vdi_info + ; mirror_datapath: dp + ; nbd_export: string + } + [@@deriving rpcty] + + (* The variant of the mirror receive result depends on the SMAPI version being used, + rather than the VDI image type. We call the new variant SMAPIv3_mirror to reflect + this, but keep the old one Vhd_mirror for backwards compatability reasons. *) + type mirror_receive_result = + | Vhd_mirror of mirror_receive_result_vhd_t + | SMAPIv3_mirror of mirror_receive_result_smapiv3_t [@@deriving rpcty] type similars = content_id list [@@deriving rpcty] + + type copy_operation_v1 = string [@@deriving rpcty, show {with_path= false}] + + type mirror_operation_v1 = string [@@deriving rpcty, show {with_path= false}] + + (* SMAPIv3 mirror operation *) + type operation = + | CopyV1 of copy_operation_v1 + | MirrorV1 of mirror_operation_v1 + [@@deriving rpcty, show {with_path= false}] + + (* status of SMAPIv3 mirror *) + type status = {failed: bool; complete: bool; progress: float option} + [@@deriving rpcty] end +type operation = Mirror.operation + +type status = Mirror.status + type async_result_t = Vdi_info of vdi_info | Mirror_id of Mirror.id [@@deriving rpcty] @@ -362,6 +405,19 @@ module Errors = struct | No_storage_plugin_for_sr of string | Content_ids_do_not_match of (string * string) | Missing_configuration_parameter of string + (* raised when preparing the environment for SXM, for example, when the dest + host creates VDIs for data mirroring (SMAPIv1 and v3) *) + | Migration_preparation_failure of string + (* happens when passing fds to tapdisks for mirroring (SMAPIv1 
only) *) + | Migration_mirror_fd_failure of string + (* raised when taking a snapshot as the base image before copying it over to + the destination (SMAPIv1 only) *) + | Migration_mirror_snapshot_failure of string + (* mirror_copy_failure: raised when copying of the base image fails (SMAPIv1 only) *) + | Migration_mirror_copy_failure of string + (* mirror_failure: raised when there is any issues that causes the mirror to crash + during SXM (SMAPIv1 and SMAPIv3 *) + | Migration_mirror_failure of string | Internal_error of string | Unknown_error [@@default Unknown_error] [@@deriving rpcty] @@ -369,6 +425,8 @@ end exception Storage_error of Errors.error +let unimplemented x = raise (Storage_error (Errors.Unimplemented x)) + let () = (* register printer *) let sprintf = Printf.sprintf in @@ -404,6 +462,9 @@ let err = ) } +type smapi_version = SMAPIv1 | SMAPIv2 | SMAPIv3 +[@@deriving rpcty, show {with_path= false}] + type query_result = { driver: string ; name: string @@ -415,6 +476,7 @@ type query_result = { ; features: string list ; configuration: (string * string) list ; required_cluster_stack: string list + ; smapi_version: smapi_version } [@@deriving rpcty] @@ -996,41 +1058,97 @@ module StorageAPI (R : RPC) = struct @-> returning result_p err ) + let operation_p = Param.mk ~name:"operation" Mirror.operation + + let mirror = + declare "DATA.mirror" [] + (dbg_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> url_p + @-> returning operation_p err + ) + + let stat = + let status_p = Param.mk ~name:"status" Mirror.status in + declare "DATA.stat" [] + (dbg_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> operation_p + @-> returning status_p err + ) + + (** [import_activate dbg dp sr vdi vm] returns a server socket address to + which a fd can be passed via SCM_RIGHTS for mirroring purposes.*) + let import_activate = + declare "DATA.import_activate" [] + (dbg_p + @-> dp_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> returning sock_path_p err + ) + + (** [get_nbd_server dbg dp sr vdi vm] 
returns the address of a generic nbd + server that can be connected to. Depending on the backend, this will either + be a nbd server backed by tapdisk or qemu-dp. Note this is different + from [import_activate] as the returned server does not accept fds. *) + let get_nbd_server = + declare "DATA.get_nbd_server" [] + (dbg_p + @-> dp_p + @-> sr_p + @-> vdi_p + @-> vm_p + @-> returning sock_path_p err + ) + module MIRROR = struct let mirror_vm_p = Param.mk ~name:"mirror_vm" Vm.t let copy_vm_p = Param.mk ~name:"copy_vm" Vm.t - (** [start task sr vdi url sr2] creates a VDI in remote [url]'s [sr2] and - writes data synchronously. It returns the id of the VDI.*) - let start = - declare "DATA.MIRROR.start" [] + let live_vm_p = Param.mk ~name:"live_vm" Vm.t + + let id_p = Param.mk ~name:"id" Mirror.id + + (** [send_start dbg dp task src_sr vdi mirror_vm mirror_id local_vdi copy_vm + live_vm url remote_mirror dest_sr verify_dest] + takes the remote mirror [remote_mirror] prepared by the destination host + and initiates the mirroring of [vdi] from the source *) + let send_start = + let recv_result_p = + Param.mk ~name:"recv_result" Mirror.mirror_receive_result + in + let local_vdi_p = Param.mk ~name:"local_vdi" vdi_info in + let src_sr_p = Param.mk ~name:"src_sr" Sr.t in + let dest_sr_p = Param.mk ~name:"dest_sr" Sr.t in + declare "DATA.MIRROR.send_start" [] (dbg_p - @-> sr_p - @-> vdi_p @-> dp_p + @-> task_id_p + @-> src_sr_p + @-> vdi_p @-> mirror_vm_p + @-> id_p + @-> local_vdi_p @-> copy_vm_p + @-> live_vm_p @-> url_p - @-> dest_p + @-> recv_result_p + @-> dest_sr_p @-> verify_dest_p - @-> returning task_id_p err + @-> returning unit_p err ) - let id_p = Param.mk ~name:"id" Mirror.id - - (** [stop task sr vdi] stops mirroring local [vdi] *) - let stop = - declare "DATA.MIRROR.stop" [] (dbg_p @-> id_p @-> returning unit_p err) - - let stat = - let result_p = Param.mk ~name:"result" Mirror.t in - declare "DATA.MIRROR.stat" [] (dbg_p @-> id_p @-> returning result_p err) - 
(** Called on the receiving end @deprecated This function is deprecated, and is only here to keep backward compatibility with old xapis that call Remote.DATA.MIRROR.receive_start during SXM. - Use the receive_start2 function instead. + Use the receive_start3 function instead. *) let receive_start = let similar_p = Param.mk ~name:"similar" Mirror.similars in @@ -1044,8 +1162,11 @@ module StorageAPI (R : RPC) = struct @-> returning result err ) - (** Called on the receiving end to prepare for receipt of the storage. This - function should be used in conjunction with [receive_finalize2]*) + (** Called on the receiving end + @deprecated This function is deprecated, and is only here to keep backward + compatibility with old xapis that call Remote.DATA.MIRROR.receive_start2 during SXM. + Use the receive_start3 function instead. + *) let receive_start2 = let similar_p = Param.mk ~name:"similar" Mirror.similars in let result = Param.mk ~name:"result" Mirror.mirror_receive_result in @@ -1059,60 +1180,91 @@ module StorageAPI (R : RPC) = struct @-> returning result err ) + (** Called on the receiving end to prepare for receipt of the storage. This + function should be used in conjunction with [receive_finalize3]*) + let receive_start3 = + let similar_p = Param.mk ~name:"similar" Mirror.similars in + let result = Param.mk ~name:"result" Mirror.mirror_receive_result in + declare "DATA.MIRROR.receive_start3" [] + (dbg_p + @-> sr_p + @-> VDI.vdi_info_p + @-> id_p + @-> similar_p + @-> vm_p + @-> url_p + @-> verify_dest_p + @-> returning result err + ) + (** Called on the receiving end @deprecated This function is deprecated, and is only here to keep backward compatibility with old xapis that call Remote.DATA.MIRROR.receive_finalize - during SXM. Use the receive_finalize2 function instead. + during SXM. Use the receive_finalize3 function instead. 
*) let receive_finalize = declare "DATA.MIRROR.receive_finalize" [] (dbg_p @-> id_p @-> returning unit_p err) - (** [receive_finalize2 dbg id] will stop the mirroring process and compose - the snapshot VDI with the mirror VDI. It also cleans up the storage resources - used by mirroring. It is called after the the source VM is paused. This fucntion - should be used in conjunction with [receive_start2] *) + (** Called on the receiving end + @deprecated This function is deprecated, and is only here to keep backward + compatibility with old xapis that call Remote.DATA.MIRROR.receive_finalize2 + during SXM. Use the receive_finalize3 function instead. + *) let receive_finalize2 = declare "DATA.MIRROR.receive_finalize2" [] (dbg_p @-> id_p @-> returning unit_p err) + (** [receive_finalize3 dbg id] will stop the mirroring process and compose + the snapshot VDI with the mirror VDI. It also cleans up the storage resources + used by mirroring. It is called after the the source VM is paused. This fucntion + should be used in conjunction with [receive_start3] *) + let receive_finalize3 = + declare "DATA.MIRROR.receive_finalize3" [] + (dbg_p + @-> id_p + @-> sr_p + @-> url_p + @-> verify_dest_p + @-> returning unit_p err + ) + (** [receive_cancel dbg id] is called in the case of migration failure to - do the clean up.*) + do the clean up. + @deprecated This function is deprecated, and is only here to keep backward + compatibility with old xapis that call Remote.DATA.MIRROR.receive_cancel + during SXM. Use the receive_cancel2 function instead. + *) let receive_cancel = declare "DATA.MIRROR.receive_cancel" [] (dbg_p @-> id_p @-> returning unit_p err) + (** [receive_cancel2 dbg mirror_id url verify_dest] cleans up the side effects + done by [receive_start3] on the destination host when the migration fails. 
*) + let receive_cancel2 = + declare "DATA.MIRROR.receive_cancel2" [] + (dbg_p @-> id_p @-> url_p @-> verify_dest_p @-> returning unit_p err) + + let pre_deactivate_hook = + declare "DATA.MIRROR.pre_deactivate_hook" [] + (dbg_p @-> dp_p @-> sr_p @-> vdi_p @-> returning unit_p err) + + let has_mirror_failed = + let mirror_failed_p = + Param.mk ~name:"mirror_failed_p" ~description:[] Types.bool + in + declare "DATA.MIRROR.has_mirror_failed" [] + (dbg_p @-> id_p @-> sr_p @-> returning mirror_failed_p err) + let list = let result_p = Param.mk ~name:"mirrors" TypeCombinators.(list (pair Mirror.(id, t))) in declare "DATA.MIRROR.list" [] (dbg_p @-> returning result_p err) - (** [import_activate dbg dp sr vdi vm] returns a server socket address to - which a fd can be passed via SCM_RIGHTS for mirroring purposes.*) - let import_activate = - declare "DATA.MIRROR.import_activate" [] - (dbg_p - @-> dp_p - @-> sr_p - @-> vdi_p - @-> vm_p - @-> returning sock_path_p err - ) - - (** [get_nbd_server dbg dp sr vdi vm] returns the address of a generic nbd - server that can be connected to. Depending on the backend, this will either - be a nbd server backed by tapdisk or qemu-dp. Note this is different - from [import_activate] as the returned server does not accept fds. 
*) - let get_nbd_server = - declare "DATA.MIRROR.get_nbd_server" [] - (dbg_p - @-> dp_p - @-> sr_p - @-> vdi_p - @-> vm_p - @-> returning sock_path_p err - ) + let stat = + let result_p = Param.mk ~name:"result" Mirror.t in + declare "DATA.MIRROR.stat" [] (dbg_p @-> id_p @-> returning result_p err) end end @@ -1157,6 +1309,92 @@ module StorageAPI (R : RPC) = struct end end +module type MIRROR = sig + type context = unit + + val send_start : + context + -> dbg:debug_info + -> task_id:Task.id + -> dp:dp + -> sr:sr + -> vdi:vdi + -> mirror_vm:vm + -> mirror_id:Mirror.id + -> local_vdi:vdi_info + -> copy_vm:vm + -> live_vm:vm + -> url:string + -> remote_mirror:Mirror.mirror_receive_result + -> dest_sr:sr + -> verify_dest:bool + -> unit + + val receive_start : + context + -> dbg:debug_info + -> sr:sr + -> vdi_info:vdi_info + -> id:Mirror.id + -> similar:Mirror.similars + -> Mirror.mirror_receive_result + + val receive_start2 : + context + -> dbg:debug_info + -> sr:sr + -> vdi_info:vdi_info + -> id:Mirror.id + -> similar:Mirror.similars + -> vm:vm + -> Mirror.mirror_receive_result + + val receive_start3 : + context + -> dbg:debug_info + -> sr:sr + -> vdi_info:vdi_info + -> mirror_id:Mirror.id + -> similar:Mirror.similars + -> vm:vm + -> url:string + -> verify_dest:bool + -> Mirror.mirror_receive_result + + val receive_finalize : context -> dbg:debug_info -> id:Mirror.id -> unit + + val receive_finalize2 : context -> dbg:debug_info -> id:Mirror.id -> unit + + val receive_finalize3 : + context + -> dbg:debug_info + -> mirror_id:Mirror.id + -> sr:sr + -> url:string + -> verify_dest:bool + -> unit + + val receive_cancel : context -> dbg:debug_info -> id:Mirror.id -> unit + + val receive_cancel2 : + context + -> dbg:debug_info + -> mirror_id:Mirror.id + -> url:string + -> verify_dest:bool + -> unit + + val pre_deactivate_hook : + context -> dbg:debug_info -> dp:dp -> sr:sr -> vdi:vdi -> unit + + val has_mirror_failed : + context -> dbg:debug_info -> mirror_id:Mirror.id -> 
sr:Sr.t -> bool + + val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list + + val stat : context -> dbg:debug_info -> id:Mirror.id -> Mirror.t +end + module type Server_impl = sig type context = unit @@ -1413,69 +1651,43 @@ module type Server_impl = sig -> verify_dest:bool -> Task.id - module MIRROR : sig - val start : - context - -> dbg:debug_info - -> sr:sr - -> vdi:vdi - -> dp:dp - -> mirror_vm:vm - -> copy_vm:vm - -> url:string - -> dest:sr - -> verify_dest:bool - -> Task.id - - val stop : context -> dbg:debug_info -> id:Mirror.id -> unit - - val stat : context -> dbg:debug_info -> id:Mirror.id -> Mirror.t - - val receive_start : - context - -> dbg:debug_info - -> sr:sr - -> vdi_info:vdi_info - -> id:Mirror.id - -> similar:Mirror.similars - -> Mirror.mirror_receive_result - - val receive_start2 : - context - -> dbg:debug_info - -> sr:sr - -> vdi_info:vdi_info - -> id:Mirror.id - -> similar:Mirror.similars - -> vm:vm - -> Mirror.mirror_receive_result - - val receive_finalize : context -> dbg:debug_info -> id:Mirror.id -> unit - - val receive_finalize2 : context -> dbg:debug_info -> id:Mirror.id -> unit - - val receive_cancel : context -> dbg:debug_info -> id:Mirror.id -> unit - - val list : context -> dbg:debug_info -> (Mirror.id * Mirror.t) list - - val import_activate : - context - -> dbg:debug_info - -> dp:dp - -> sr:sr - -> vdi:vdi - -> vm:vm - -> sock_path - - val get_nbd_server : - context - -> dbg:debug_info - -> dp:dp - -> sr:sr - -> vdi:vdi - -> vm:vm - -> sock_path - end + val mirror : + context + -> dbg:debug_info + -> sr:sr + -> vdi:vdi + -> vm:vm + -> dest:string + -> operation + + val stat : + context + -> dbg:debug_info + -> sr:sr + -> vdi:vdi + -> vm:vm + -> key:operation + -> status + + val import_activate : + context + -> dbg:debug_info + -> dp:dp + -> sr:sr + -> vdi:vdi + -> vm:vm + -> sock_path + + val get_nbd_server : + context + -> dbg:debug_info + -> dp:dp + -> sr:sr + -> vdi:vdi + -> vm:vm + -> sock_path + + module MIRROR : 
MIRROR end module Policy : sig @@ -1631,34 +1843,73 @@ module Server (Impl : Server_impl) () = struct S.DATA.copy (fun dbg sr vdi vm url dest verify_dest -> Impl.DATA.copy () ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest ) ; - S.DATA.MIRROR.start - (fun dbg sr vdi dp mirror_vm copy_vm url dest verify_dest -> - Impl.DATA.MIRROR.start () ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url - ~dest ~verify_dest + S.DATA.mirror (fun dbg sr vdi vm dest -> + Impl.DATA.mirror () ~dbg ~sr ~vdi ~vm ~dest + ) ; + S.DATA.stat (fun dbg sr vdi vm key -> + Impl.DATA.stat () ~dbg ~sr ~vdi ~vm ~key + ) ; + S.DATA.MIRROR.send_start + (fun + dbg + task_id + dp + sr + vdi + mirror_vm + mirror_id + local_vdi + copy_vm + live_vm + url + remote_mirror + dest_sr + verify_dest + -> + Impl.DATA.MIRROR.send_start () ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm + ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr + ~verify_dest ) ; - S.DATA.MIRROR.stop (fun dbg id -> Impl.DATA.MIRROR.stop () ~dbg ~id) ; - S.DATA.MIRROR.stat (fun dbg id -> Impl.DATA.MIRROR.stat () ~dbg ~id) ; S.DATA.MIRROR.receive_start (fun dbg sr vdi_info id similar -> Impl.DATA.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id ~similar ) ; S.DATA.MIRROR.receive_start2 (fun dbg sr vdi_info id similar vm -> Impl.DATA.MIRROR.receive_start2 () ~dbg ~sr ~vdi_info ~id ~similar ~vm ) ; + S.DATA.MIRROR.receive_start3 + (fun dbg sr vdi_info mirror_id similar vm url verify_dest -> + Impl.DATA.MIRROR.receive_start3 () ~dbg ~sr ~vdi_info ~mirror_id + ~similar ~vm ~url ~verify_dest + ) ; S.DATA.MIRROR.receive_cancel (fun dbg id -> Impl.DATA.MIRROR.receive_cancel () ~dbg ~id ) ; + S.DATA.MIRROR.receive_cancel2 (fun dbg mirror_id url verify_dest -> + Impl.DATA.MIRROR.receive_cancel2 () ~dbg ~mirror_id ~url ~verify_dest + ) ; S.DATA.MIRROR.receive_finalize (fun dbg id -> Impl.DATA.MIRROR.receive_finalize () ~dbg ~id ) ; S.DATA.MIRROR.receive_finalize2 (fun dbg id -> Impl.DATA.MIRROR.receive_finalize2 () ~dbg ~id ) ; + 
S.DATA.MIRROR.receive_finalize3 (fun dbg mirror_id sr url verify_dest -> + Impl.DATA.MIRROR.receive_finalize3 () ~dbg ~mirror_id ~sr ~url + ~verify_dest + ) ; + S.DATA.MIRROR.pre_deactivate_hook (fun dbg dp sr vdi -> + Impl.DATA.MIRROR.pre_deactivate_hook () ~dbg ~dp ~sr ~vdi + ) ; + S.DATA.MIRROR.has_mirror_failed (fun dbg mirror_id sr -> + Impl.DATA.MIRROR.has_mirror_failed () ~dbg ~mirror_id ~sr + ) ; S.DATA.MIRROR.list (fun dbg -> Impl.DATA.MIRROR.list () ~dbg) ; - S.DATA.MIRROR.import_activate (fun dbg dp sr vdi vm -> - Impl.DATA.MIRROR.import_activate () ~dbg ~dp ~sr ~vdi ~vm + S.DATA.MIRROR.stat (fun dbg id -> Impl.DATA.MIRROR.stat () ~dbg ~id) ; + S.DATA.import_activate (fun dbg dp sr vdi vm -> + Impl.DATA.import_activate () ~dbg ~dp ~sr ~vdi ~vm ) ; - S.DATA.MIRROR.get_nbd_server (fun dbg dp sr vdi vm -> - Impl.DATA.MIRROR.get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm + S.DATA.get_nbd_server (fun dbg dp sr vdi vm -> + Impl.DATA.get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm ) ; S.Policy.get_backend_vm (fun dbg vm sr vdi -> Impl.Policy.get_backend_vm () ~dbg ~vm ~sr ~vdi diff --git a/ocaml/xapi-idl/storage/storage_skeleton.ml b/ocaml/xapi-idl/storage/storage_skeleton.ml index ab84ed7712e..a2d2d04ab08 100644 --- a/ocaml/xapi-idl/storage/storage_skeleton.ml +++ b/ocaml/xapi-idl/storage/storage_skeleton.ml @@ -13,8 +13,6 @@ *) [@@@ocaml.warning "-27"] -let u x = raise Storage_interface.(Storage_error (Errors.Unimplemented x)) - type context = unit module UPDATES = struct @@ -27,175 +25,231 @@ module UPDATES = struct end module Query = struct - let query ctx ~dbg = u "Query.query" + let query ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ - let diagnostics ctx ~dbg = u "Query.diagnostics" + let diagnostics ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ end module DP = struct - let create ctx ~dbg ~id = u "DP.create" + let create ctx ~dbg ~id = Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~dp ~allow_leak = u "DP.destroy" + let destroy 
ctx ~dbg ~dp ~allow_leak = + Storage_interface.unimplemented __FUNCTION__ - let destroy2 ctx ~dbg ~dp ~sr ~vdi ~vm ~allow_leak = u "DP.destroy2" + let destroy2 ctx ~dbg ~dp ~sr ~vdi ~vm ~allow_leak = + Storage_interface.unimplemented __FUNCTION__ - let attach_info ctx ~dbg ~sr ~vdi ~dp ~vm = u "DP.attach_info" + let attach_info ctx ~dbg ~sr ~vdi ~dp ~vm = + Storage_interface.unimplemented __FUNCTION__ - let diagnostics ctx () = u "DP.diagnostics" + let diagnostics ctx () = Storage_interface.unimplemented __FUNCTION__ - let stat_vdi ctx ~dbg ~sr ~vdi () = u "DP.stat_vdi" + let stat_vdi ctx ~dbg ~sr ~vdi () = + Storage_interface.unimplemented __FUNCTION__ end module SR = struct let create ctx ~dbg ~sr ~name_label ~name_description ~device_config ~physical_size = - u "SR.create" + Storage_interface.unimplemented __FUNCTION__ - let attach ctx ~dbg ~sr ~device_config = u "SR.attach" + let attach ctx ~dbg ~sr ~device_config = + Storage_interface.unimplemented __FUNCTION__ - let set_name_label ctx ~dbg ~sr ~new_name_label = u "SR.set_name_label" + let set_name_label ctx ~dbg ~sr ~new_name_label = + Storage_interface.unimplemented __FUNCTION__ let set_name_description ctx ~dbg ~sr ~new_name_description = - u "SR.set_name_description" + Storage_interface.unimplemented __FUNCTION__ - let detach ctx ~dbg ~sr = u "SR.detach" + let detach ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let reset ctx ~dbg ~sr = u "SR.reset" + let reset ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~sr = u "SR.destroy" + let destroy ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let probe ctx ~dbg ~queue ~device_config ~sm_config = u "SR.probe" + let probe ctx ~dbg ~queue ~device_config ~sm_config = + Storage_interface.unimplemented __FUNCTION__ - let scan ctx ~dbg ~sr = u "SR.scan" + let scan ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let scan2 ctx ~dbg ~sr = u "SR.scan2" + let scan2 ctx ~dbg ~sr = 
Storage_interface.unimplemented __FUNCTION__ let update_snapshot_info_src ctx ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~snapshot_pairs = - u "SR.update_snapshot_info_src" + Storage_interface.unimplemented __FUNCTION__ let update_snapshot_info_dest ctx ~dbg ~sr ~vdi ~src_vdi ~snapshot_pairs = - u "SR.update_snapshot_info_dest" + Storage_interface.unimplemented __FUNCTION__ - let stat ctx ~dbg ~sr = u "SR.stat" + let stat ctx ~dbg ~sr = Storage_interface.unimplemented __FUNCTION__ - let list ctx ~dbg = u "SR.list" + let list ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ end module VDI = struct - let create ctx ~dbg ~sr ~vdi_info = u "VDI.create" + let create ctx ~dbg ~sr ~vdi_info = + Storage_interface.unimplemented __FUNCTION__ - let set_name_label ctx ~dbg ~sr ~vdi ~new_name_label = u "VDI.set_name_label" + let set_name_label ctx ~dbg ~sr ~vdi ~new_name_label = + Storage_interface.unimplemented __FUNCTION__ let set_name_description ctx ~dbg ~sr ~vdi ~new_name_description = - u "VDI.set_name_description" + Storage_interface.unimplemented __FUNCTION__ - let snapshot ctx ~dbg ~sr ~vdi_info = u "VDI.snapshot" + let snapshot ctx ~dbg ~sr ~vdi_info = + Storage_interface.unimplemented __FUNCTION__ - let clone ctx ~dbg ~sr ~vdi_info = u "VDI.clone" + let clone ctx ~dbg ~sr ~vdi_info = + Storage_interface.unimplemented __FUNCTION__ - let resize ctx ~dbg ~sr ~vdi ~new_size = u "VDI.resize" + let resize ctx ~dbg ~sr ~vdi ~new_size = + Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~sr ~vdi = u "VDI.destroy" + let destroy ctx ~dbg ~sr ~vdi = Storage_interface.unimplemented __FUNCTION__ - let stat ctx ~dbg ~sr ~vdi = u "VDI.stat" + let stat ctx ~dbg ~sr ~vdi = Storage_interface.unimplemented __FUNCTION__ - let introduce ctx ~dbg ~sr ~uuid ~sm_config ~location = u "VDI.introduce" + let introduce ctx ~dbg ~sr ~uuid ~sm_config ~location = + Storage_interface.unimplemented __FUNCTION__ - let set_persistent ctx ~dbg ~sr ~vdi ~persistent = u 
"VDI.set_persistent" + let set_persistent ctx ~dbg ~sr ~vdi ~persistent = + Storage_interface.unimplemented __FUNCTION__ let epoch_begin ctx ~dbg ~sr ~vdi ~vm ~persistent = () - let attach ctx ~dbg ~dp ~sr ~vdi ~read_write = u "VDI.attach" + let attach ctx ~dbg ~dp ~sr ~vdi ~read_write = + Storage_interface.unimplemented __FUNCTION__ - let attach2 ctx ~dbg ~dp ~sr ~vdi ~read_write = u "VDI.attach2" + let attach2 ctx ~dbg ~dp ~sr ~vdi ~read_write = + Storage_interface.unimplemented __FUNCTION__ - let attach3 ctx ~dbg ~dp ~sr ~vdi ~vm ~read_write = u "VDI.attach3" + let attach3 ctx ~dbg ~dp ~sr ~vdi ~vm ~read_write = + Storage_interface.unimplemented __FUNCTION__ - let activate ctx ~dbg ~dp ~sr ~vdi = u "VDI.activate" + let activate ctx ~dbg ~dp ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let activate3 ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.activate3" + let activate3 ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ - let activate_readonly ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.activate_readonly" + let activate_readonly ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ - let deactivate ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.deactivate" + let deactivate ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ - let detach ctx ~dbg ~dp ~sr ~vdi ~vm = u "VDI.detach" + let detach ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ let epoch_end ctx ~dbg ~sr ~vdi ~vm = () - let get_url ctx ~dbg ~sr ~vdi = u "VDI.get_url" + let get_url ctx ~dbg ~sr ~vdi = Storage_interface.unimplemented __FUNCTION__ - let similar_content ctx ~dbg ~sr ~vdi = u "VDI.similar_content" + let similar_content ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let get_by_name ctx ~dbg ~sr ~name = u "VDI.get_by_name" + let get_by_name ctx ~dbg ~sr ~name = + Storage_interface.unimplemented __FUNCTION__ - let set_content_id ctx ~dbg ~sr ~vdi ~content_id = u "VDI.set_content_id" + let 
set_content_id ctx ~dbg ~sr ~vdi ~content_id = + Storage_interface.unimplemented __FUNCTION__ - let compose ctx ~dbg ~sr ~vdi1 ~vdi2 = u "VDI.compose" + let compose ctx ~dbg ~sr ~vdi1 ~vdi2 = + Storage_interface.unimplemented __FUNCTION__ - let add_to_sm_config ctx ~dbg ~sr ~vdi ~key ~value = u "VDI.add_to_sm_config" + let add_to_sm_config ctx ~dbg ~sr ~vdi ~key ~value = + Storage_interface.unimplemented __FUNCTION__ let remove_from_sm_config ctx ~dbg ~sr ~vdi ~key = - u "VDI.remove_from_sm_config" + Storage_interface.unimplemented __FUNCTION__ - let enable_cbt ctx ~dbg ~sr ~vdi = u "VDI.enable_cbt" + let enable_cbt ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let disable_cbt ctx ~dbg ~sr ~vdi = u "VDI.disable_cbt" + let disable_cbt ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ - let data_destroy ctx ~dbg ~sr ~vdi = u "VDI.data_destroy" + let data_destroy ctx ~dbg ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ let list_changed_blocks ctx ~dbg ~sr ~vdi_from ~vdi_to = - u "VDI.list_changed_blocks" + Storage_interface.unimplemented __FUNCTION__ end -let get_by_name ctx ~dbg ~name = u "get_by_name" +let get_by_name ctx ~dbg ~name = Storage_interface.unimplemented __FUNCTION__ module DATA = struct - let copy ctx ~dbg ~sr ~vdi ~vm ~url ~dest = u "DATA.copy" + let copy ctx ~dbg ~sr ~vdi ~vm ~url ~dest = + Storage_interface.unimplemented __FUNCTION__ - module MIRROR = struct - (** [start task sr vdi url sr2] creates a VDI in remote [url]'s [sr2] and - writes data synchronously. 
It returns the id of the VDI.*) - let start ctx ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest = - u "DATA.MIRROR.start" + let mirror ctx ~dbg ~sr ~vdi ~vm ~dest = + Storage_interface.unimplemented __FUNCTION__ + + let stat ctx ~dbg ~sr ~vdi ~vm ~key = + Storage_interface.unimplemented __FUNCTION__ + + let import_activate ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ + + let get_nbd_server ctx ~dbg ~dp ~sr ~vdi ~vm = + Storage_interface.unimplemented __FUNCTION__ - let stop ctx ~dbg ~id = u "DATA.MIRROR.stop" + module MIRROR = struct + type context = unit - let stat ctx ~dbg ~id = u "DATA.MIRROR.stat" + let send_start ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id + ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = + Storage_interface.unimplemented __FUNCTION__ let receive_start ctx ~dbg ~sr ~vdi_info ~id ~similar = - u "DATA.MIRROR.receive_start" + Storage_interface.unimplemented __FUNCTION__ let receive_start2 ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = - u "DATA.MIRROR.receive_start2" + Storage_interface.unimplemented __FUNCTION__ + + let receive_start3 ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url + ~verify_dest = + Storage_interface.unimplemented __FUNCTION__ + + let receive_finalize ctx ~dbg ~id = + Storage_interface.unimplemented __FUNCTION__ + + let receive_finalize2 ctx ~dbg ~id = + Storage_interface.unimplemented __FUNCTION__ + + let receive_finalize3 ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + Storage_interface.unimplemented __FUNCTION__ - let receive_finalize ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize" + let receive_cancel ctx ~dbg ~id = + Storage_interface.unimplemented __FUNCTION__ - let receive_finalize2 ctx ~dbg ~id = u "DATA.MIRROR.receive_finalize2" + let receive_cancel2 ctx ~dbg ~mirror_id ~url ~verify_dest = + Storage_interface.unimplemented __FUNCTION__ - let receive_cancel ctx ~dbg ~id = u "DATA.MIRROR.receive_cancel" + let pre_deactivate_hook ctx ~dbg ~dp ~sr ~vdi = + 
Storage_interface.unimplemented __FUNCTION__ - let list ctx ~dbg = u "DATA.MIRROR.list" + let has_mirror_failed ctx ~dbg ~mirror_id ~sr = + Storage_interface.unimplemented __FUNCTION__ - let import_activate ctx ~dbg ~dp ~sr ~vdi ~vm = - u "DATA.MIRROR.import_activate" + let list ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ - let get_nbd_server ctx ~dbg ~dp ~sr ~vdi ~vm = - u "DATA.MIRROR.get_nbd_server" + let stat ctx ~dbg ~id = Storage_interface.unimplemented __FUNCTION__ end end module Policy = struct - let get_backend_vm ctx ~dbg ~vm ~sr ~vdi = u "Policy.get_backend_vm" + let get_backend_vm ctx ~dbg ~vm ~sr ~vdi = + Storage_interface.unimplemented __FUNCTION__ end module TASK = struct - let stat ctx ~dbg ~task = u "TASK.stat" + let stat ctx ~dbg ~task = Storage_interface.unimplemented __FUNCTION__ - let cancel ctx ~dbg ~task = u "TASK.cancel" + let cancel ctx ~dbg ~task = Storage_interface.unimplemented __FUNCTION__ - let destroy ctx ~dbg ~task = u "TASK.destroy" + let destroy ctx ~dbg ~task = Storage_interface.unimplemented __FUNCTION__ - let list ctx ~dbg = u "TASK.list" + let list ctx ~dbg = Storage_interface.unimplemented __FUNCTION__ end diff --git a/ocaml/xapi-idl/storage/storage_test.ml b/ocaml/xapi-idl/storage/storage_test.ml index f4145ceccc2..d86c6b69df5 100644 --- a/ocaml/xapi-idl/storage/storage_test.ml +++ b/ocaml/xapi-idl/storage/storage_test.ml @@ -63,7 +63,7 @@ let names = let vdi_exists sr vdi = let all = Client.SR.scan dbg sr in - List.fold_left (fun acc vdi_info -> acc || vdi_info.vdi = vdi) false all + List.exists (fun vdi_info -> vdi_info.vdi = vdi) all let create sr name_label = let vdi_info = diff --git a/ocaml/xapi-idl/storage/vdi_automaton.ml b/ocaml/xapi-idl/storage/vdi_automaton.ml index e36de90e2ba..3192fd585d9 100644 --- a/ocaml/xapi-idl/storage/vdi_automaton.ml +++ b/ocaml/xapi-idl/storage/vdi_automaton.ml @@ -94,15 +94,9 @@ let ( + ) state operation = let superstate states = let activated = - List.fold_left - (fun acc s 
-> acc || s = Activated RO || s = Activated RW) - false states - in - let rw = - List.fold_left - (fun acc s -> acc || s = Activated RW || s = Attached RW) - false states + List.exists (fun s -> s = Activated RO || s = Activated RW) states in + let rw = List.exists (fun s -> s = Activated RW || s = Attached RW) states in if states = [] then Detached else if activated then diff --git a/ocaml/xapi-idl/v6/cli-help.t b/ocaml/xapi-idl/v6/cli-help.t new file mode 100644 index 00000000000..ed7d3b47ba5 --- /dev/null +++ b/ocaml/xapi-idl/v6/cli-help.t @@ -0,0 +1,40 @@ + $ ./v6_cli.exe --help=plain + NAME + licensing_cli - A CLI for the V6d API. This allows scripting of the + licensing daemon for testing and debugging. This tool is not intended + to be used as an end user tool + + SYNOPSIS + licensing_cli [COMMAND] … + + COMMANDS + apply_edition [OPTION]… debug_info string string_pair_lst + Checks license info to ensures enabled features are compatible. + + get_editions [OPTION]… debug_info + Gets list of accepted editions. + + get_version [OPTION]… debug_info + Gets list of version-related string pairs + + COMMON OPTIONS + --help[=FMT] (default=auto) + Show this help in format FMT. The value FMT must be one of auto, + pager, groff or plain. With auto, the format is pager or plain + whenever the TERM env var is dumb or undefined. + + --version + Show version information. + + EXIT STATUS + licensing_cli exits with: + + 0 on success. + + 123 on indiscriminate errors reported on standard error. + + 124 on command line parsing errors. + + 125 on unexpected internal errors (bugs). 
+ + diff --git a/ocaml/xapi-idl/v6/dune b/ocaml/xapi-idl/v6/dune index 79751c08794..3fb2579af06 100644 --- a/ocaml/xapi-idl/v6/dune +++ b/ocaml/xapi-idl/v6/dune @@ -19,7 +19,7 @@ (modules v6_cli) (libraries cmdliner - + rpclib.cmdliner rpclib.core rpclib.markdown @@ -28,8 +28,6 @@ xapi-log )) -(rule - (alias runtest) - (deps (:x v6_cli.exe)) +(cram (package xapi-idl) - (action (run %{x} --help=plain))) + (deps v6_cli.exe)) diff --git a/ocaml/xapi-idl/v6/v6_interface.ml b/ocaml/xapi-idl/v6/v6_interface.ml index 74b8201dbc7..3098713c598 100644 --- a/ocaml/xapi-idl/v6/v6_interface.ml +++ b/ocaml/xapi-idl/v6/v6_interface.ml @@ -78,7 +78,8 @@ type errors = (** Thrown by license_check when expiry date matches or precedes current date *) | License_processing_error (** License could not be processed *) - | License_checkout_error of string (** License could not be checked out *) + | License_checkout_error of string * string + (** License could not be checked out *) | Missing_connection_details (** Thrown if connection port or address parameter not supplied to check_license *) @@ -111,7 +112,6 @@ let err = def= errors ; raiser= (fun e -> - log_backtrace () ; let exn = V6_error e in error "%s (%s)" (Printexc.to_string exn) __LOC__ ; raise exn diff --git a/ocaml/xapi-idl/xen/xenops_interface.ml b/ocaml/xapi-idl/xen/xenops_interface.ml index 083c345f149..41eb44e0875 100644 --- a/ocaml/xapi-idl/xen/xenops_interface.ml +++ b/ocaml/xapi-idl/xen/xenops_interface.ml @@ -303,6 +303,7 @@ module Vbd = struct ; extra_private_keys: (string * string) list [@default []] ; qos: qos option [@default None] ; persistent: bool [@default true] + ; can_attach_early: bool [@default false] } [@@deriving rpcty] @@ -444,16 +445,6 @@ module Dynamic = struct type barrier = int * id list [@@deriving rpcty] - type t = - | Vm_t of Vm.id * (Vm.t * Vm.state) option - | Vbd_t of Vbd.id * (Vbd.t * Vbd.state) option - | Vif_t of Vif.id * (Vif.t * Vif.state) option - | Pci_t of Pci.id * (Pci.t * Pci.state) 
option - | Vgpu_t of Vgpu.id * (Vgpu.t * Vgpu.state) option - | Vusb_t of Vusb.id * (Vusb.t * Vusb.state) option - | Task_t of Task.id * Task.t option - [@@deriving rpcty] - let rpc_of_id = Rpcmarshal.marshal id.Rpc.Types.ty end @@ -496,11 +487,16 @@ module Host = struct [@@deriving rpcty] type numa_affinity_policy = - | Any (** VMs may run on any NUMA nodes. This is the default in 8.2CU1 *) + | Any (** VMs may run on any NUMA nodes. *) | Best_effort - (** best effort placement on the smallest number of NUMA nodes where possible *) + (** Best-effort placement. Assigns the memory of the VM to a single + node, and soft-pins its VCPUs to the node, if possible. Otherwise + behaves like Any. *) + | Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *) [@@deriving rpcty] + type numa_affinity_policy_opt = numa_affinity_policy option [@@deriving rpcty] + type guest_agent_feature_list = guest_agent_feature list [@@deriving rpcty] end @@ -657,7 +653,7 @@ module XenopsAPI (R : RPC) = struct let numa_affinity_policy_p = Param.mk ~description:["Host NUMA affinity policy"] - ~name:"numa_affinity_policy" Host.numa_affinity_policy + ~name:"numa_affinity_policy" Host.numa_affinity_policy_opt in declare "HOST.set_numa_affinity_policy" ["Sets the host's NUMA aware VM scheduling policy"] @@ -716,6 +712,11 @@ module XenopsAPI (R : RPC) = struct ~description:["when true, verify remote server certificate"] Types.bool in + let localhost_migration = + Param.mk ~name:"localhost_migration" + ~description:["when true, localhost migration is being performed"] + Types.bool + in declare "VM.migrate" [] (debug_info_p @-> vm_id_p @@ -725,6 +726,7 @@ module XenopsAPI (R : RPC) = struct @-> xenops_url @-> compress @-> verify_dest + @-> localhost_migration @-> returning task_id_p err ) @@ -1144,80 +1146,5 @@ module XenopsAPI (R : RPC) = struct (debug_info_p @-> unit_p @-> returning unit_p err) end - module Observer = struct - open TypeCombinators - - let endpoints_p = Param.mk 
~name:"endpoints" (list Types.string) - - let bool_p = Param.mk ~name:"bool" Types.bool - - let uuid_p = Param.mk ~name:"uuid" Types.string - - let name_label_p = Param.mk ~name:"name_label" Types.string - - let dict_p = Param.mk ~name:"dict" dict - - let string_p = Param.mk ~name:"string" Types.string - - let int_p = Param.mk ~name:"int" Types.int - - let float_p = Param.mk ~name:"float" Types.float - - let create = - declare "Observer.create" [] - (debug_info_p - @-> uuid_p - @-> name_label_p - @-> dict_p - @-> endpoints_p - @-> bool_p - @-> returning unit_p err - ) - - let destroy = - declare "Observer.destroy" [] - (debug_info_p @-> uuid_p @-> returning unit_p err) - - let set_enabled = - declare "Observer.set_enabled" [] - (debug_info_p @-> uuid_p @-> bool_p @-> returning unit_p err) - - let set_attributes = - declare "Observer.set_attributes" [] - (debug_info_p @-> uuid_p @-> dict_p @-> returning unit_p err) - - let set_endpoints = - declare "Observer.set_endpoints" [] - (debug_info_p @-> uuid_p @-> endpoints_p @-> returning unit_p err) - - let init = declare "Observer.init" [] (debug_info_p @-> returning unit_p err) - - let set_trace_log_dir = - declare "Observer.set_trace_log_dir" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_export_interval = - declare "Observer.set_export_interval" [] - (debug_info_p @-> float_p @-> returning unit_p err) - - let set_host_id = - declare "Observer.set_host_id" [] - (debug_info_p @-> string_p @-> returning unit_p err) - - let set_max_traces = - declare "Observer.set_max_traces" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_spans = - declare "Observer.set_max_spans" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_max_file_size = - declare "Observer.set_max_file_size" [] - (debug_info_p @-> int_p @-> returning unit_p err) - - let set_compress_tracing_files = - declare "Observer.set_compress_tracing_files" [] - (debug_info_p @-> bool_p @-> returning unit_p err) 
- end + module Observer = Observer_helpers.ObserverAPI (R) end diff --git a/ocaml/xapi-storage-cli/dune b/ocaml/xapi-storage-cli/dune index 624f2f727e1..c59c5c1fad4 100644 --- a/ocaml/xapi-storage-cli/dune +++ b/ocaml/xapi-storage-cli/dune @@ -5,6 +5,7 @@ xapi-idl xapi-idl.storage xapi-idl.storage.interface + xapi_internal re re.str rpclib.core diff --git a/ocaml/xapi-storage-cli/main.ml b/ocaml/xapi-storage-cli/main.ml index c64a4f6fcd9..f581d6b6b48 100644 --- a/ocaml/xapi-storage-cli/main.ml +++ b/ocaml/xapi-storage-cli/main.ml @@ -315,6 +315,8 @@ let mirror_vm = Vm.of_string "SXM_mirror" let copy_vm = Vm.of_string "SXM_copy" +let live_vm = Vm.of_string "live_vm" + let mirror_start common_opts sr vdi dp url dest verify_dest = on_vdi' (fun sr vdi -> @@ -323,9 +325,10 @@ let mirror_start common_opts sr vdi dp url dest verify_dest = let url = get_opt url "Need a URL" in let dest = get_opt dest "Need a destination SR" in let task = - Client.DATA.MIRROR.start dbg sr vdi dp mirror_vm copy_vm url - (Storage_interface.Sr.of_string dest) - verify_dest + Storage_migrate.start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm + ~url + ~dest:(Storage_interface.Sr.of_string dest) + ~verify_dest in Printf.printf "Task id: %s\n" task ) @@ -335,7 +338,7 @@ let mirror_stop common_opts id = wrap common_opts (fun () -> match id with | Some id -> - Client.DATA.MIRROR.stop dbg id + Storage_migrate.stop ~dbg ~id | None -> failwith "Need an ID" ) diff --git a/ocaml/xapi-storage-script/dune b/ocaml/xapi-storage-script/dune index e1391aed2ca..e60413bf36c 100644 --- a/ocaml/xapi-storage-script/dune +++ b/ocaml/xapi-storage-script/dune @@ -41,6 +41,7 @@ sexplib sexplib0 uri + threads.posix xapi-backtrace xapi-consts xapi-consts.xapi_version @@ -76,20 +77,19 @@ (files (xapi-storage-script.8 as man8/xapi-storage-script.8)) ) -(rule +(cram (alias runtest-python) + (runtest_alias false) (package xapi-storage-script) (deps - (:x main.exe) + main.exe + 
../xapi-storage/python/xapi/storage/api/v5/datapath.py + ../xapi-storage/python/xapi/storage/api/v5/plugin.py + ../xapi-storage/python/xapi/storage/api/v5/task.py + ../xapi-storage/python/xapi/storage/api/v5/volume.py + (source_tree ../xapi-storage/python/xapi) (source_tree test/volume) - (:p - ../xapi-storage/python/xapi/storage/api/v5/datapath.py - ../xapi-storage/python/xapi/storage/api/v5/plugin.py - ../xapi-storage/python/xapi/storage/api/v5/task.py - ../xapi-storage/python/xapi/storage/api/v5/volume.py - ) ) - (action (bash "export PYTHONPATH=../xapi-storage/python/; echo $PYTHONPATH; ./%{x} --root=$PWD/test --self-test-only=true")) ) (data_only_dirs test examples) diff --git a/ocaml/xapi-storage-script/main.ml b/ocaml/xapi-storage-script/main.ml index 4b678fa72de..1eccd3867fd 100644 --- a/ocaml/xapi-storage-script/main.ml +++ b/ocaml/xapi-storage-script/main.ml @@ -16,6 +16,7 @@ module Plugin_client = Xapi_storage.Plugin.Plugin (Rpc_lwt.GenClient ()) module Volume_client = Xapi_storage.Control.Volume (Rpc_lwt.GenClient ()) module Sr_client = Xapi_storage.Control.Sr (Rpc_lwt.GenClient ()) module Datapath_client = Xapi_storage.Data.Datapath (Rpc_lwt.GenClient ()) +module Data_client = Xapi_storage.Data.Data (Rpc_lwt.GenClient ()) open Private.Lib let ( >>= ) = Lwt.bind @@ -410,19 +411,6 @@ let observer_config_dir = in dir // component // "enabled" -(** Determine if SM API observation is enabled from the - filesystem. Ordinarily, determining if a component is enabled - would consist of querying the 'components' field of an observer - from the xapi database. 
*) -let observer_is_component_enabled () = - let is_enabled () = - let is_config_file path = Filename.check_suffix path ".observer.conf" in - let* files = Sys.readdir observer_config_dir in - Lwt.return (List.exists is_config_file files) - in - let* result = Deferred.try_with is_enabled in - Lwt.return (Option.value (Result.to_option result) ~default:false) - (** Call the script named after the RPC method in the [script_dir] directory. The arguments (not the whole JSON-RPC call) are passed as JSON to its stdin, and stdout is returned. In case of a non-zero exit code, @@ -948,6 +936,7 @@ module QueryImpl (M : META) = struct ; configuration= response.Xapi_storage.Plugin.configuration ; required_cluster_stack= response.Xapi_storage.Plugin.required_cluster_stack + ; smapi_version= SMAPIv3 } in wrap th @@ -1455,6 +1444,12 @@ module VDIImpl (M : META) = struct set ~dbg ~sr ~vdi:response.Xapi_storage.Control.key ~key:_snapshot_of_key ~value:vdi >>>= fun () -> + set ~dbg ~sr ~vdi:response.Xapi_storage.Control.key + ~key:_vdi_content_id_key ~value:vdi_info.content_id + >>>= fun () -> + set ~dbg ~sr ~vdi:response.Xapi_storage.Control.key + ~key:_vdi_type_key ~value:vdi_info.ty + >>>= fun () -> let response = { (vdi_of_volume response) with @@ -1752,6 +1747,8 @@ module VDIImpl (M : META) = struct let vdi = Storage_interface.Vdi.string_of vdi in let* () = unset ~dbg ~sr ~vdi ~key:(_sm_config_prefix_key ^ key) in return () + + let similar_content_impl _dbg _sr _vdi = wrap @@ return [] end module DPImpl (M : META) = struct @@ -1788,59 +1785,115 @@ end module DATAImpl (M : META) = struct module VDI = VDIImpl (M) - module MIRROR = struct - let data_import_activate_impl dbg _dp sr vdi' vm' = - wrap - @@ - let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in - Attached_SRs.find sr >>>= fun sr -> - (* Discover the URIs using Volume.stat *) - VDI.stat ~dbg ~sr ~vdi >>>= fun response -> - ( match - List.assoc_opt _clone_on_boot_key 
response.Xapi_storage.Control.keys - with - | None -> - return response - | Some temporary -> - VDI.stat ~dbg ~sr ~vdi:temporary + let stat dbg sr vdi' _vm key = + let open Storage_interface in + let convert_key = function + | Mirror.CopyV1 k -> + Data_client.CopyV1 k + | Mirror.MirrorV1 k -> + Data_client.MirrorV1 k + in + + let vdi = Vdi.string_of vdi' in + Attached_SRs.find sr >>>= fun sr -> + VDI.stat ~dbg ~sr ~vdi >>>= fun response -> + ( match + List.assoc_opt _clone_on_boot_key response.Xapi_storage.Control.keys + with + | None -> + return response + | Some temporary -> + VDI.stat ~dbg ~sr ~vdi:temporary + ) + >>>= fun response -> + choose_datapath response >>>= fun (rpc, _datapath, _uri) -> + let key = convert_key key in + return_data_rpc (fun () -> Data_client.stat (rpc ~dbg) dbg key) + >>>= function + | {failed; complete; progress} -> + return Mirror.{failed; complete; progress} + + let stat_impl dbg sr vdi vm key = wrap @@ stat dbg sr vdi vm key + + let mirror dbg sr vdi' vm' remote = + let vdi = Storage_interface.Vdi.string_of vdi' in + let domain = Storage_interface.Vm.string_of vm' in + Attached_SRs.find sr >>>= fun sr -> + VDI.stat ~dbg ~sr ~vdi >>>= fun response -> + ( match + List.assoc_opt _clone_on_boot_key response.Xapi_storage.Control.keys + with + | None -> + return response + | Some temporary -> + VDI.stat ~dbg ~sr ~vdi:temporary + ) + >>>= fun response -> + choose_datapath response >>>= fun (rpc, _datapath, uri) -> + return_data_rpc (fun () -> + Data_client.mirror (rpc ~dbg) dbg uri domain remote + ) + >>>= function + | CopyV1 v -> + return (Storage_interface.Mirror.CopyV1 v) + | MirrorV1 v -> + return (Storage_interface.Mirror.MirrorV1 v) + + let mirror_impl dbg sr vdi vm remote = wrap @@ mirror dbg sr vdi vm remote + + let data_import_activate_impl dbg _dp sr vdi' vm' = + wrap + @@ + let vdi = Storage_interface.Vdi.string_of vdi' in + let domain = Storage_interface.Vm.string_of vm' in + Attached_SRs.find sr >>>= fun sr -> + (* Discover the 
URIs using Volume.stat *) + VDI.stat ~dbg ~sr ~vdi >>>= fun response -> + ( match + List.assoc_opt _clone_on_boot_key response.Xapi_storage.Control.keys + with + | None -> + return response + | Some temporary -> + VDI.stat ~dbg ~sr ~vdi:temporary + ) + >>>= fun response -> + choose_datapath response >>>= fun (rpc, datapath, uri) -> + if Datapath_plugins.supports_feature datapath _vdi_mirror_in then + return_data_rpc (fun () -> + Datapath_client.import_activate (rpc ~dbg) dbg uri domain ) - >>>= fun response -> - choose_datapath response >>>= fun (rpc, datapath, uri) -> - if Datapath_plugins.supports_feature datapath _vdi_mirror_in then - return_data_rpc (fun () -> - Datapath_client.import_activate (rpc ~dbg) dbg uri domain - ) - else - fail (Storage_interface.Errors.Unimplemented _vdi_mirror_in) - - let get_nbd_server_impl dbg _dp sr vdi' vm' = - wrap - @@ - let vdi = Storage_interface.Vdi.string_of vdi' in - let domain = Storage_interface.Vm.string_of vm' in - VDI.vdi_attach_common dbg sr vdi domain >>>= function - | response -> ( - let _, _, _, nbds = - Storage_interface.implementations_of_backend - { - Storage_interface.implementations= - List.map convert_implementation - response.Xapi_storage.Data.implementations - } - in - match nbds with - | ({uri} as nbd) :: _ -> - info (fun m -> - m "%s qemu-dp nbd server address is %s" __FUNCTION__ uri - ) - >>= fun () -> - let socket, _export = Storage_interface.parse_nbd_uri nbd in - return socket - | _ -> - fail (backend_error "No nbd server found" []) - ) - end + else + fail (Storage_interface.Errors.Unimplemented _vdi_mirror_in) + + let get_nbd_server_impl dbg _dp sr vdi' vm' = + wrap + @@ + let vdi = Storage_interface.Vdi.string_of vdi' in + let domain = Storage_interface.Vm.string_of vm' in + VDI.vdi_attach_common dbg sr vdi domain >>>= function + | response -> ( + let _, _, _, nbds = + Storage_interface.implementations_of_backend + { + Storage_interface.implementations= + List.map convert_implementation + 
response.Xapi_storage.Data.implementations + } + in + match nbds with + | ({uri} as nbd) :: _ -> + info (fun m -> + m "%s qemu-dp nbd server address is %s" __FUNCTION__ uri + ) + >>= fun () -> + let socket, _export = Storage_interface.parse_nbd_uri nbd in + return socket + | _ -> + fail (backend_error "No nbd server found" []) + ) + + module MIRROR = struct end end (* Bind the implementations *) @@ -1854,6 +1907,7 @@ let bind ~volume_script_dir = (* this version field will be updated once query is called *) let version = ref None end in + let u name _ = failwith ("Unimplemented: " ^ name) in let module Query = QueryImpl (RuntimeMeta) in S.Query.query Query.query_impl ; S.Query.diagnostics Query.query_diagnostics_impl ; @@ -1898,45 +1952,50 @@ let bind ~volume_script_dir = S.VDI.set_content_id VDI.vdi_set_content_id_impl ; S.VDI.add_to_sm_config VDI.vdi_add_to_sm_config_impl ; S.VDI.remove_from_sm_config VDI.vdi_remove_from_sm_config_impl ; + S.VDI.similar_content VDI.similar_content_impl ; let module DP = DPImpl (RuntimeMeta) in S.DP.destroy2 DP.dp_destroy2 ; S.DP.attach_info DP.dp_attach_info_impl ; let module DATA = DATAImpl (RuntimeMeta) in - S.DATA.MIRROR.get_nbd_server DATA.MIRROR.get_nbd_server_impl ; - S.DATA.MIRROR.import_activate DATA.MIRROR.data_import_activate_impl ; + S.DATA.copy (u "DATA.copy") ; + S.DATA.mirror DATA.mirror_impl ; + S.DATA.stat DATA.stat_impl ; + S.DATA.get_nbd_server DATA.get_nbd_server_impl ; + S.DATA.import_activate DATA.data_import_activate_impl ; - let u name _ = failwith ("Unimplemented: " ^ name) in S.get_by_name (u "get_by_name") ; S.VDI.get_by_name (u "VDI.get_by_name") ; - S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; - S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; S.UPDATES.get (u "UPDATES.get") ; S.SR.update_snapshot_info_dest (u "SR.update_snapshot_info_dest") ; - S.DATA.MIRROR.list (u "DATA.MIRROR.list") ; S.TASK.stat (u "TASK.stat") ; S.DP.diagnostics (u "DP.diagnostics") ; 
S.TASK.destroy (u "TASK.destroy") ; S.DP.destroy (u "DP.destroy") ; - S.VDI.similar_content (u "VDI.similar_content") ; - S.DATA.copy (u "DATA.copy") ; S.DP.stat_vdi (u "DP.stat_vdi") ; + S.DATA.MIRROR.send_start (u "DATA.MIRROR.send_start") ; + S.DATA.MIRROR.receive_start (u "DATA.MIRROR.receive_start") ; + S.DATA.MIRROR.receive_start2 (u "DATA.MIRROR.receive_start2") ; + S.DATA.MIRROR.receive_start3 (u "DATA.MIRROR.receive_start3") ; S.DATA.MIRROR.receive_finalize (u "DATA.MIRROR.receive_finalize") ; S.DATA.MIRROR.receive_finalize2 (u "DATA.MIRROR.receive_finalize2") ; + S.DATA.MIRROR.receive_finalize3 (u "DATA.MIRROR.receive_finalize3") ; + S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; + S.DATA.MIRROR.receive_cancel2 (u "DATA.MIRROR.receive_cancel2") ; + S.DATA.MIRROR.pre_deactivate_hook (u "DATA.MIRROR.pre_deactivate_hook") ; + S.DATA.MIRROR.has_mirror_failed (u "DATA.MIRROR.has_mirror_failed") ; + S.DATA.MIRROR.list (u "DATA.MIRROR.list") ; + S.DATA.MIRROR.stat (u "DATA.MIRROR.stat") ; S.DP.create (u "DP.create") ; S.TASK.cancel (u "TASK.cancel") ; + S.TASK.list (u "TASK.list") ; S.VDI.attach (u "VDI.attach") ; S.VDI.attach2 (u "VDI.attach2") ; S.VDI.activate (u "VDI.activate") ; - S.DATA.MIRROR.stat (u "DATA.MIRROR.stat") ; - S.TASK.list (u "TASK.list") ; S.VDI.get_url (u "VDI.get_url") ; - S.DATA.MIRROR.start (u "DATA.MIRROR.start") ; S.Policy.get_backend_vm (u "Policy.get_backend_vm") ; - S.DATA.MIRROR.receive_cancel (u "DATA.MIRROR.receive_cancel") ; S.SR.update_snapshot_info_src (u "SR.update_snapshot_info_src") ; - S.DATA.MIRROR.stop (u "DATA.MIRROR.stop") ; Rpc_lwt.server S.implementation let process_smapiv2_requests server txt = @@ -2178,6 +2237,19 @@ let register_exn_pretty_printers () = assert false ) +module XapiStorageScript : Observer_helpers.Server_impl = struct + include Observer_skeleton.Observer + + let create _context ~dbg:_ ~uuid:_ ~name_label:_ ~attributes:_ ~endpoints:_ + ~enabled = + config.use_observer <- enabled + + 
let destroy _context ~dbg:_ ~uuid:_ = config.use_observer <- false + + let set_enabled _context ~dbg:_ ~uuid:_ ~enabled = + config.use_observer <- enabled +end + let () = register_exn_pretty_printers () ; let root_dir = ref "/var/lib/xapi/storage-scripts" in @@ -2224,9 +2296,17 @@ let () = Logs.set_reporter (lwt_reporter ()) ; Logs.set_level ~all:true (Some Logs.Info) ; + + let module S = Observer_helpers.Server (XapiStorageScript) () in + let s = + Xcp_service.make ~path:Observer_helpers.default_path + ~queue_name:Observer_helpers.queue_name ~rpc_fn:S.process () + in + let (_ : Thread.t) = + Thread.create (fun () -> Xcp_service.serve_forever s) () + in + let main = - let* observer_enabled = observer_is_component_enabled () in - config.use_observer <- observer_enabled ; if !self_test_only then self_test ~root_dir:!root_dir else diff --git a/ocaml/xapi-storage-script/python-self-test.t b/ocaml/xapi-storage-script/python-self-test.t new file mode 100644 index 00000000000..9ac59bed953 --- /dev/null +++ b/ocaml/xapi-storage-script/python-self-test.t @@ -0,0 +1,47 @@ +run the self-checks for xapi-storage-script, it logs to stderr, so process +stderr instead of stdout + +The output of the logs needs to delete randomization, there are two sources: +pids and uuids + + $ export PYTHONPATH=../xapi-storage/python/; ./main.exe --root=$PWD/test --self-test-only=true 2>&1 >/dev/null | sed -E 's/\[[0-9]+\]/[PID]/g' | sed -E 's/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/UUID/g' + [INFO] {"method":"Plugin.query","params":[{"dbg":"debug"}],"id":2} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Plugin.Query[PID] succeeded: {"plugin": "dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "vendor": "Citrix Systems Inc", "copyright": "(C) 2018 Citrix Inc", "version": "1.0", "required_api_version": "5.0", "features": ["SR_ATTACH", "SR_DETACH", "SR_CREATE", "SR_PROBE", "VDI_CREATE", "VDI_DESTROY"], "configuration": {}, 
"required_cluster_stack": []} + + [INFO] {"method":"Plugin.diagnostics","params":[{"dbg":"debug"}],"id":4} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Plugin.diagnostics[PID] succeeded: "Dummy diagnostics" + + [INFO] {"method":"SR.create","params":[{"description":"dummy description","name":"dummy name","configuration":{"uri":"file:///dev/null"},"uuid":"dummySR","dbg":"debug"}],"id":6} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.create[PID] succeeded: {"uri": "file:///tmp/dummy"} + + [INFO] {"method":"SR.attach","params":[{"configuration":{"uri":"file:///tmp/dummy"},"dbg":"debug"}],"id":9} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.attach[PID] succeeded: "file:///tmp/dummy" + + [INFO] {"method":"SR.stat","params":[{"sr":"file:///tmp/dummy","dbg":"debug"}],"id":10} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.stat[PID] succeeded: {"sr": "file:///tmp/dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "total_space": 0, "free_space": 0, "datasources": [], "clustered": false, "health": ["Healthy", ""]} + + [INFO] {"method":"Volume.create","params":[{"sharable":false,"size":0,"description":"vdi description","name":"vdi name","sr":"file:///tmp/dummy","dbg":"debug"}],"id":12} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.create[PID] succeeded: {"name": "vdi name", "description": "vdi description", "key": "UUID", "uuid": "UUID", "read_write": true, "sharable": false, "virtual_size": 0, "physical_utilisation": 0, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}} + + [INFO] {"method":"Volume.set","params":[{"v":"redolog","k":"vdi-type","key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":13} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.set[PID] succeeded: null + + [INFO] {"method":"Volume.stat","params":[{"key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":15} + [INFO] 
$TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.stat[PID] succeeded: {"name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "key": "UUID", "uuid": "UUID", "read_write": true, "virtual_size": 0, "physical_utilisation": 0, "sharable": false, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}} + + [INFO] {"method":"Volume.stat","params":[{"key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":17} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.stat[PID] succeeded: {"name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "key": "UUID", "uuid": "UUID", "read_write": true, "virtual_size": 0, "physical_utilisation": 0, "sharable": false, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}} + + [INFO] {"method":"Volume.destroy","params":[{"key":"UUID","sr":"file:///tmp/dummy","dbg":"debug"}],"id":18} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/Volume.destroy[PID] succeeded: null + + [INFO] {"method":"SR.stat","params":[{"sr":"file:///tmp/dummy","dbg":"debug"}],"id":20} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.stat[PID] succeeded: {"sr": "file:///tmp/dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "total_space": 0, "free_space": 0, "datasources": [], "clustered": false, "health": ["Healthy", ""]} + + [INFO] {"method":"SR.ls","params":[{"sr":"file:///tmp/dummy","dbg":"debug"}],"id":22} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.ls[PID] succeeded: [{"name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "key": "file1", "uuid": "file1", "read_write": true, "virtual_size": 0, "physical_utilisation": 0, "sharable": false, "uri": ["raw+file:///tmp/disk.raw"], "keys": {}}] + + [INFO] {"method":"SR.probe","params":[{"configuration":{"uri":"file:///tmp/dummy"},"dbg":"debug"}],"id":24} + [INFO] $TESTCASE_ROOT/test/volume/org.xen.xapi.storage.dummyv5/SR.probe[PID] succeeded: 
[{"configuration": {"uri": "file:///tmp/dummy"}, "complete": true, "extra_info": {}}, {"configuration": {"uri": "file:///tmp/dummy", "sr_uuid": "myuuid"}, "sr": {"sr": "file:///tmp/dummy", "name": "dummy SR plugin", "description": "Dummy v5 SR for unit tests.", "total_space": 0, "free_space": 0, "datasources": [], "clustered": false, "health": ["Healthy", ""]}, "complete": true, "extra_info": {}}] + + [INFO] test thread shutdown cleanly diff --git a/ocaml/xapi-types/dune b/ocaml/xapi-types/dune index ab33ae1f354..3a49a7dca2e 100644 --- a/ocaml/xapi-types/dune +++ b/ocaml/xapi-types/dune @@ -23,6 +23,8 @@ xapi-stdext-unix ) (wrapped false) - (preprocess (per_module ((pps ppx_deriving_rpc) API Event_types Features SecretString))) + (preprocess + (per_module + ((pps ppx_deriving_rpc) API Event_types SecretString) + ((pps ppx_deriving_rpc ppx_deriving.enum) Features))) ) - diff --git a/ocaml/xapi-types/event_types.ml b/ocaml/xapi-types/event_types.ml index 83c82b0bc8d..46ea2d310df 100644 --- a/ocaml/xapi-types/event_types.ml +++ b/ocaml/xapi-types/event_types.ml @@ -20,37 +20,15 @@ let rpc_of_op = API.rpc_of_event_operation let op_of_rpc = API.event_operation_of_rpc type event = { - id: string - ; ts: string - ; ty: string - ; op: op - ; reference: string - ; snapshot: Rpc.t option + id: string [@key "id"] + ; ts: string [@key "timestamp"] + ; ty: string [@key "class"] + ; op: op [@key "operation"] + ; reference: string [@key "ref"] + ; snapshot: Rpc.t option [@key "snapshot"] } [@@deriving rpc] -let ev_struct_remap = - [ - ("id", "id") - ; ("ts", "timestamp") - ; ("ty", "class") - ; ("op", "operation") - ; ("reference", "ref") - ; ("snapshot", "snapshot") - ] - -let remap map str = - match str with - | Rpc.Dict d -> - Rpc.Dict (List.map (fun (k, v) -> (List.assoc k map, v)) d) - | _ -> - str - -let rpc_of_event ev = remap ev_struct_remap (rpc_of_event ev) - -let event_of_rpc rpc = - event_of_rpc (remap (List.map (fun (k, v) -> (v, k)) ev_struct_remap) rpc) - type 
events = event list [@@deriving rpc] type token = string [@@deriving rpc] diff --git a/ocaml/xapi-types/features.ml b/ocaml/xapi-types/features.ml index 52469387acc..7453ab49a7c 100644 --- a/ocaml/xapi-types/features.ml +++ b/ocaml/xapi-types/features.ml @@ -68,79 +68,119 @@ type feature = | VM_groups | VM_start | VM_appliance_start -[@@deriving rpc] +[@@deriving rpc, enum] type orientation = Positive | Negative -let keys_of_features = - [ - (VLAN, ("restrict_vlan", Negative, "VLAN")) - ; (QoS, ("restrict_qos", Negative, "QoS")) - ; (Shared_storage, ("restrict_pool_attached_storage", Negative, "SStorage")) - ; (Netapp, ("restrict_netapp", Negative, "NTAP")) - ; (Equalogic, ("restrict_equalogic", Negative, "EQL")) - ; (Pooling, ("restrict_pooling", Negative, "Pool")) - ; (HA, ("enable_xha", Positive, "XHA")) - ; (Marathon, ("restrict_marathon", Negative, "MTC")) - ; (Email, ("restrict_email_alerting", Negative, "email")) - ; (Performance, ("restrict_historical_performance", Negative, "perf")) - ; (WLB, ("restrict_wlb", Negative, "WLB")) - ; (RBAC, ("restrict_rbac", Negative, "RBAC")) - ; (DMC, ("restrict_dmc", Negative, "DMC")) - ; (Checkpoint, ("restrict_checkpoint", Negative, "chpt")) - ; (CPU_masking, ("restrict_cpu_masking", Negative, "Mask")) - ; (Connection, ("restrict_connection", Negative, "Cnx")) - ; (No_platform_filter, ("platform_filter", Negative, "Plat")) - ; (No_nag_dialog, ("regular_nag_dialog", Negative, "nonag")) - ; (VMPR, ("restrict_vmpr", Negative, "VMPR")) - ; (VMSS, ("restrict_vmss", Negative, "VMSS")) - ; (IntelliCache, ("restrict_intellicache", Negative, "IntelliCache")) - ; (GPU, ("restrict_gpu", Negative, "GPU")) - ; (DR, ("restrict_dr", Negative, "DR")) - ; (VIF_locking, ("restrict_vif_locking", Negative, "VIFLock")) - ; (Storage_motion, ("restrict_storage_xen_motion", Negative, "SXM")) - ; (VGPU, ("restrict_vgpu", Negative, "vGPU")) - ; (Integrated_GPU, ("restrict_integrated_gpu_passthrough", Negative, "iGPU")) - ; (VSS, ("restrict_vss", 
Negative, "VSS")) - ; ( Guest_agent_auto_update - , ("restrict_guest_agent_auto_update", Negative, "GAAU") - ) - ; ( PCI_device_for_auto_update - , ("restrict_pci_device_for_auto_update", Negative, "PciAU") - ) - ; (Xen_motion, ("restrict_xen_motion", Negative, "Live_migration")) - ; (Guest_ip_setting, ("restrict_guest_ip_setting", Negative, "GuestIP")) - ; (AD, ("restrict_ad", Negative, "AD")) - ; (Nested_virt, ("restrict_nested_virt", Negative, "Nested_virt")) - ; (Live_patching, ("restrict_live_patching", Negative, "Live_patching")) - ; ( Live_set_vcpus - , ("restrict_set_vcpus_number_live", Negative, "Live_set_vcpus") - ) - ; (PVS_proxy, ("restrict_pvs_proxy", Negative, "PVS_proxy")) - ; (IGMP_snooping, ("restrict_igmp_snooping", Negative, "IGMP_snooping")) - ; (RPU, ("restrict_rpu", Negative, "RPU")) - ; (Pool_size, ("restrict_pool_size", Negative, "Pool_size")) - ; (CBT, ("restrict_cbt", Negative, "CBT")) - ; (USB_passthrough, ("restrict_usb_passthrough", Negative, "USB_passthrough")) - ; (Network_sriov, ("restrict_network_sriov", Negative, "Network_sriov")) - ; (Corosync, ("restrict_corosync", Negative, "Corosync")) - ; (Cluster_address, ("restrict_cluster_address", Negative, "Cluster_address")) - ; (Zstd_export, ("restrict_zstd_export", Negative, "Zstd_export")) - ; ( Pool_secret_rotation - , ("restrict_pool_secret_rotation", Negative, "Pool_secret_rotation") - ) - ; ( Certificate_verification - , ("restrict_certificate_verification", Negative, "Certificate_verification") - ) - ; (Updates, ("restrict_updates", Negative, "Upd")) - ; ( Internal_repo_access - , ("restrict_internal_repo_access", Negative, "Internal_repo_access") - ) - ; (VTPM, ("restrict_vtpm", Negative, "VTPM")) - ; (VM_groups, ("restrict_vm_groups", Negative, "VM_groups")) - ; (VM_start, ("restrict_vm_start", Negative, "Start")) - ; (VM_appliance_start, ("restrict_vm_appliance_start", Negative, "Start")) - ] +let props_of_feature = function + | VLAN -> + ("restrict_vlan", Negative, "VLAN") + 
| QoS -> + ("restrict_qos", Negative, "QoS") + | Shared_storage -> + ("restrict_pool_attached_storage", Negative, "SStorage") + | Netapp -> + ("restrict_netapp", Negative, "NTAP") + | Equalogic -> + ("restrict_equalogic", Negative, "EQL") + | Pooling -> + ("restrict_pooling", Negative, "Pool") + | HA -> + ("enable_xha", Positive, "XHA") + | Marathon -> + ("restrict_marathon", Negative, "MTC") + | Email -> + ("restrict_email_alerting", Negative, "email") + | Performance -> + ("restrict_historical_performance", Negative, "perf") + | WLB -> + ("restrict_wlb", Negative, "WLB") + | RBAC -> + ("restrict_rbac", Negative, "RBAC") + | DMC -> + ("restrict_dmc", Negative, "DMC") + | Checkpoint -> + ("restrict_checkpoint", Negative, "chpt") + | CPU_masking -> + ("restrict_cpu_masking", Negative, "Mask") + | Connection -> + ("restrict_connection", Negative, "Cnx") + | No_platform_filter -> + ("platform_filter", Negative, "Plat") + | No_nag_dialog -> + ("regular_nag_dialog", Negative, "nonag") + | VMPR -> + ("restrict_vmpr", Negative, "VMPR") + | VMSS -> + ("restrict_vmss", Negative, "VMSS") + | IntelliCache -> + ("restrict_intellicache", Negative, "IntelliCache") + | GPU -> + ("restrict_gpu", Negative, "GPU") + | DR -> + ("restrict_dr", Negative, "DR") + | VIF_locking -> + ("restrict_vif_locking", Negative, "VIFLock") + | Storage_motion -> + ("restrict_storage_xen_motion", Negative, "SXM") + | VGPU -> + ("restrict_vgpu", Negative, "vGPU") + | Integrated_GPU -> + ("restrict_integrated_gpu_passthrough", Negative, "iGPU") + | VSS -> + ("restrict_vss", Negative, "VSS") + | Guest_agent_auto_update -> + ("restrict_guest_agent_auto_update", Negative, "GAAU") + | PCI_device_for_auto_update -> + ("restrict_pci_device_for_auto_update", Negative, "PciAU") + | Xen_motion -> + ("restrict_xen_motion", Negative, "Live_migration") + | Guest_ip_setting -> + ("restrict_guest_ip_setting", Negative, "GuestIP") + | AD -> + ("restrict_ad", Negative, "AD") + | Nested_virt -> + 
("restrict_nested_virt", Negative, "Nested_virt") + | Live_patching -> + ("restrict_live_patching", Negative, "Live_patching") + | Live_set_vcpus -> + ("restrict_set_vcpus_number_live", Negative, "Live_set_vcpus") + | PVS_proxy -> + ("restrict_pvs_proxy", Negative, "PVS_proxy") + | IGMP_snooping -> + ("restrict_igmp_snooping", Negative, "IGMP_snooping") + | RPU -> + ("restrict_rpu", Negative, "RPU") + | Pool_size -> + ("restrict_pool_size", Negative, "Pool_size") + | CBT -> + ("restrict_cbt", Negative, "CBT") + | USB_passthrough -> + ("restrict_usb_passthrough", Negative, "USB_passthrough") + | Network_sriov -> + ("restrict_network_sriov", Negative, "Network_sriov") + | Corosync -> + ("restrict_corosync", Negative, "Corosync") + | Cluster_address -> + ("restrict_cluster_address", Negative, "Cluster_address") + | Zstd_export -> + ("restrict_zstd_export", Negative, "Zstd_export") + | Pool_secret_rotation -> + ("restrict_pool_secret_rotation", Negative, "Pool_secret_rotation") + | Certificate_verification -> + ("restrict_certificate_verification", Negative, "Certificate_verification") + | Updates -> + ("restrict_updates", Negative, "Upd") + | Internal_repo_access -> + ("restrict_internal_repo_access", Negative, "Internal_repo_access") + | VTPM -> + ("restrict_vtpm", Negative, "VTPM") + | VM_groups -> + ("restrict_vm_groups", Negative, "VM_groups") + | VM_start -> + ("restrict_vm_start", Negative, "Start") + | VM_appliance_start -> + ("restrict_vm_appliance_start", Negative, "Start") (* A list of features that must be considered "enabled" by `of_assoc_list` if the feature string is missing from the list. 
These are existing features @@ -149,52 +189,40 @@ let keys_of_features = let enabled_when_unknown = [Xen_motion; AD; Updates; VM_start; VM_appliance_start] -let name_of_feature f = rpc_of_feature f |> Rpc.string_of_rpc - -let string_of_feature f = - let str, o, _ = List.assoc f keys_of_features in - (str, o) +let all_features = + let length = max_feature - min_feature + 1 in + let start = min_feature in + List.init length (fun i -> feature_of_enum (i + start) |> Option.get) -let tag_of_feature f = - let _, _, tag = List.assoc f keys_of_features in - tag +let name_of_feature f = rpc_of_feature f |> Rpc.string_of_rpc -let all_features = List.map (fun (f, _) -> f) keys_of_features +let is_enabled v = function Positive -> v | Negative -> not v let to_compact_string (s : feature list) = let get_tag f = - let tag = tag_of_feature f in + let _, _, tag = props_of_feature f in if List.mem f s then tag else String.make (String.length tag) ' ' in - let tags = List.map get_tag all_features in - String.concat " " tags + List.map get_tag all_features |> String.concat " " let to_assoc_list (s : feature list) = let get_map f = - let str, o = string_of_feature f in + let str, o, _ = props_of_feature f in let switch = List.mem f s in - let switch = string_of_bool (if o = Positive then switch else not switch) in + let switch = string_of_bool (is_enabled switch o) in (str, switch) in List.map get_map all_features let of_assoc_list l = - let get_feature f = + let enabled f = try - let str, o = string_of_feature f in - let v = bool_of_string (List.assoc str l) in - let v = if o = Positive then v else not v in - if v then Some f else None - with _ -> if List.mem f enabled_when_unknown then Some f else None + let str, o, _ = props_of_feature f in + let v = List.assoc str l in + is_enabled (bool_of_string v) o + with _ -> List.mem f enabled_when_unknown in - (* Filter_map to avoid having to carry the whole xapi-stdext-std - * Note that the following is not tail recursive, in this case I - 
* have chosen such implementation because the feature list is small - * and the implementation looks readable and fairly self-contained. - * Do not use this pattern for lists that can be long. *) - List.fold_right - (fun f acc -> match get_feature f with Some v -> v :: acc | None -> acc) - all_features [] + List.filter enabled all_features diff --git a/ocaml/xapi-types/secretString.ml b/ocaml/xapi-types/secretString.ml index 781dac86697..b552e46edfd 100644 --- a/ocaml/xapi-types/secretString.ml +++ b/ocaml/xapi-types/secretString.ml @@ -24,6 +24,8 @@ let write_to_channel c s = output_string c s let equal = String.equal +let length = String.length + let pool_secret = "pool_secret" let with_cookie t cookies = (pool_secret, t) :: cookies diff --git a/ocaml/xapi-types/secretString.mli b/ocaml/xapi-types/secretString.mli index 82d97eaaa72..1f14b6a9e95 100644 --- a/ocaml/xapi-types/secretString.mli +++ b/ocaml/xapi-types/secretString.mli @@ -16,7 +16,7 @@ * It is still possible to convert it to Rpc.t and recover it that way, * it is not a protection against willfully recovering the protected string * (we do need to send these as parameters in RPCs). 
- * *) + *) (** a type with no direct conversions to string *) type t @@ -25,6 +25,8 @@ val of_string : string -> t val equal : t -> t -> bool +val length : t -> int + val json_rpc_of_t : t -> Rpc.t val t_of_rpc : Rpc.t -> t diff --git a/ocaml/xapi/cancel_tasks.ml b/ocaml/xapi/cancel_tasks.ml index 690cd1026b1..b15e0aac057 100644 --- a/ocaml/xapi/cancel_tasks.ml +++ b/ocaml/xapi/cancel_tasks.ml @@ -21,6 +21,7 @@ open D let safe_wrapper n f x = try f x with e -> + Backtrace.is_important e ; debug "Caught exception while cancelling tasks (%s): %s" n (ExnHelper.string_of_exn e) ; Debug.log_backtrace e (Backtrace.get e) @@ -83,14 +84,14 @@ let update_all_allowed_operations ~__context = in let vbd_records = List.map - (fun vbd -> (vbd, Db.VBD.get_record_internal ~__context ~self:vbd)) + (fun vbd -> Db.VBD.get_record_internal ~__context ~self:vbd) all_vbds in List.iter (safe_wrapper "allowed_ops - VDIs" (fun self -> let relevant_vbds = List.filter - (fun (_, vbd_record) -> vbd_record.Db_actions.vBD_VDI = self) + (fun vbd_record -> vbd_record.Db_actions.vBD_VDI = self) vbd_records in Xapi_vdi.update_allowed_operations_internal ~__context ~self diff --git a/ocaml/xapi/cert_refresh.ml b/ocaml/xapi/cert_refresh.ml index 12ab75dc230..213d0abc224 100644 --- a/ocaml/xapi/cert_refresh.ml +++ b/ocaml/xapi/cert_refresh.ml @@ -79,7 +79,7 @@ let host ~__context ~type' = Server_error (cannot_contact_host, [Ref.string_of (HostSet.choose unreachable)]) ) ; - let content = X509.Certificate.encode_pem cert |> Cstruct.to_string in + let content = X509.Certificate.encode_pem cert in (* distribute public part of new cert in pool *) Cert_distrib.distribute_new_host_cert ~__context ~host ~content ; (* replace certs in file system on host *) diff --git a/ocaml/xapi/certificates.ml b/ocaml/xapi/certificates.ml index 4d9702bb439..f0a75779642 100644 --- a/ocaml/xapi/certificates.ml +++ b/ocaml/xapi/certificates.ml @@ -20,19 +20,19 @@ module D = Debug.Make (struct let name = "certificates" end) 
open D (* Certificate locations: - * a) stunnel external = /etc/xensource/xapi-ssl.pem - * b) stunnel SNI (internal) = /etc/xensource/xapi-pool-tls.pem - * c) user trusted cert folder = /etc/stunnel/certs/ - * d) internal trusted cert folder = /etc/stunnel/certs-pool/ - * e) appliance trusted bundle = /etc/stunnel/xapi-stunnel-ca-bundle.pem - * f) host-in-pool trusted bundle = /etc/stunnel/xapi-pool-ca-bundle.pem - * - * Note that the bundles (e) and (f) are generated automatically using the contents of (c) and (d) respectively *) + * a) stunnel external = /etc/xensource/xapi-ssl.pem + * b) stunnel SNI (internal) = /etc/xensource/xapi-pool-tls.pem + * c) user trusted cert folder = /etc/stunnel/certs/ + * d) internal trusted cert folder = /etc/stunnel/certs-pool/ + * e) appliance trusted bundle = /etc/stunnel/xapi-stunnel-ca-bundle.pem + * f) host-in-pool trusted bundle = /etc/stunnel/xapi-pool-ca-bundle.pem + * + * Note that the bundles (e) and (f) are generated automatically using the contents of (c) and (d) respectively *) type t_trusted = CA_Certificate | CRL let pem_of_string x = - match Cstruct.of_string x |> X509.Certificate.decode_pem with + match X509.Certificate.decode_pem x with | Error _ -> D.error "pem_of_string: failed to parse certificate string" ; raise @@ -75,7 +75,7 @@ let to_string = function CA_Certificate -> "CA certificate" | CRL -> "CRL" adding a colon between every octet, in uppercase. 
*) let pp_hash hash = - let hex = Hex.(show @@ of_cstruct hash) in + let hex = Hex.(show @@ of_string hash) in let length = (3 * String.length hex / 2) - 1 in let value_of i = match (i + 1) mod 3 with @@ -441,9 +441,7 @@ let get_internal_server_certificate () = open Rresult let hostnames_of_pem_cert pem = - Cstruct.of_string pem - |> X509.Certificate.decode_pem - >>| X509.Certificate.hostnames + X509.Certificate.decode_pem pem >>| X509.Certificate.hostnames let install_server_certificate ~pem_chain ~pem_leaf ~pkcs8_private_key ~path = let installation = diff --git a/ocaml/xapi/certificates.mli b/ocaml/xapi/certificates.mli index 064c7e47e31..6776220df45 100644 --- a/ocaml/xapi/certificates.mli +++ b/ocaml/xapi/certificates.mli @@ -18,10 +18,9 @@ type t_trusted = CA_Certificate | CRL val pem_of_string : string -> X509.Certificate.t -val pp_hash : Cstruct.t -> string +val pp_hash : string -> string -val pp_fingerprint : - hash_type:Mirage_crypto.Hash.hash -> X509.Certificate.t -> string +val pp_fingerprint : hash_type:Digestif.hash' -> X509.Certificate.t -> string val validate_name : t_trusted -> string -> unit diff --git a/ocaml/xapi/certificates_sync.ml b/ocaml/xapi/certificates_sync.ml index a9691adf298..2ab3492ffa8 100644 --- a/ocaml/xapi/certificates_sync.ml +++ b/ocaml/xapi/certificates_sync.ml @@ -57,10 +57,8 @@ let get_server_cert path = | Error msg -> Error (`Msg (msg, [])) | Ok cert -> - let host_pem = cert.GP.host_cert in let* host_cert = - Cstruct.of_string host_pem - |> X509.Certificate.decode_pem + X509.Certificate.decode_pem cert.GP.host_cert |> R.reword_error (fun (`Msg msg) -> D.info {|Failed to decode certificate because "%s"|} msg ; `Msg (server_certificate_invalid, []) diff --git a/ocaml/xapi/check-no-lwtssl.sh b/ocaml/xapi/check-no-lwtssl.sh new file mode 100755 index 00000000000..c7065d0dbc0 --- /dev/null +++ b/ocaml/xapi/check-no-lwtssl.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +SSL=libssl +CRYPTO=libcrypto +DEPS="${SSL}|${CRYPTO}" + +ldd "$1" | grep 
-q -E "${DEPS}" 2>&1 +if [ $? -eq 1 ]; then + echo -e "\n\033[32;1m[OK]\033[0m $1 does not depend on ${DEPS}"; + exit 0 +else + echo -e "\n\033[31;1m[ERROR]\033[0m $1 depends on ${DEPS}"; + exit 1 +fi diff --git a/ocaml/xapi/console.ml b/ocaml/xapi/console.ml index 03cb4bf9559..b812cf65c76 100644 --- a/ocaml/xapi/console.ml +++ b/ocaml/xapi/console.ml @@ -185,7 +185,7 @@ let console_of_request __context req = let db = Context.database_of __context in let is_vm, _ = let module DB = - (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS) + (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS2) in match DB.get_table_from_ref db _ref with | Some c when c = Db_names.vm -> diff --git a/ocaml/xapi/context.ml b/ocaml/xapi/context.ml index b71ed4ca234..a49c8ecd1bb 100644 --- a/ocaml/xapi/context.ml +++ b/ocaml/xapi/context.ml @@ -504,11 +504,40 @@ let get_client_ip context = let get_user_agent context = match context.origin with Internal -> None | Http (rq, _) -> rq.user_agent -let with_tracing ?originator ~__context name f = +let finally_destroy_context ~__context f = + let tracing = __context.tracing in + Xapi_stdext_pervasives.Pervasiveext.finally + (fun () -> f __context) + (fun () -> + __context.tracing <- tracing ; + destroy __context ; + __context.tracing <- None + ) + +let with_context ?http_other_config ?quiet ?subtask_of ?session_id ?database + ?task_in_database ?task_description ?origin task_name f = + let __context = + make ?http_other_config ?quiet ?subtask_of ?session_id ?database + ?task_in_database ?task_description ?origin task_name + in + finally_destroy_context ~__context f + +let with_subcontext ~__context ?task_in_database task_name f = + let __context = make_subcontext ~__context ?task_in_database task_name in + finally_destroy_context ~__context f + +let with_forwarded_task ?http_other_config ?session_id ?origin task_id f = + let __context = + from_forwarded_task ?http_other_config ?session_id ?origin task_id 
+ in + finally_destroy_context ~__context f + +let with_tracing ?(attributes = []) ?originator ~__context name f = let open Tracing in let parent = __context.tracing in let span_attributes = Attributes.attr_of_originator originator + @ attributes @ make_attributes ~task_id:__context.task_id ?session_id:__context.session_id () in diff --git a/ocaml/xapi/context.mli b/ocaml/xapi/context.mli index 34e51afd2ee..ac3250f8569 100644 --- a/ocaml/xapi/context.mli +++ b/ocaml/xapi/context.mli @@ -146,7 +146,50 @@ val complete_tracing : ?error:exn * Printexc.raw_backtrace -> t -> unit val tracing_of : t -> Tracing.Span.t option +val finally_destroy_context : __context:t -> (t -> 'a) -> 'a +(** [finally_destroy_context ~context f] executes [f ~__context] and then + ensure [__context] is destroyed.*) + +val with_context : + ?http_other_config:(string * string) list + -> ?quiet:bool + -> ?subtask_of:API.ref_task + -> ?session_id:API.ref_session + -> ?database:Xapi_database.Db_ref.t + -> ?task_in_database:bool + -> ?task_description:string + -> ?origin:origin + -> string + -> (t -> 'a) + -> 'a +(** [with_context ?http_other_config ?quiet ?subtask_of ?session_id ?database + ?task_in_database ?task_description ?origin name f] creates a + context [__context], executes [f ~__context] and then ensure [__context] is + destroyed.*) + +val with_subcontext : + __context:t -> ?task_in_database:bool -> string -> (t -> 'a) -> 'a +(** [with_subcontext ~__context ?task_in_database name] creates a subcontext + [__context], executes [f ~__context] and then ensure `__context` is + destroyed.*) + +val with_forwarded_task : + ?http_other_config:(string * string) list + -> ?session_id:API.ref_session + -> ?origin:origin + -> API.ref_task + -> (t -> 'a) + -> 'a +(** [with_forwarded_task ?http_other_config ?session_id ?origin task f] + creates a context form frowarded task [task], executes [f ~__context] and + then ensure [__context] is destroyed.*) + val with_tracing : - ?originator:string -> 
__context:t -> string -> (t -> 'a) -> 'a + ?attributes:(string * string) list + -> ?originator:string + -> __context:t + -> string + -> (t -> 'a) + -> 'a val set_client_span : t -> Tracing.Span.t option diff --git a/ocaml/xapi/cpuid_helpers.ml b/ocaml/xapi/cpuid_helpers.ml index 1bf6731efad..d001b7ab73f 100644 --- a/ocaml/xapi/cpuid_helpers.ml +++ b/ocaml/xapi/cpuid_helpers.ml @@ -12,8 +12,6 @@ * GNU Lesser General Public License for more details. *) -open Xapi_globs - module D = Debug.Make (struct let name = "cpuid_helpers" end) open D @@ -24,20 +22,19 @@ let features_t t = (Xenops_interface.CPU_policy.of_string t) Xenops_interface.CPU_policy.to_string -let features = - Map_check.(field Xapi_globs.cpu_info_features_key (features_t `vm)) +let features = Map_check.(field Constants.cpu_info_features_key (features_t `vm)) let features_pv = - Map_check.(field Xapi_globs.cpu_info_features_pv_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_pv_key (features_t `host)) let features_hvm = - Map_check.(field Xapi_globs.cpu_info_features_hvm_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_hvm_key (features_t `host)) let features_pv_host = - Map_check.(field Xapi_globs.cpu_info_features_pv_host_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_pv_host_key (features_t `host)) let features_hvm_host = - Map_check.(field Xapi_globs.cpu_info_features_hvm_host_key (features_t `host)) + Map_check.(field Constants.cpu_info_features_hvm_host_key (features_t `host)) let cpu_count = Map_check.(field "cpu_count" int) @@ -55,7 +52,7 @@ let get_flags_for_vm ~__context domain_type cpu_info = | `pv -> features_pv in - let vendor = List.assoc cpu_info_vendor_key cpu_info in + let vendor = List.assoc Constants.cpu_info_vendor_key cpu_info in let migration = Map_check.getf features_field cpu_info in (vendor, migration) @@ -124,16 +121,18 @@ let assert_vm_is_compatible ~__context ~vm ~host = get_host_compatibility_info ~__context 
~domain_type ~host () in let vm_cpu_info = vm_rec.API.vM_last_boot_CPU_flags in - if List.mem_assoc cpu_info_vendor_key vm_cpu_info then ( + if List.mem_assoc Constants.cpu_info_vendor_key vm_cpu_info then ( (* Check the VM was last booted on a CPU with the same vendor as this host's CPU. *) - let vm_cpu_vendor = List.assoc cpu_info_vendor_key vm_cpu_info in + let vm_cpu_vendor = + List.assoc Constants.cpu_info_vendor_key vm_cpu_info + in debug "VM last booted on CPU of vendor %s; host CPUs are of vendor %s" vm_cpu_vendor host_cpu_vendor ; if vm_cpu_vendor <> host_cpu_vendor then fail "VM last booted on a host which had a CPU from a different vendor." ) ; - if List.mem_assoc cpu_info_features_key vm_cpu_info then ( + if List.mem_assoc Constants.cpu_info_features_key vm_cpu_info then ( (* Check the VM was last booted on a CPU whose features are a subset of the features of this host's CPU. *) let vm_cpu_features = Map_check.getf features vm_cpu_info in debug diff --git a/ocaml/xapi/create_misc.ml b/ocaml/xapi/create_misc.ml index cd0a97b4115..26d2c886d52 100644 --- a/ocaml/xapi/create_misc.ml +++ b/ocaml/xapi/create_misc.ml @@ -307,7 +307,7 @@ and create_domain_zero_console_record_with_protocol ~__context ~domain_zero_ref let location = Uri.( make ~scheme:"https" ~host:address ~path:Constants.console_uri - ~query:[("ref", [Ref.string_of domain_zero_ref])] + ~query:[("ref", [Ref.string_of console_ref])] () |> to_string ) @@ -579,16 +579,16 @@ let create_host_cpu ~__context host_info = ; ("model", cpu_info.model) ; ("stepping", cpu_info.stepping) ; ("flags", cpu_info.flags) - ; ( Xapi_globs.cpu_info_features_pv_key + ; ( Constants.cpu_info_features_pv_key , CPU_policy.to_string cpu_info.features_pv ) - ; ( Xapi_globs.cpu_info_features_hvm_key + ; ( Constants.cpu_info_features_hvm_key , CPU_policy.to_string cpu_info.features_hvm ) - ; ( Xapi_globs.cpu_info_features_hvm_host_key + ; ( Constants.cpu_info_features_hvm_host_key , CPU_policy.to_string 
cpu_info.features_hvm_host ) - ; ( Xapi_globs.cpu_info_features_pv_host_key + ; ( Constants.cpu_info_features_pv_host_key , CPU_policy.to_string cpu_info.features_pv_host ) ] @@ -698,8 +698,8 @@ let create_pool_cpuinfo ~__context = ("vendor", "") ; ("socket_count", "0") ; ("cpu_count", "0") - ; (Xapi_globs.cpu_info_features_pv_host_key, "") - ; (Xapi_globs.cpu_info_features_hvm_host_key, "") + ; (Constants.cpu_info_features_pv_host_key, "") + ; (Constants.cpu_info_features_hvm_host_key, "") ] in let pool_cpuinfo = List.fold_left merge zero all_host_cpus in diff --git a/ocaml/xapi/db.ml b/ocaml/xapi/db.ml index 4b4b6c2deea..f343086a2d2 100644 --- a/ocaml/xapi/db.ml +++ b/ocaml/xapi/db.ml @@ -23,5 +23,5 @@ let is_valid_ref __context r = false else let t = Context.database_of __context in - let module DB = (val Db_cache.get t : Db_interface.DB_ACCESS) in + let module DB = (val Db_cache.get t : Db_interface.DB_ACCESS2) in DB.is_valid_ref t (Ref.string_of r) diff --git a/ocaml/xapi/db_gc.ml b/ocaml/xapi/db_gc.ml index c8c68309369..50e2a23a6a1 100644 --- a/ocaml/xapi/db_gc.ml +++ b/ocaml/xapi/db_gc.ml @@ -165,16 +165,18 @@ let detect_rolling_upgrade ~__context = in (* Resynchronise *) if actually_in_progress <> pool_says_in_progress then ( - let platform_versions = + let host_versions = List.map (fun host -> - Helpers.version_string_of ~__context (Helpers.LocalObject host) + Helpers.get_software_versions ~__context + (Helpers.LocalObject host) + |> Helpers.versions_string_of ) (Db.Host.get_all ~__context) in debug "xapi platform version = %s; host platform versions = [ %s ]" (Xapi_version.platform_version ()) - (String.concat "; " platform_versions) ; + (String.concat "; " host_versions) ; warn "Pool thinks rolling upgrade%s in progress but Host version \ numbers indicate otherwise; correcting" diff --git a/ocaml/xapi/db_gc_util.ml b/ocaml/xapi/db_gc_util.ml index 202b51cc5eb..bcacc7d86c0 100644 --- a/ocaml/xapi/db_gc_util.ml +++ b/ocaml/xapi/db_gc_util.ml @@ -28,7 +28,7 
@@ let gc_connector ~__context get_all get_record valid_ref1 valid_ref2 delete_record = let db = Context.database_of __context in let module DB = - (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS) + (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS2) in let all_refs = get_all ~__context in let do_gc ref = diff --git a/ocaml/xapi/dbsync_master.ml b/ocaml/xapi/dbsync_master.ml index efd5be874a0..f8316b81993 100644 --- a/ocaml/xapi/dbsync_master.ml +++ b/ocaml/xapi/dbsync_master.ml @@ -54,7 +54,7 @@ let create_pool_record ~__context = ~update_sync_day:0L ~update_sync_enabled:false ~local_auth_max_threads:8L ~ext_auth_max_threads:1L ~ext_auth_cache_enabled:false ~ext_auth_cache_size:50L ~ext_auth_cache_expiry:300L ~recommendations:[] - ~license_server:[] + ~license_server:[] ~ha_reboot_vm_on_internal_shutdown:true let set_master_ip ~__context = let ip = diff --git a/ocaml/xapi/dbsync_slave.ml b/ocaml/xapi/dbsync_slave.ml index 366990e2692..ab8a6a3ef24 100644 --- a/ocaml/xapi/dbsync_slave.ml +++ b/ocaml/xapi/dbsync_slave.ml @@ -60,6 +60,11 @@ let create_localhost ~__context info = ~license_server:[("address", "localhost"); ("port", "27000")] ~local_cache_sr:Ref.null ~chipset_info:[] ~ssl_legacy:false ~last_software_update:Date.epoch ~last_update_hash:"" + ~ssh_enabled:Constants.default_ssh_enabled + ~ssh_enabled_timeout:Constants.default_ssh_enabled_timeout + ~ssh_expiry:Date.epoch + ~console_idle_timeout:Constants.default_console_idle_timeout + ~ssh_auto_mode:!Xapi_globs.ssh_auto_mode_default in () @@ -376,5 +381,32 @@ let update_env __context sync_keys = Create_misc.create_chipset_info ~__context info ) ; switched_sync Xapi_globs.sync_gpus (fun () -> Xapi_pgpu.update_gpus ~__context) ; + switched_sync Xapi_globs.sync_ssh_status (fun () -> + let ssh_service = !Xapi_globs.ssh_service in + let status = Fe_systemctl.is_active ~service:ssh_service in + Db.Host.set_ssh_enabled ~__context ~self:localhost ~value:status ; + let 
auto_mode_in_db = + Db.Host.get_ssh_auto_mode ~__context ~self:localhost + in + let ssh_monitor_enabled = + Fe_systemctl.is_active ~service:!Xapi_globs.ssh_monitor_service + in + (* For xs9 when fresh install, the ssh_monitor service is not enabled by default. + If the auto_mode is enabled, we need to enable the ssh_monitor service. + and user may have disabled monitor service by mistake as well, so we need to check the status. *) + if auto_mode_in_db <> ssh_monitor_enabled then + Xapi_host.set_ssh_auto_mode ~__context ~self:localhost + ~value:auto_mode_in_db ; + let console_timeout = + Db.Host.get_console_idle_timeout ~__context ~self:localhost + in + let console_timeout_file_exists = + Sys.file_exists !Xapi_globs.console_timeout_profile_path + in + (* Ensure the console timeout profile file exists if the timeout is configured *) + if console_timeout > 0L && not console_timeout_file_exists then + Xapi_host.set_console_idle_timeout ~__context ~self:localhost + ~value:console_timeout + ) ; remove_pending_guidances ~__context diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index b74a1ecc16d..3b0f0e1b843 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -1,288 +1,342 @@ (rule - (target server.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} server --gen-debug --filter-internal --filter closed))) -) + (target server.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} server --gen-debug --filter-internal --filter closed)))) (rule - (target db_actions.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} db --filter nothing))) -) + (target db_actions.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} db --filter nothing)))) (rule - (target custom_actions.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - 
(action - (with-stdout-to %{target} - (run %{gen} actions --filter-internal --filter closed))) -) + (target custom_actions.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} actions --filter-internal --filter closed)))) (rule - (target rbac_static.ml) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} rbac --filter-internal --filter closed))) -) + (target rbac_static.ml) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} rbac --filter-internal --filter closed)))) (rule - (target rbac_static.csv) - (deps - (:gen ../idl/ocaml_backend/gen_api_main.exe) - ) - (action - (with-stdout-to %{target} - (run %{gen} rbac --gen-debug --filter-internal --filter closed))) -) + (target rbac_static.csv) + (deps + (:gen ../idl/ocaml_backend/gen_api_main.exe)) + (action + (with-stdout-to + %{target} + (run %{gen} rbac --gen-debug --filter-internal --filter closed)))) (install - (package xapi-debug) - (section share_root) - (files rbac_static.csv) -) - + (package xapi-debug) + (section share_root) + (files rbac_static.csv)) (library - (name xapi_internal_minimal) - (modules context custom_actions xapi_globs server_helpers session_check rbac rbac_audit db_actions taskHelper eventgen locking_helpers exnHelper rbac_static xapi_role xapi_extensions db) - (modes best) - (wrapped false) - (libraries - http_lib - httpsvr - ipaddr - xapi-types - xapi_database - mtime - tracing - tracing_propagator - uuid - rpclib.core - threads.posix - fmt - clock - astring - stunnel - sexplib0 - sexplib - sexpr - tgroup - forkexec - xapi-idl - xapi_aux - xapi-stdext-std - xapi-stdext-pervasives - xapi-backtrace - xapi-datamodel - xapi-consts - xapi_version - xapi-stdext-threads - xapi-stdext-unix - rpclib.xml - xapi-log) -) + (name xapi_internal_minimal) + (modules + context + custom_actions + xapi_globs + server_helpers + 
session_check + rbac + rbac_audit + db_actions + taskHelper + eventgen + locking_helpers + exnHelper + rbac_static + xapi_role + xapi_extensions + db) + (modes best) + (wrapped false) + (libraries + http_lib + httpsvr + ipaddr + xapi-types + xapi_database + mtime + tracing + tracing_propagator + uuid + rpclib.core + threads.posix + fmt + clock + astring + stunnel + sexplib0 + sexplib + sexpr + tgroup + forkexec + xapi-idl + xapi_aux + xapi-stdext-std + xapi-stdext-pervasives + xapi-backtrace + xapi-datamodel + xapi-consts + xapi_version + xapi-stdext-threads + xapi-stdext-unix + rpclib.xml + xapi-log)) (library - (name xapi_internal) - (wrapped false) - (modes best) - (modules (:standard \ - xapi_main server api_server xapi custom_actions context xapi_globs server_helpers session_check rbac rbac_audit rbac_static db_actions taskHelper eventgen locking_helpers exnHelper xapi_role xapi_extensions db)) - (libraries - angstrom - astring - cstruct - base64 - clock - cohttp - cohttp_posix - domain-name - ezxenstore.core - fmt - forkexec - gencertlib - gzip - hex - http_lib - httpsvr - ipaddr - ipaddr.unix - magic-mime - message-switch-core - message-switch-unix - mirage-crypto - mirage-crypto-rng - mirage-crypto-rng.unix - mtime - mtime.clock.os - pam - pciutil - pci - psq - ptime - ptime.clock.os - rpclib.core - rpclib.json - rpclib.xml - re - result - rresult - rrd-transport.lib - rrd-transport.file - rrdd-plugin.base - rrdd-plugin.local - sexplib - sexplib0 - sexpr - sha - stunnel - tapctl - tar - tar-unix - tgroup - threads.posix - tracing - tracing_propagator - unixpwd - uri - uuid - uuidm - vhd_lib - x509 - xapi_aux - xapi-backtrace - (re_export xapi-consts) - xapi-consts.xapi_version - xapi-client - xapi-cli-protocol - xapi_cli_server - xapi_database - xapi-datamodel - xapi-idl - xapi-idl.cluster - xapi-idl.rrd - xapi-idl.rrd.interface - xapi-idl.rrd.interface.types - xapi-idl.storage - xapi-idl.storage.interface - xapi-idl.storage.interface.types - xapi-idl.xen - 
xapi-idl.xen.interface - xapi-idl.xen.interface.types - xapi-idl.network - xapi-idl.v6 - xapi-idl.memory - xapi-idl.gpumon - xapi-idl.updates - (re_export xapi_internal_minimal) - xapi-inventory - xapi-log - xapi-open-uri - xapi-rrd - (re_export xapi-types) - xapi-stdext-encodings - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-threads.scheduler - xapi-stdext-unix - xapi-stdext-zerocheck - xapi-tracing - xapi-tracing-export - xapi_version - xapi_xenopsd - xenstore_transport.unix - xml-light2 - xmlm - xxhash - yojson - zstd - xapi_host_driver_helpers - ) - (preprocess (per_module - ((pps ppx_sexp_conv) Cert_distrib) - ((pps ppx_deriving.ord) Xapi_observer_components) + (name xapi_internal) + (wrapped false) + (modes best) + (modules + (:standard + \ + xapi_main + server + api_server + xapi + custom_actions + context + xapi_globs + server_helpers + session_check + rbac + rbac_audit + rbac_static + db_actions + taskHelper + eventgen + locking_helpers + exnHelper + xapi_role + xapi_extensions + db)) + (libraries + angstrom + astring + cstruct + base64 + clock + cohttp + cohttp_posix + digestif + domain-name + ezxenstore.core + fmt + forkexec + gencertlib + gzip + hex + http_lib + httpsvr + ipaddr + ipaddr.unix + magic-mime + message-switch-core + message-switch-unix + mirage-crypto + mirage-crypto-rng + mirage-crypto-rng.unix + mtime + mtime.clock.os + pam + pciutil + pci + psq + ptime + ptime.clock.os + rpclib.core + rpclib.json + rpclib.xml + re + result + rresult + rrd-transport.lib + rrd-transport.file + rrdd-plugin.base + rrdd-plugin.local + sexplib + sexplib0 + sexpr + sha + stunnel + tapctl + tar + tar-unix + tgroup + threads.posix + tracing + tracing_propagator + unixpwd + uri + uuid + uuidm + x509 + xapi_aux + xapi-backtrace + (re_export xapi-consts) + xapi-consts.xapi_version + xapi-client + xapi-cli-protocol + xapi_cli_server + xapi_database + xapi-datamodel + xapi-idl + xapi-idl.cluster + xapi-idl.rrd + xapi-idl.rrd.interface 
+ xapi-idl.rrd.interface.types + xapi-idl.storage + xapi-idl.storage.interface + xapi-idl.storage.interface.types + xapi-idl.xen + xapi-idl.xen.interface + xapi-idl.xen.interface.types + xapi-idl.network + xapi-idl.v6 + xapi-idl.memory + xapi-idl.gpumon + xapi-idl.updates + (re_export xapi_internal_minimal) + xapi-inventory + xapi-log + xapi-open-uri + xapi-rrd + (re_export xapi-types) + xapi-stdext-encodings + xapi-stdext-pervasives + xapi-stdext-std + xapi-stdext-threads + xapi-stdext-threads.scheduler + xapi-stdext-unix + xapi-stdext-zerocheck + xapi-tracing + xapi-tracing-export + xapi_version + xapi_xenopsd + xenstore_transport.unix + xml-light2 + xmlm + xxhash + yojson + zstd + ) + (preprocess + (per_module + ((pps ppx_sexp_conv) + Cert_distrib) + ((pps ppx_deriving.ord) + Xapi_observer_components) ((pps ppx_deriving_rpc) - Config_file_sync Extauth_plugin_ADwinbind Importexport Sparse_dd_wrapper - Storage_migrate Storage_mux Storage_smapiv1_wrapper Stream_vdi - System_domains Xapi_psr Xapi_services Xapi_udhcpd))) -) + Config_file_sync + Extauth_plugin_ADwinbind + Importexport + Sparse_dd_wrapper + Storage_migrate + Storage_migrate_helper + Storage_mux + Storage_smapiv1_wrapper + Stream_vdi + System_domains + Xapi_psr + Xapi_services + Xapi_udhcpd)))) (library - (name xapi_internal_server_only) - (modes best) - (modules server) - (libraries xapi_database xapi_internal_minimal http_lib rpclib.core xapi-types xapi-log xapi-stdext-encodings xapi-consts xapi-backtrace clock rpclib.json) - (wrapped false) -) + (name xapi_internal_server_only) + (modes best) + (modules server) + (libraries + xapi_database + xapi_internal_minimal + http_lib + rpclib.core + xapi-types + xapi-log + xapi-stdext-encodings + xapi-consts + xapi-backtrace + clock + rpclib.json) + (wrapped false)) (library - (name xapi_internal_server) - (modes best) - (wrapped false) - (modules api_server xapi) - (libraries - clock - forkexec - http_lib - httpsvr - rpclib.core - rpclib.json - rpclib.xml - 
stunnel - tgroup - threads.posix - tracing - tracing_propagator - xapi-backtrace - xapi-client - xapi-consts - xapi-datamodel - xapi_internal_minimal - xapi-idl - xapi-inventory - (re_export xapi_internal_server_only) - xapi-log - xapi-stdext-encodings - xapi-stdext-pervasives - xapi-stdext-std - xapi-stdext-threads - xapi-stdext-threads.scheduler - xapi-stdext-unix - xapi-types - xapi_aux - xapi-consts.xapi_version - xapi_cli_server - xapi_database - xapi_internal) -) + (name xapi_internal_server) + (modes best) + (wrapped false) + (modules api_server xapi) + (libraries + clock + forkexec + http_lib + httpsvr + rpclib.core + rpclib.json + rpclib.xml + stunnel + tgroup + threads.posix + tracing + tracing_propagator + xapi-backtrace + xapi-client + xapi-consts + xapi-datamodel + xapi_internal_minimal + xapi-idl + xapi-inventory + (re_export xapi_internal_server_only) + xapi-log + xapi-stdext-encodings + xapi-stdext-pervasives + xapi-stdext-std + xapi-stdext-threads + xapi-stdext-threads.scheduler + xapi-stdext-unix + xapi-types + xapi_aux + xapi-consts.xapi_version + xapi_cli_server + xapi_database + xapi_internal)) (executable - (modes exe) - (name xapi_main) - (public_name xapi) - (package xapi) - (modules xapi_main) - (libraries - xapi_internal - xapi_internal_server - xapi_internal_minimal - xapi-idl - xapi-log - xapi-stdext-unix - ) -) + (modes exe) + (name xapi_main) + (public_name xapi) + (package xapi) + (modules xapi_main) + (libraries + xapi_internal + xapi_internal_server + xapi_internal_minimal + xapi-idl + xapi-log + xapi-stdext-unix)) +(rule + (alias runtest) + (package xapi) + (deps + (:x xapi_main.exe)) + (action + (run ./check-no-lwtssl.sh %{x}))) diff --git a/ocaml/xapi/eventgen.ml b/ocaml/xapi/eventgen.ml index 753bb8fdf7b..810b30bd80b 100644 --- a/ocaml/xapi/eventgen.ml +++ b/ocaml/xapi/eventgen.ml @@ -107,7 +107,7 @@ open Xapi_database.Db_action_helper let is_valid_ref db = function | Schema.Value.String r -> ( try - ignore 
(Database.table_of_ref r db) ; + ignore (Database.table_of_ref (r :> string) db) ; true with _ -> false ) diff --git a/ocaml/xapi/export.ml b/ocaml/xapi/export.ml index 81dcb22bc44..3c00b544f73 100644 --- a/ocaml/xapi/export.ml +++ b/ocaml/xapi/export.ml @@ -713,11 +713,15 @@ open Http open Client let lock_vm ~__context ~vm ~task_id op = - (* Note slight race here because we haven't got the master lock *) - Xapi_vm_lifecycle.assert_operation_valid ~__context ~self:vm ~op ~strict:true ; - (* ... small race lives here ... *) - Db.VM.add_to_current_operations ~__context ~self:vm ~key:task_id ~value:op ; - Xapi_vm_lifecycle.update_allowed_operations ~__context ~self:vm + Helpers.retry ~__context ~doc:task_id ~policy:Helpers.Policy.fail_quickly + (fun () -> + (* Note slight race here because we haven't got the master lock *) + Xapi_vm_lifecycle.assert_operation_valid ~__context ~self:vm ~op + ~strict:true ; + (* ... small race lives here ... *) + Db.VM.add_to_current_operations ~__context ~self:vm ~key:task_id ~value:op ; + Xapi_vm_lifecycle.update_allowed_operations ~__context ~self:vm + ) let unlock_vm ~__context ~vm ~task_id = Db.VM.remove_from_current_operations ~__context ~self:vm ~key:task_id ; diff --git a/ocaml/xapi/export_raw_vdi.ml b/ocaml/xapi/export_raw_vdi.ml index cea32fb5533..df3d778d579 100644 --- a/ocaml/xapi/export_raw_vdi.ml +++ b/ocaml/xapi/export_raw_vdi.ml @@ -47,12 +47,18 @@ let localhost_handler rpc session_id vdi (req : Http.Request.t) let copy base_path path size = try debug "Copying VDI contents..." ; - Vhd_tool_wrapper.send ?relative_to:base_path - (Vhd_tool_wrapper.update_task_progress __context) - "none" - (Importexport.Format.to_string format) - s path size "" ; - debug "Copying VDI complete." 
+ match format with + | Qcow -> + Qcow_tool_wrapper.send ?relative_to:base_path + (Qcow_tool_wrapper.update_task_progress __context) + s path size + | Vhd | Tar | Raw -> + Vhd_tool_wrapper.send ?relative_to:base_path + (Vhd_tool_wrapper.update_task_progress __context) + "none" + (Importexport.Format.to_string format) + s path size "" ; + debug "Copying VDI complete." with Unix.Unix_error (Unix.EIO, _, _) -> raise (Api_errors.Server_error @@ -73,7 +79,7 @@ let localhost_handler rpc session_id vdi (req : Http.Request.t) in Http_svr.headers s headers ; match format with - | Raw | Vhd -> + | Raw | Vhd | Qcow -> let size = Db.VDI.get_virtual_size ~__context ~self:vdi in if format = Vhd && size > Constants.max_vhd_size then raise diff --git a/ocaml/xapi/extauth_plugin_ADpbis.ml b/ocaml/xapi/extauth_plugin_ADpbis.ml index 56f723ff6f3..ea2dedfccc6 100644 --- a/ocaml/xapi/extauth_plugin_ADpbis.ml +++ b/ocaml/xapi/extauth_plugin_ADpbis.ml @@ -942,10 +942,9 @@ module AuthADlw : Auth_signature.AUTH_MODULE = struct Db.Host.get_external_auth_service_name ~__context ~self:host ) in - if - List.mem_assoc "domain" config_params - (* legacy test: do we have domain name in config? *) - then (* then config:domain must match service-name *) + (* legacy test: do we have domain name in config? 
+ then config:domain must match service-name *) + if List.mem_assoc "domain" config_params then let _domain = List.assoc "domain" config_params in if service_name <> _domain then raise diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.ml b/ocaml/xapi/extauth_plugin_ADwinbind.ml index 6f51eea9cc5..214a941fe07 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.ml +++ b/ocaml/xapi/extauth_plugin_ADwinbind.ml @@ -208,7 +208,7 @@ module Ldap = struct (* * Escape characters according to * https://docs.microsoft.com/en-gb/windows/win32/adsi/search-filter-syntax?redirectedfrom=MSDN#special-characters - * *) + *) let reg_star = {|*|} |> Re.str |> Re.compile @@ -402,7 +402,7 @@ module Ldap = struct let* stdout = try (* Query KDC instead of use domain here - * Just in case cannot resolve domain name from DNS *) + * Just in case cannot resolve domain name from DNS *) let args = [ "ads" @@ -553,7 +553,7 @@ module Wbinfo = struct * Name : UCC * Alt_Name : ucc.local * SID : S-1-5-21-2850064427-2368465266-4270348630 - * *) + *) let args = ["--domain-info"; from_name] in let* stdout = call_wbinfo args in let key = @@ -614,7 +614,7 @@ module Wbinfo = struct * CHILD1 * GRANDCHILD * UDDCHILD1 - * *) + *) let args = ["--all-domains"] in let* stdout = call_wbinfo args in Ok @@ -686,11 +686,30 @@ module Wbinfo = struct let parse_uid_info stdout = (* looks like one line from /etc/passwd: https://en.wikipedia.org/wiki/Passwd#Password_file *) match String.split_on_char ':' stdout with - | [user_name; _passwd; uid; gid; gecos; _homedir; _shell] -> ( - try Ok {user_name; uid= int_of_string uid; gid= int_of_string gid; gecos} - with _ -> Error () - ) + | user_name :: _passwd :: uid :: gid :: rest -> ( + (* We expect at least homedir and shell at the end *) + let rest = List.rev rest in + match rest with + | _shell :: _homedir :: tail -> ( + (* Rev it back to original order *) + let tail = List.rev tail in + let gecos = String.concat ":" tail in + try + Ok + { + user_name + ; uid= int_of_string 
uid + ; gid= int_of_string gid + ; gecos + } + with _ -> Error () + ) + | _ -> + debug "%s uid_info format error: %s" __FUNCTION__ stdout ; + Error () + ) | _ -> + debug "%s uid_info format error: %s" __FUNCTION__ stdout ; Error () let uid_info_of_uid (uid : int) = @@ -815,7 +834,6 @@ let query_domain_workgroup ~domain = with _ -> raise (Auth_service_error (E_LOOKUP, err_msg)) let config_winbind_daemon ~workgroup ~netbios_name ~domain = - let open Xapi_stdext_unix in let smb_config = "/etc/samba/smb.conf" in let allow_fallback = (*`allow kerberos auth fallback` depends on our internal samba patch, @@ -825,51 +843,41 @@ let config_winbind_daemon ~workgroup ~netbios_name ~domain = * upgrade to samba packages with this capacity *) if !Xapi_globs.winbind_allow_kerberos_auth_fallback then "yes" else "no" in - let conf_contents = - match (workgroup, netbios_name, domain) with - | Some wkgroup, Some netbios, Some dom -> - String.concat "\n" - [ - "# auto-generated by xapi" - ; "[global]" - ; "kerberos method = secrets and keytab" - ; Printf.sprintf "realm = %s" dom - ; "security = ADS" - ; "template shell = /bin/bash" - ; "winbind refresh tickets = yes" - ; "winbind enum groups = no" - ; "winbind enum users = no" - ; "winbind scan trusted domains = yes" - ; "winbind use krb5 enterprise principals = yes" - ; Printf.sprintf "winbind cache time = %d" - !Xapi_globs.winbind_cache_time - ; Printf.sprintf "machine password timeout = 0" - ; Printf.sprintf "kerberos encryption types = %s" - (Kerberos_encryption_types.Winbind.to_string - !Xapi_globs.winbind_kerberos_encryption_type - ) - ; Printf.sprintf "workgroup = %s" wkgroup - ; Printf.sprintf "netbios name = %s" netbios - ; "idmap config * : range = 3000000-3999999" - ; Printf.sprintf "idmap config %s: backend = rid" dom - ; Printf.sprintf "idmap config %s: range = 2000000-2999999" dom - ; Printf.sprintf "log level = %s" (debug_level ()) - ; Printf.sprintf "allow kerberos auth fallback = %s" allow_fallback - ; "idmap config * 
: backend = tdb" - ; "" (* Empty line at the end *) - ] - | _ -> - String.concat "\n" - [ - "# autogenerated by xapi"; "[global]"; "" (* Empty line at the end *) - ] - in - - let len = String.length conf_contents in - Unixext.atomic_write_to_file smb_config 0o0644 (fun fd -> - let (_ : int) = Unix.single_write_substring fd conf_contents 0 len in - Unix.fsync fd + ( match (workgroup, netbios_name, domain) with + | Some wkgroup, Some netbios, Some dom -> + [ + "# autogenerated by xapi" + ; "[global]" + ; "kerberos method = secrets and keytab" + ; Printf.sprintf "realm = %s" dom + ; "security = ADS" + ; "template shell = /bin/bash" + ; "winbind refresh tickets = yes" + ; "winbind enum groups = no" + ; "winbind enum users = no" + ; "winbind scan trusted domains = yes" + ; "winbind use krb5 enterprise principals = yes" + ; Printf.sprintf "winbind cache time = %d" !Xapi_globs.winbind_cache_time + ; Printf.sprintf "machine password timeout = 0" + ; Printf.sprintf "kerberos encryption types = %s" + (Kerberos_encryption_types.Winbind.to_string + !Xapi_globs.winbind_kerberos_encryption_type + ) + ; Printf.sprintf "workgroup = %s" wkgroup + ; Printf.sprintf "netbios name = %s" netbios + ; "idmap config * : range = 3000000-3999999" + ; Printf.sprintf "idmap config %s: backend = rid" dom + ; Printf.sprintf "idmap config %s: range = 2000000-2999999" dom + ; Printf.sprintf "log level = %s" (debug_level ()) + ; Printf.sprintf "allow kerberos auth fallback = %s" allow_fallback + ; "idmap config * : backend = tdb" + ; "" (* Empty line at the end *) + ] + | _ -> + ["# autogenerated by xapi"; "[global]"; "" (* Empty line at the end *)] ) + |> String.concat "\n" + |> Xapi_stdext_unix.Unixext.write_string_to_file smb_config let clear_winbind_config () = (* Keep the winbind configuration if xapi config file specified explictly, @@ -1096,6 +1104,35 @@ module Winbind = struct netbios_name ) else hostname + + let set_machine_account_encryption_type netbios_name = + match 
!Xapi_globs.winbind_set_machine_account_kerberos_encryption_type with + | true -> ( + let args = + [ + "ads" + ; "enctypes" + ; "set" + ; "--machine-pass" + ; "-d" + ; debug_level () + ; Printf.sprintf "%s$" netbios_name + ; Printf.sprintf "%d" + (Kerberos_encryption_types.Winbind.to_encoding + !Xapi_globs.winbind_kerberos_encryption_type + ) + ] + in + try + Helpers.call_script + ~timeout:Mtime.Span.(5 * s) + !Xapi_globs.net_cmd args + |> ignore + with _ -> + warn "Failed to set machine account encryption type, ignoring" + ) + | false -> + debug "Skip setting machine account encryption type to DC" end module ClosestKdc = struct @@ -1207,7 +1244,6 @@ module RotateMachinePassword = struct let generate_krb5_tmp_config ~domain ~kdc_fqdn = (* Configure which server to change the password * https://web.mit.edu/kerberos/krb5-devel/doc/admin/conf_files/krb5_conf.html *) - let open Xapi_stdext_unix in let realm = String.uppercase_ascii domain in let domain_netbios = Wbinfo.domain_name_of ~target_name_type:NetbiosName ~from_name:domain @@ -1221,28 +1257,21 @@ module RotateMachinePassword = struct [] in - let conf_contents = - String.concat "\n" - ([ - "# auto-generated by xapi" - ; "[libdefaults]" - ; Printf.sprintf "default_realm = %s" realm - ; "[realms]" - ; Printf.sprintf "%s={" realm - ; Printf.sprintf "kpasswd_server=%s" kdc_fqdn - ; Printf.sprintf "kdc=%s" kdc_fqdn - ; "}" (* include winbind generated configure if exists *) - ] - @ include_item - @ [""] (* Empty line at the end *) - ) - in - - let len = String.length conf_contents in - Unixext.atomic_write_to_file tmp_krb5_conf 0o0644 (fun fd -> - let (_ : int) = Unix.single_write_substring fd conf_contents 0 len in - Unix.fsync fd - ) + [ + "# autogenerated by xapi" + ; "[libdefaults]" + ; Printf.sprintf "default_realm = %s" realm + ; "[realms]" + ; Printf.sprintf "%s={" realm + ; Printf.sprintf "kpasswd_server=%s" kdc_fqdn + ; Printf.sprintf "kdc=%s" kdc_fqdn + ; "}" (* include winbind generated configure if exists 
*) + ] + @ include_item + @ [""] + (* Empty line at the end *) + |> String.concat "\n" + |> Xapi_stdext_unix.Unixext.write_string_to_file tmp_krb5_conf let clear_tmp_krb5_conf () = if !Xapi_globs.winbind_keep_configuration then @@ -1307,6 +1336,157 @@ module RotateMachinePassword = struct let stop_rotate () = Scheduler.remove_from_queue task_name end +module type LocalHostTag = sig + val local_ip : string +end + +module HostsConfTagIPv4 : LocalHostTag = struct let local_ip = "127.0.0.1" end + +module HostsConfTagIPv6 : LocalHostTag = struct let local_ip = "::1" end + +module type HostsConf = sig + (* add the domain info into conf*) + val join : name:string -> domain:string -> lines:string list -> string list + + (* remove the domain info from conf*) + val leave : name:string -> domain:string -> lines:string list -> string list +end + +module HostsConfFunc (T : LocalHostTag) : HostsConf = struct + let sep = ' ' + + let sep_str = String.make 1 sep + + type t = Add | Remove + + let interest line = String.starts_with ~prefix:T.local_ip line + + let handle op name domain line = + let line = String.lowercase_ascii line in + let name = String.lowercase_ascii name in + let domain = String.lowercase_ascii domain in + let fqdn = Printf.sprintf "%s.%s" name domain in + let rec add_hostname pre line = + match line with + | ip :: alias when ip = T.local_ip -> + (* Add localhost IP *) + add_hostname [ip] alias + | sp :: left when sp = "" -> + (* Add space to reserve the indent *) + add_hostname (pre @ [sp]) left + | alias :: left -> + (* hosts entry: ip fqdn alias1 alias2 ... 
*) + pre @ [fqdn; name; alias] @ left + | [] -> + failwith "Can not add local hostname to non local IP" + in + + match interest line with + | false -> + line + | true -> + String.split_on_char sep line + |> List.filter (fun x -> x <> name && x <> fqdn) + |> (fun x -> match op with Add -> add_hostname [] x | Remove -> x) + |> String.concat sep_str + + let leave ~name ~domain ~lines = + List.map (fun line -> handle Remove name domain line) lines + (* If no name for local ip left, just remove it *) + |> List.filter (fun x -> String.trim x <> T.local_ip) + + let join ~name ~domain ~lines = + List.map (fun line -> handle Add name domain line) lines |> fun x -> + match List.exists (fun l -> interest l) x with + | true -> + x + | false -> + (* Does not found and updated the conf, then add one *) + [ + Printf.sprintf "%s%s%s.%s%s%s" T.local_ip sep_str name domain sep_str + name + ] + @ x +end + +module HostsConfIPv4 = HostsConfFunc (HostsConfTagIPv4) +module HostsConfIPv6 = HostsConfFunc (HostsConfTagIPv6) + +module ConfigHosts = struct + open Xapi_stdext_unix.Unixext + + let path = "/etc/hosts" + + let join ~name ~domain = + read_lines ~path |> fun lines -> + HostsConfIPv4.join ~name ~domain ~lines |> fun lines -> + HostsConfIPv6.join ~name ~domain ~lines |> fun x -> + x @ [""] (* Add final line break *) + |> String.concat "\n" + |> write_string_to_file path + + let leave ~name ~domain = + read_lines ~path |> fun lines -> + HostsConfIPv4.leave ~name ~domain ~lines |> fun lines -> + HostsConfIPv6.leave ~name ~domain ~lines |> fun x -> + x @ [""] (* Add final line break *) + |> String.concat "\n" + |> write_string_to_file path +end + +module DNSSync = struct + let task_name = "Sync hostname with DNS" + + type t = Register | Unregister + + let handle op hostname netbios_name domain = + (* By default, hostname should equal to netbios_name, just register it to DNS server*) + try + let ops = + match op with Register -> "register" | Unregister -> "unregister" + in + let 
netbios_fqdn = Printf.sprintf "%s.%s" netbios_name domain in + let args = ["ads"; "dns"] @ [ops] @ ["--machine-pass"] in + Helpers.call_script net_cmd (args @ [netbios_fqdn]) |> ignore ; + if hostname <> netbios_name then + let hostname_fqdn = Printf.sprintf "%s.%s" hostname domain in + (* netbios_name is compressed, op on extra hostname *) + Helpers.call_script net_cmd (args @ [hostname_fqdn]) |> ignore + with e -> + debug "Register/unregister with DNS failed %s" (ExnHelper.string_of_exn e) + + let register hostname netbios_name domain = + handle Register hostname netbios_name domain + + let unregister hostname netbios_name domain = + handle Unregister hostname netbios_name domain + + let sync () = + Server_helpers.exec_with_new_task "sync hostname with DNS" + @@ fun __context -> + let host = Helpers.get_localhost ~__context in + let service_name = + Db.Host.get_external_auth_service_name ~__context ~self:host + in + let netbios_name = + Db.Host.get_external_auth_configuration ~__context ~self:host + |> fun config -> List.assoc_opt "netbios_name" config + in + let hostname = Db.Host.get_hostname ~__context ~self:host in + match netbios_name with + | Some netbios -> + register hostname netbios service_name + | None -> + debug "Netbios name is none, skip sync hostname to DNS" + + let trigger_sync ~start = + debug "Trigger task: %s" task_name ; + Scheduler.add_to_queue task_name + (Scheduler.Periodic !Xapi_globs.winbind_dns_sync_interval) start sync + + let stop_sync () = Scheduler.remove_from_queue task_name +end + let build_netbios_name ~config_params = let key = "netbios-name" in match List.assoc_opt key config_params with @@ -1475,7 +1655,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct * the joined domain (with 1 way trust ) , just return the default value * This is NOT a regression issue of PBIS * PBIS cannot handle such case neither - * *) + *) debug "Fallback to default value as no DC info in xapi database" ; Ok default_account in @@ -1626,20 
+1806,30 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct ~ou_conf ~workgroup:(Some workgroup) ~machine_pwd_last_change_time:(Some machine_pwd_last_change_time) ~netbios_name:(Some netbios_name) ; + (* Trigger right now *) ClosestKdc.trigger_update ~start:0. ; RotateMachinePassword.trigger_rotate ~start:0. ; + ConfigHosts.join ~domain:service_name ~name:netbios_name ; + let _, _ = + Forkhelpers.execute_command_get_output !Xapi_globs.set_hostname + [get_localhost_name ()] + in (* Trigger right now *) + DNSSync.trigger_sync ~start:0. ; + Winbind.set_machine_account_encryption_type netbios_name ; debug "Succeed to join domain %s" service_name with | Forkhelpers.Spawn_internal_error (_, stdout, _) -> error "Join domain: %s error: %s" service_name stdout ; clear_winbind_config () ; + ConfigHosts.leave ~domain:service_name ~name:netbios_name ; (* The configure is kept for debug purpose with max level *) raise (Auth_service_error (stdout |> tag_from_err_msg, stdout)) | Xapi_systemctl.Systemctl_fail _ -> let msg = Printf.sprintf "Failed to start %s" Winbind.name in error "Start daemon error: %s" msg ; config_winbind_daemon ~domain:None ~workgroup:None ~netbios_name:None ; + ConfigHosts.leave ~domain:service_name ~name:netbios_name ; raise (Auth_service_error (E_GENERIC, msg)) | e -> let msg = @@ -1650,6 +1840,7 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct in error "Enable extauth error: %s" msg ; clear_winbind_config () ; + ConfigHosts.leave ~domain:service_name ~name:netbios_name ; raise (Auth_service_error (E_GENERIC, msg)) (* unit on_disable() @@ -1663,7 +1854,17 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct let@ __context = Context.with_tracing ~__context __FUNCTION__ in let user = List.assoc_opt "user" config_params in let pass = List.assoc_opt "pass" config_params in - let {service_name; _} = get_domain_info_from_db () in + let {service_name; netbios_name; _} = get_domain_info_from_db () in + DNSSync.stop_sync () ; + ( 
match netbios_name with + | Some netbios -> + ConfigHosts.leave ~domain:service_name ~name:netbios ; + let hostname = get_localhost_name () in + DNSSync.unregister hostname netbios service_name + | _ -> + () + ) ; + (* Clean extauth config *) persist_extauth_config ~domain:None ~user:None ~ou_conf:[] ~workgroup:None ~machine_pwd_last_change_time:None ~netbios_name:None ; @@ -1688,7 +1889,15 @@ module AuthADWinbind : Auth_signature.AUTH_MODULE = struct Winbind.start ~timeout:5. ~wait_until_success:true ; ClosestKdc.trigger_update ~start:ClosestKdc.startup_delay ; RotateMachinePassword.trigger_rotate ~start:5. ; - Winbind.check_ready_to_serve ~timeout:300. + Winbind.check_ready_to_serve ~timeout:300. ; + DNSSync.trigger_sync ~start:5. ; + + let {service_name; netbios_name; _} = get_domain_info_from_db () in + match netbios_name with + | Some name -> + ConfigHosts.join ~domain:service_name ~name + | _ -> + () (* unit on_xapi_exit() diff --git a/ocaml/xapi/extauth_plugin_ADwinbind.mli b/ocaml/xapi/extauth_plugin_ADwinbind.mli index 0c9137d5f54..dab3963fa1a 100644 --- a/ocaml/xapi/extauth_plugin_ADwinbind.mli +++ b/ocaml/xapi/extauth_plugin_ADwinbind.mli @@ -75,3 +75,15 @@ module Migrate_from_pbis : sig val parse_value_from_pbis : string -> string end + +module type HostsConf = sig + (* add the domain info into conf*) + val join : name:string -> domain:string -> lines:string list -> string list + + (* remove the domain info from conf*) + val leave : name:string -> domain:string -> lines:string list -> string list +end + +module HostsConfIPv4 : HostsConf + +module HostsConfIPv6 : HostsConf diff --git a/ocaml/xapi/helpers.ml b/ocaml/xapi/helpers.ml index 2ef16112053..9807c4540d4 100644 --- a/ocaml/xapi/helpers.ml +++ b/ocaml/xapi/helpers.ml @@ -157,7 +157,7 @@ let get_management_iface_is_connected ~__context = let get_management_ip_addr ~__context = let dbg = Context.string_of_task __context in - Option.map fst (Networking_info.get_management_ip_addr ~dbg) + 
Networking_info.get_management_ip_addr ~dbg let get_localhost_uuid () = Xapi_inventory.lookup Xapi_inventory._installation_uuid @@ -171,8 +171,13 @@ let get_localhost ~__context = match localhost_ref = Ref.null with | false -> localhost_ref - | true -> - get_localhost_uncached ~__context + | true -> ( + try get_localhost_uncached ~__context + with Db_exn.Read_missing_uuid (_, _, _) as e -> + Unixext.raise_with_preserved_backtrace e (fun () -> + warn "The database has not fully come up yet, so localhost is missing" + ) + ) (* Determine the gateway and DNS PIFs: * If one of the PIFs with IP has other_config:defaultroute=true, then @@ -586,6 +591,7 @@ let call_api_functions ~__context f = call_api_functions_internal ~__context f let call_emergency_mode_functions hostname f = + let __FUN = __FUNCTION__ in let open Xmlrpc_client in let transport = SSL @@ -604,15 +610,13 @@ let call_emergency_mode_functions hostname f = in finally (fun () -> f rpc session_id) - (fun () -> Client.Client.Session.local_logout ~rpc ~session_id) - -let progress ~__context t = - for i = 0 to int_of_float (t *. 100.) do - let v = float_of_int i /. 100. /. t in - TaskHelper.set_progress ~__context v ; - Thread.delay 1. - done ; - TaskHelper.set_progress ~__context 1. 
+ (fun () -> + try Client.Client.Session.local_logout ~rpc ~session_id + with _ -> + (* This is an emergency mode function, so we don't care about the error + in logout *) + debug "%s: The logout failed in emergency mode function" __FUN + ) let is_domain_zero_with_record ~__context vm_ref vm_rec = let host_ref = vm_rec.API.vM_resident_on in @@ -863,12 +867,18 @@ let assert_we_are_master ~__context = ) (* Host version compare helpers *) -let compare_int_lists : int list -> int list -> int = +let rec compare_int_lists : int list -> int list -> int = fun a b -> - let first_non_zero is = - List.fold_left (fun a b -> if a <> 0 then a else b) 0 is - in - first_non_zero (List.map2 compare a b) + match (a, b) with + | [], [] -> + 0 + | [], _ -> + -1 + | _, [] -> + 1 + | x :: xs, y :: ys -> + let r = compare x y in + if r <> 0 then r else compare_int_lists xs ys let group_by f list = let evaluated_list = List.map (fun x -> (x, f x)) list in @@ -909,41 +919,69 @@ let sort_by_schwarzian ?(descending = false) f list = |> List.sort (fun (_, x') (_, y') -> comp x' y') |> List.map (fun (x, _) -> x) -let platform_version_inverness = [2; 4; 0] +let version_keys_list = + Xapi_globs.[_platform_version; _xapi_build_version; _xen_version] -let version_string_of : __context:Context.t -> [`host] api_object -> string = - fun ~__context host -> - try - let software_version = - match host with - | LocalObject host_ref -> - Db.Host.get_software_version ~__context ~self:host_ref - | RemoteObject (rpc, session_id, host_ref) -> - Client.Client.Host.get_software_version ~rpc ~session_id - ~self:host_ref - in - List.assoc Xapi_globs._platform_version software_version - with Not_found -> Xapi_globs.default_platform_version +let get_software_versions ~__context host = + ( match host with + | LocalObject self -> + Db.Host.get_software_version ~__context ~self + | RemoteObject (rpc, session_id, self) -> + Client.Client.Host.get_software_version ~rpc ~session_id ~self + ) + |> List.filter (fun (k, _) 
-> List.mem k version_keys_list) + +let versions_string_of : (string * string) list -> string = + fun ver_list -> + ver_list + |> List.map (fun (k, v) -> Printf.sprintf "%s: %s" k v) + |> String.concat "," + +let version_numbers_of_string version_string = + ( match String.split_on_char '-' version_string with + | [standard_version; patch] -> + String.split_on_char '.' standard_version @ [patch] + | [standard_version] -> + String.split_on_char '.' standard_version + | _ -> + ["0"; "0"; "0"] + ) + |> List.filter_map int_of_string_opt -let version_of : __context:Context.t -> [`host] api_object -> int list = - fun ~__context host -> - let vs = version_string_of ~__context host in - List.map int_of_string (String.split_on_char '.' vs) +let version_of : version_key:string -> (string * string) list -> int list = + fun ~version_key versions_list -> + List.assoc_opt version_key versions_list + |> Option.value ~default:"0.0.0" + |> version_numbers_of_string (* Compares host versions, analogous to Stdlib.compare. 
*) -let compare_host_platform_versions : - __context:Context.t -> [`host] api_object -> [`host] api_object -> int = - fun ~__context host_a host_b -> - let version_of = version_of ~__context in - compare_int_lists (version_of host_a) (version_of host_b) - -let max_version_in_pool : __context:Context.t -> int list = +let compare_versions : + version_key:string + -> (string * string) list + -> (string * string) list + -> int = + fun ~version_key sw_ver_a sw_ver_b -> + let version_a = version_of ~version_key sw_ver_a in + let version_b = version_of ~version_key sw_ver_b in + compare_int_lists version_a version_b + +let compare_all_versions ~is_greater_or_equal:a ~than:b = + List.for_all + (fun version_key -> compare_versions ~version_key a b >= 0) + version_keys_list + +let max_version_in_pool : __context:Context.t -> (string * string) list = fun ~__context -> let max_version a b = - if a = [] then b else if compare_int_lists a b > 0 then a else b + if a = [] then + b + else if compare_all_versions ~is_greater_or_equal:a ~than:b then + a + else + b and versions = List.map - (fun host_ref -> version_of ~__context (LocalObject host_ref)) + (fun host_ref -> get_software_versions ~__context (LocalObject host_ref)) (Db.Host.get_all ~__context) in List.fold_left max_version [] versions @@ -951,21 +989,30 @@ let max_version_in_pool : __context:Context.t -> int list = let host_has_highest_version_in_pool : __context:Context.t -> host:[`host] api_object -> bool = fun ~__context ~host -> - let host_version = version_of ~__context host + let host_versions = get_software_versions ~__context host and max_version = max_version_in_pool ~__context in - compare_int_lists host_version max_version >= 0 + compare_all_versions ~is_greater_or_equal:host_versions ~than:max_version let host_versions_not_decreasing ~__context ~host_from ~host_to = - compare_host_platform_versions ~__context host_from host_to <= 0 + let sw_vers_from = get_software_versions ~__context host_from in + let 
sw_vers_to = get_software_versions ~__context host_to in + compare_all_versions ~is_greater_or_equal:sw_vers_to ~than:sw_vers_from -let is_platform_version_same_on_master ~__context ~host = +let are_host_versions_same_on_master_inner ~__context ~host ~master = if is_pool_master ~__context ~host then true else - let master = get_master ~__context in - compare_host_platform_versions ~__context (LocalObject master) - (LocalObject host) - = 0 + let sw_ver_master = get_software_versions ~__context (LocalObject master) in + let sw_ver_host = get_software_versions ~__context (LocalObject host) in + List.for_all + (fun version_key -> + compare_versions ~version_key sw_ver_master sw_ver_host = 0 + ) + version_keys_list + +let are_host_versions_same_on_master ~__context ~host = + let master = get_master ~__context in + are_host_versions_same_on_master_inner ~__context ~host ~master let maybe_raise_vtpm_unimplemented func message = if not !ignore_vtpm_unimplemented then ( @@ -973,8 +1020,8 @@ let maybe_raise_vtpm_unimplemented func message = raise Api_errors.(Server_error (not_implemented, [message])) ) -let assert_platform_version_is_same_on_master ~__context ~host ~self = - if not (is_platform_version_same_on_master ~__context ~host) then +let assert_host_versions_are_same_on_master ~__context ~host ~self = + if not (are_host_versions_same_on_master ~__context ~host) then raise (Api_errors.Server_error ( Api_errors.vm_host_incompatible_version @@ -1000,15 +1047,14 @@ let assert_host_has_highest_version_in_pool : let pool_has_different_host_platform_versions ~__context = let all_hosts = Db.Host.get_all ~__context in - let platform_versions = - List.map - (fun host -> version_string_of ~__context (LocalObject host)) - all_hosts - in - let is_different_to_me platform_version = - platform_version <> Xapi_version.platform_version () - in - List.fold_left ( || ) false (List.map is_different_to_me platform_versions) + let master = get_master ~__context in + not + (List.for_all + 
(fun host -> + are_host_versions_same_on_master_inner ~__context ~host ~master + ) + all_hosts + ) (* Checks that a host has a PBD for a particular SR (meaning that the SR is visible to the host) *) @@ -1349,13 +1395,19 @@ let vm_to_string __context vm = raise (Api_errors.Server_error (Api_errors.invalid_value, [str])) ; let t = Context.database_of __context in let module DB = - (val Xapi_database.Db_cache.get t : Xapi_database.Db_interface.DB_ACCESS) + (val Xapi_database.Db_cache.get t : Xapi_database.Db_interface.DB_ACCESS2) in - let fields = fst (DB.read_record t Db_names.vm str) in + let fields, _ = DB.read_record t Db_names.vm str in let sexpr = SExpr.Node (List.map - (fun (key, value) -> SExpr.Node [SExpr.String key; SExpr.String value]) + (fun (key, value) -> + SExpr.Node + [ + SExpr.String key + ; SExpr.String (Schema.CachedValue.string_of value) + ] + ) fields ) in @@ -1675,7 +1727,7 @@ module Repeat_with_uniform_backoff : POLICY = struct debug "Waiting for up to %f seconds before retrying..." 
this_timeout ; let start = Unix.gettimeofday () in ( match e with - | Api_errors.Server_error (code, [cls; objref]) + | Api_errors.Server_error (code, cls :: objref :: _) when code = Api_errors.other_operation_in_progress -> Early_wakeup.wait (cls, objref) this_timeout | _ -> diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index c3abaf34b00..91a900dedda 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -836,6 +836,7 @@ module GuestMetrics : HandlerTools = struct ~memory:gm_record.API.vM_guest_metrics_memory ~disks:gm_record.API.vM_guest_metrics_disks ~networks:gm_record.API.vM_guest_metrics_networks + ~services:gm_record.API.vM_guest_metrics_services ~pV_drivers_detected:gm_record.API.vM_guest_metrics_PV_drivers_detected ~other:gm_record.API.vM_guest_metrics_other ~last_updated:gm_record.API.vM_guest_metrics_last_updated diff --git a/ocaml/xapi/import_raw_vdi.ml b/ocaml/xapi/import_raw_vdi.ml index 565c29e7d8e..8eacfe0a786 100644 --- a/ocaml/xapi/import_raw_vdi.ml +++ b/ocaml/xapi/import_raw_vdi.ml @@ -106,6 +106,10 @@ let localhost_handler rpc session_id vdi_opt (req : Request.t) ) ) | None -> + (* FIXME: Currently, when importing an image with a virtual + size that's bigger than the VDI's virtual size, we fail in + an unhelpful manner on some write. + We could instead parse the header first and fail early. 
*) let vdi = match ( vdi_opt @@ -122,6 +126,22 @@ let localhost_handler rpc session_id vdi_opt (req : Request.t) ~virtual_size:length ~_type:`user ~sharable:false ~read_only:false ~other_config:[] ~xenstore_data:[] ~sm_config:[] ~tags:[] + | None, Importexport.Format.Qcow, _, _ -> + error + "Importing a QCOW2 directly into an SR not yet \ + supported" ; + raise + (HandleError + ( Api_errors.Server_error + ( Api_errors.internal_error + , [ + "Importing a QCOW2 directly into an SR not \ + yet supported" + ] + ) + , Http.http_400_badrequest ~version:"1.0" () + ) + ) | None, Importexport.Format.Vhd, _, _ -> error "Importing a VHD directly into an SR not yet supported" ; @@ -158,6 +178,13 @@ let localhost_handler rpc session_id vdi_opt (req : Request.t) in Http_svr.headers s headers ; ( match format with + | Qcow -> + Sm_fs_ops.with_block_attached_device __context rpc + session_id vdi `RW (fun path -> + Qcow_tool_wrapper.receive + (Qcow_tool_wrapper.update_task_progress __context) + s path + ) | Raw | Vhd -> let prezeroed = not diff --git a/ocaml/xapi/importexport.ml b/ocaml/xapi/importexport.ml index a210bda04d6..6ba6769b7ef 100644 --- a/ocaml/xapi/importexport.ml +++ b/ocaml/xapi/importexport.ml @@ -430,9 +430,17 @@ let sr_of_req ~__context (req : Http.Request.t) = None module Format = struct - type t = Raw | Vhd | Tar + type t = Raw | Vhd | Tar | Qcow - let to_string = function Raw -> "raw" | Vhd -> "vhd" | Tar -> "tar" + let to_string = function + | Raw -> + "raw" + | Vhd -> + "vhd" + | Tar -> + "tar" + | Qcow -> + "qcow2" let of_string x = match String.lowercase_ascii x with @@ -442,6 +450,8 @@ module Format = struct Some Vhd | "tar" -> Some Tar + | "qcow2" -> + Some Qcow | _ -> None @@ -457,6 +467,8 @@ module Format = struct "application/vhd" | Tar -> "application/x-tar" + | Qcow -> + "application/x-qemu-disk" let _key = "format" diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 5e2ca4774ed..d01bdc851d8 100644 --- 
a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -143,24 +143,15 @@ let do_op_on_common ~local_fn ~__context ~host ~remote_fn f = let task_opt = set_forwarding_on_task ~__context ~host in f __context host task_opt remote_fn with - | Xmlrpc_client.Connection_reset | Http_client.Http_request_rejected _ -> - warn - "Caught Connection_reset when contacting host %s; converting into \ - CANNOT_CONTACT_HOST" - (Ref.string_of host) ; - raise - (Api_errors.Server_error - (Api_errors.cannot_contact_host, [Ref.string_of host]) - ) - | Xmlrpc_client.Stunnel_connection_failed -> - warn - "Caught Stunnel_connection_failed while contacting host %s; converting \ - into CANNOT_CONTACT_HOST" - (Ref.string_of host) ; - raise - (Api_errors.Server_error - (Api_errors.cannot_contact_host, [Ref.string_of host]) - ) + | ( Xmlrpc_client.Connection_reset + | Http_client.Http_request_rejected _ + | Xmlrpc_client.Stunnel_connection_failed ) as e + -> + error + "%s: Caught %s when contacting host %s; converting into \ + CANNOT_CONTACT_HOST" + __FUNCTION__ (Printexc.to_string e) (Ref.string_of host) ; + raise Api_errors.(Server_error (cannot_contact_host, [Ref.string_of host])) (* regular forwarding fn, with session and live-check. Used by most calls, will use the connection cache. 
*) @@ -1177,6 +1168,32 @@ functor let get_guest_secureboot_readiness ~__context ~self = info "%s: pool='%s'" __FUNCTION__ (pool_uuid ~__context self) ; Local.Pool.get_guest_secureboot_readiness ~__context ~self + + let enable_ssh ~__context ~self = + info "%s: pool = '%s'" __FUNCTION__ (pool_uuid ~__context self) ; + Local.Pool.enable_ssh ~__context ~self + + let disable_ssh ~__context ~self = + info "%s: pool = '%s'" __FUNCTION__ (pool_uuid ~__context self) ; + Local.Pool.disable_ssh ~__context ~self + + let set_ssh_enabled_timeout ~__context ~self ~value = + info "Pool.set_ssh_enabled_timeout: pool='%s' value='%Ld'" + (pool_uuid ~__context self) + value ; + Local.Pool.set_ssh_enabled_timeout ~__context ~self ~value + + let set_console_idle_timeout ~__context ~self ~value = + info "Pool.set_console_idle_timeout: pool='%s' value='%Ld'" + (pool_uuid ~__context self) + value ; + Local.Pool.set_console_idle_timeout ~__context ~self ~value + + let set_ssh_auto_mode ~__context ~self ~value = + info "Pool.set_ssh_auto_mode: pool='%s' value='%b'" + (pool_uuid ~__context self) + value ; + Local.Pool.set_ssh_auto_mode ~__context ~self ~value end module VM = struct @@ -2010,6 +2027,34 @@ functor forward_vm_op ~local_fn ~__context ~vm ~remote_fn ) + let call_host_plugin ~__context ~vm ~plugin ~fn ~args = + info + "VM.call_host_plugin: VM = '%s'; plugin = '%s'; fn = '%s'; args = [ \ + 'hidden' ]" + (vm_uuid ~__context vm) plugin fn ; + let local_fn = Local.VM.call_host_plugin ~vm ~plugin ~fn ~args in + let remote_fn = Client.VM.call_host_plugin ~vm ~plugin ~fn ~args in + let power_state = Db.VM.get_power_state ~__context ~self:vm in + (* Insisting on running to make sure xenstore and domain exist + and the VM can react to xenstore events. 
Permitting Paused in + addition could be an option *) + if power_state <> `Running then + raise + Api_errors.( + Server_error + ( vm_bad_power_state + , [ + Ref.string_of vm + ; Record_util.vm_power_state_to_string `Running + ; Record_util.vm_power_state_to_string power_state + ] + ) + ) ; + with_vm_operation ~__context ~self:vm ~doc:"VM.call_host_plugin" + ~op:`call_plugin ~policy:Helpers.Policy.fail_immediately (fun () -> + forward_vm_op ~local_fn ~__context ~vm ~remote_fn + ) + let set_has_vendor_device ~__context ~self ~value = info "VM.set_has_vendor_device: VM = '%s' to %b" (vm_uuid ~__context self) value ; @@ -2456,6 +2501,8 @@ functor let snapshot = Db.VM.get_record ~__context ~self:vm in reserve_memory_for_vm ~__context ~vm ~host ~snapshot ~host_op:`vm_migrate (fun () -> + if Db.VM.get_VGPUs ~__context ~self:vm <> [] then + Xapi_stats.incr_pool_vgpu_migration_count () ; forward_vm_op ~local_fn ~__context ~vm ~remote_fn ) ) ; @@ -2532,7 +2579,7 @@ functor forward_vm_op ~local_fn ~__context ~vm ~remote_fn:(fun ~rpc ~session_id -> (* try InternalAsync.VM.migrate_send first to avoid long running idle stunnel connection - * fall back on Async.VM.migrate_send if slave doesn't support InternalAsync *) + * fall back on Async.VM.migrate_send if slave doesn't support InternalAsync *) Helpers.try_internal_async ~__context API.ref_VM_of_rpc (fun () -> Client.InternalAsync.VM.migrate_send ~rpc ~session_id ~vm @@ -2577,6 +2624,8 @@ functor assert_can_migrate ~__context ~vm ~dest ~live ~vdi_map ~vif_map ~vgpu_map ~options ) ; + if Db.VM.get_VGPUs ~__context ~self:vm <> [] then + Xapi_stats.incr_pool_vgpu_migration_count () ; forward_migrate_send () ) in @@ -3066,16 +3115,28 @@ functor Xapi_vm_lifecycle.update_allowed_operations ~__context ~self let add_to_blocked_operations ~__context ~self ~key ~value = - info "VM.add_to_blocked_operations: self = '%s'" - (vm_uuid ~__context self) ; + info "VM.add_to_blocked_operations: self = '%s', key = '%s'" + (vm_uuid ~__context 
self) + (API.vm_operations_to_string key) ; Local.VM.add_to_blocked_operations ~__context ~self ~key ~value ; Xapi_vm_lifecycle.update_allowed_operations ~__context ~self let remove_from_blocked_operations ~__context ~self ~key = - info "VM.remove_from_blocked_operations: self = '%s'" - (vm_uuid ~__context self) ; + info "VM.remove_from_blocked_operations: self = '%s', key = '%s'" + (vm_uuid ~__context self) + (API.vm_operations_to_string key) ; Local.VM.remove_from_blocked_operations ~__context ~self ~key ; Xapi_vm_lifecycle.update_allowed_operations ~__context ~self + + let sysprep ~__context ~self ~unattend ~timeout = + info "VM.sysprep: self = '%s'" (vm_uuid ~__context self) ; + let local_fn = Local.VM.sysprep ~self ~unattend ~timeout in + let remote_fn = Client.VM.sysprep ~self ~unattend ~timeout in + let policy = Helpers.Policy.fail_immediately in + with_vm_operation ~__context ~self ~doc:"VM.sysprep" ~op:`sysprep + ~policy (fun () -> + forward_vm_op ~local_fn ~__context ~vm:self ~remote_fn + ) end module VM_metrics = struct end @@ -3288,13 +3349,15 @@ functor (host_uuid ~__context host) ; Local.Host.get_management_interface ~__context ~host - let disable ~__context ~host = - info "Host.disable: host = '%s'" (host_uuid ~__context host) ; + let disable ~__context ~host ~auto_enable = + info "Host.disable: host = '%s', auto_enable = '%b'" + (host_uuid ~__context host) + auto_enable ; (* Block call if this would break our VM restart plan *) Xapi_ha_vm_failover.assert_host_disable_preserves_ha_plan ~__context host ; - let local_fn = Local.Host.disable ~host in - let remote_fn = Client.Host.disable ~host in + let local_fn = Local.Host.disable ~host ~auto_enable in + let remote_fn = Client.Host.disable ~host ~auto_enable in do_op_on ~local_fn ~__context ~host ~remote_fn ; Xapi_host_helpers.update_allowed_operations ~__context ~self:host @@ -4015,6 +4078,42 @@ functor let emergency_clear_mandatory_guidance ~__context = info "Host.emergency_clear_mandatory_guidance" 
; Local.Host.emergency_clear_mandatory_guidance ~__context + + let enable_ssh ~__context ~self = + info "%s: host = '%s'" __FUNCTION__ (host_uuid ~__context self) ; + let local_fn = Local.Host.enable_ssh ~self in + let remote_fn = Client.Host.enable_ssh ~self in + do_op_on ~local_fn ~__context ~host:self ~remote_fn + + let disable_ssh ~__context ~self = + info "%s: host = '%s'" __FUNCTION__ (host_uuid ~__context self) ; + let local_fn = Local.Host.disable_ssh ~self in + let remote_fn = Client.Host.disable_ssh ~self in + do_op_on ~local_fn ~__context ~host:self ~remote_fn + + let set_ssh_enabled_timeout ~__context ~self ~value = + info "Host.set_ssh_enabled_timeout: host='%s' value='%Ld'" + (host_uuid ~__context self) + value ; + let local_fn = Local.Host.set_ssh_enabled_timeout ~self ~value in + let remote_fn = Client.Host.set_ssh_enabled_timeout ~self ~value in + do_op_on ~local_fn ~__context ~host:self ~remote_fn + + let set_console_idle_timeout ~__context ~self ~value = + info "Host.set_console_idle_timeout: host='%s' value='%Ld'" + (host_uuid ~__context self) + value ; + let local_fn = Local.Host.set_console_idle_timeout ~self ~value in + let remote_fn = Client.Host.set_console_idle_timeout ~self ~value in + do_op_on ~local_fn ~__context ~host:self ~remote_fn + + let set_ssh_auto_mode ~__context ~self ~value = + info "Host.set_ssh_auto_mode: host='%s' value='%b'" + (host_uuid ~__context self) + value ; + let local_fn = Local.Host.set_ssh_auto_mode ~self ~value in + let remote_fn = Client.Host.set_ssh_auto_mode ~self ~value in + do_op_on ~local_fn ~__context ~host:self ~remote_fn end module Host_crashdump = struct @@ -5643,14 +5742,21 @@ functor if Helpers.i_am_srmaster ~__context ~sr then List.iter (fun vdi -> - if Db.VDI.get_current_operations ~__context ~self:vdi <> [] - then - raise - (Api_errors.Server_error - ( Api_errors.other_operation_in_progress - , [Datamodel_common._vdi; Ref.string_of vdi] - ) - ) + match Db.VDI.get_current_operations ~__context 
~self:vdi with + | (op_ref, op_type) :: _ -> + raise + (Api_errors.Server_error + ( Api_errors.other_operation_in_progress + , [ + Datamodel_common._vdi + ; Ref.string_of vdi + ; API.vdi_operations_to_string op_type + ; op_ref + ] + ) + ) + | [] -> + () ) (Db.SR.get_VDIs ~__context ~self:sr) ; SR.mark_sr ~__context ~sr ~doc ~op @@ -6304,7 +6410,7 @@ functor let remote_fn = Client.Cluster_host.forget ~self in (* We need to ask another host that has a cluster host to mark it as dead. * We might've run force destroy and this host would no longer have a cluster host - * *) + *) let other_hosts = Db.Cluster.get_cluster_hosts ~__context ~self:cluster |> List.filter (( <> ) self) diff --git a/ocaml/xapi/monitor_dbcalls.ml b/ocaml/xapi/monitor_dbcalls.ml index ab521155d2c..48b96bbd92a 100644 --- a/ocaml/xapi/monitor_dbcalls.ml +++ b/ocaml/xapi/monitor_dbcalls.ml @@ -127,8 +127,7 @@ let monitor_dbcall_thread () = try let rrd_files = Monitor_types.find_rrd_files () in pifs_update_fn () ; - Monitor_mem_host.update rrd_files ; - Monitor_mem_vms.update rrd_files ; + Monitor_mem.update rrd_files ; Monitor_pvs_proxy.update rrd_files ; Thread.delay 5. with e -> diff --git a/ocaml/xapi/monitor_master.ml b/ocaml/xapi/monitor_master.ml index b506d030205..18a2c9edf7e 100644 --- a/ocaml/xapi/monitor_master.ml +++ b/ocaml/xapi/monitor_master.ml @@ -173,7 +173,7 @@ let update_pifs ~__context host pifs = |> List.concat_map vifs_on_local_bridge |> List.iter set_carrier with e -> - log_backtrace () ; + log_backtrace e ; error "Failed to update VIF carrier flags for PIF: %s" (ExnHelper.string_of_exn e) ) ; diff --git a/ocaml/xapi/monitor_mem.ml b/ocaml/xapi/monitor_mem.ml new file mode 100644 index 00000000000..79cf3cadf9d --- /dev/null +++ b/ocaml/xapi/monitor_mem.ml @@ -0,0 +1,178 @@ +(* + * Copyright (C) Citrix Systems Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module Mtxext = Xapi_stdext_threads.Threadext.Mutex +module Mcache = Monitor_dbcalls_cache + +module D = Debug.Make (struct let name = __MODULE__ end) + +open D + +let get_datasources rrd_files = + List.filter_map + (fun filename -> + if String.starts_with ~prefix:Xapi_globs.metrics_prefix_mem filename then + Some (filename, Monitor_types.datasources_from_filename filename) + else + None + ) + rrd_files + +module Host = struct + let get_changes datasources = + let named_dss = + List.concat_map + (fun (filename, datasources) -> + try + Mcache.log_errors_from filename ; + datasources + |> List.filter_map (function + | Rrd.Host, ds + when List.mem ds.Ds.ds_name + ["memory_total_kib"; "memory_free_kib"] -> + Some ds + | _ -> + None (* we are only interested in Host memory stats *) + ) + |> List.map (function ds -> + let value = + match ds.Ds.ds_value with + | Rrd.VT_Int64 v -> + Memory.bytes_of_kib v + | Rrd.VT_Float v -> + Memory.bytes_of_kib (Int64.of_float v) + | Rrd.VT_Unknown -> + -1L + in + (ds.Ds.ds_name, value) + ) + with e -> + if not (Mcache.is_ignored filename) then ( + error "Unable to read host memory metrics from %s: %s" filename + (Printexc.to_string e) ; + Mcache.ignore_errors_from filename + ) ; + [] + ) + datasources + in + let free_bytes = List.assoc_opt "memory_free_kib" named_dss in + let total_bytes = List.assoc_opt "memory_total_kib" named_dss in + (* Check if anything has changed since our last reading. 
*) + match (free_bytes, total_bytes) with + | Some free, Some total + when !Mcache.host_memory_free_cached <> free + || !Mcache.host_memory_total_cached <> total -> + Some (free, total) + | _ -> + None + + let set_changes (free_bytes, total_bytes) = + Mtxext.execute Mcache.host_memory_m (fun _ -> + Mcache.host_memory_free_cached := free_bytes ; + Mcache.host_memory_total_cached := total_bytes + ) + + let update __context datasources = + match get_changes datasources with + | None -> + () + | Some ((free, total) as c) -> ( + try + let host = Helpers.get_localhost ~__context in + let metrics = Db.Host.get_metrics ~__context ~self:host in + Db.Host_metrics.set_memory_total ~__context ~self:metrics ~value:total ; + Db.Host_metrics.set_memory_free ~__context ~self:metrics ~value:free ; + set_changes c + with e -> + error "Unable to update host memory metrics: %s" (Printexc.to_string e) + ) +end + +module VMs = struct + let get_changes datasources = + List.iter + (fun (filename, datasources) -> + try + Mcache.log_errors_from filename ; + datasources + |> List.filter_map (function + | Rrd.VM vm_uuid, ds when ds.Ds.ds_name = "memory" -> + Some (vm_uuid, ds) + | _ -> + None (* we are only interested in VM stats *) + ) + |> List.iter (function vm_uuid, ds -> + let value = + match ds.Ds.ds_value with + | Rrd.VT_Int64 v -> + v + | Rrd.VT_Float v -> + Int64.of_float v + | Rrd.VT_Unknown -> + -1L + in + Hashtbl.add Mcache.vm_memory_tmp vm_uuid value + ) + with e -> + if not (Mcache.is_ignored filename) then ( + error "Unable to read memory usage for VM %s: %s" filename + (Printexc.to_string e) ; + Mcache.ignore_errors_from filename + ) + ) + datasources ; + (* Check if anything has changed since our last reading. 
*) + Mcache.get_updates_map ~before:Mcache.vm_memory_cached + ~after:Mcache.vm_memory_tmp + + let set_changes ?except () = + Mtxext.execute Mcache.vm_memory_cached_m (fun _ -> + Mcache.transfer_map ?except ~source:Mcache.vm_memory_tmp + ~target:Mcache.vm_memory_cached () + ) + + let update __context datasources = + let host = Helpers.get_localhost ~__context in + let keeps = ref [] in + List.iter + (fun (vm_uuid, memory) -> + try + let vm = Db.VM.get_by_uuid ~__context ~uuid:vm_uuid in + let vmm = Db.VM.get_metrics ~__context ~self:vm in + if Db.VM.get_resident_on ~__context ~self:vm = host then + Db.VM_metrics.set_memory_actual ~__context ~self:vmm ~value:memory + else + Mcache.clear_cache_for_vm ~vm_uuid + with e -> + keeps := vm_uuid :: !keeps ; + error "Unable to update memory usage for VM %s: %s" vm_uuid + (Printexc.to_string e) + ) + (get_changes datasources) ; + set_changes ~except:!keeps () +end + +let update rrd_files = + let ( let@ ) f x = f x in + let@ __context = + Server_helpers.exec_with_new_task "Updating memory metrics" + in + let datasources = get_datasources rrd_files in + if datasources = [] then + error "%s: no memory datasources found!" __FUNCTION__ + else ( + Host.update __context datasources ; + VMs.update __context datasources + ) diff --git a/ocaml/xapi/monitor_mem.mli b/ocaml/xapi/monitor_mem.mli new file mode 100644 index 00000000000..c2b74b2512f --- /dev/null +++ b/ocaml/xapi/monitor_mem.mli @@ -0,0 +1,18 @@ +(* Copyright (C) Cloud Software Group Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU Lesser General Public License for more details. +*) + +module Mcache = Monitor_dbcalls_cache + +val update : Mcache.StringSet.elt list -> unit +(** [update rrd_files] Reads rrd_files and update the host and VM memory + metrics in xapi's cache. *) diff --git a/ocaml/xapi/monitor_mem_host.ml b/ocaml/xapi/monitor_mem_host.ml deleted file mode 100644 index e4c2f012a24..00000000000 --- a/ocaml/xapi/monitor_mem_host.ml +++ /dev/null @@ -1,98 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) - -module Mtxext = Xapi_stdext_threads.Threadext.Mutex -module Mcache = Monitor_dbcalls_cache - -module D = Debug.Make (struct let name = "monitor_mem_host" end) - -open D - -let get_changes rrd_files = - let named_dss = - List.concat_map - (fun filename -> - try - let datasources = Monitor_types.datasources_from_filename filename in - Mcache.log_errors_from filename ; - datasources - |> List.filter_map (function - | Rrd.Host, ds - when List.mem ds.Ds.ds_name - ["memory_total_kib"; "memory_free_kib"] -> - Some ds - | _ -> - None (* we are only interested in Host memory stats *) - ) - |> List.map (function ds -> - let value = - match ds.Ds.ds_value with - | Rrd.VT_Int64 v -> - Memory.bytes_of_kib v - | Rrd.VT_Float v -> - Memory.bytes_of_kib (Int64.of_float v) - | Rrd.VT_Unknown -> - -1L - in - (ds.Ds.ds_name, value) - ) - with e -> - if not (Mcache.is_ignored filename) then ( - error "Unable to read host memory metrics from %s: %s" filename - (Printexc.to_string e) ; - Mcache.ignore_errors_from filename - ) ; - [] - ) - rrd_files - in - let free_bytes = List.assoc_opt "memory_free_kib" named_dss in - let total_bytes = List.assoc_opt "memory_total_kib" named_dss in - (* Check if anything has changed since our last reading. 
*) - match (free_bytes, total_bytes) with - | Some free, Some total - when !Mcache.host_memory_free_cached <> free - || !Mcache.host_memory_total_cached <> total -> - Some (free, total) - | _ -> - None - -let set_changes (free_bytes, total_bytes) = - Mtxext.execute Mcache.host_memory_m (fun _ -> - Mcache.host_memory_free_cached := free_bytes ; - Mcache.host_memory_total_cached := total_bytes - ) - -let update rrd_files = - let is_host_rrd = - Astring.String.is_prefix ~affix:Xapi_globs.metrics_prefix_mem_host - in - let rrd_files = List.filter is_host_rrd rrd_files in - Server_helpers.exec_with_new_task "Updating host memory metrics" - (fun __context -> - let changes = get_changes rrd_files in - match changes with - | None -> - () - | Some ((free, total) as c) -> ( - try - let host = Helpers.get_localhost ~__context in - let metrics = Db.Host.get_metrics ~__context ~self:host in - Db.Host_metrics.set_memory_total ~__context ~self:metrics ~value:total ; - Db.Host_metrics.set_memory_free ~__context ~self:metrics ~value:free ; - set_changes c - with e -> - error "Unable to update host memory metrics: %s" (Printexc.to_string e) - ) - ) diff --git a/ocaml/xapi/monitor_mem_vms.ml b/ocaml/xapi/monitor_mem_vms.ml deleted file mode 100644 index 37d737d92df..00000000000 --- a/ocaml/xapi/monitor_mem_vms.ml +++ /dev/null @@ -1,89 +0,0 @@ -(* - * Copyright (C) Citrix Systems Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; version 2.1 only. with the special - * exception on linking described in file LICENSE. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- *) - -module Mtxext = Xapi_stdext_threads.Threadext.Mutex -module Mcache = Monitor_dbcalls_cache - -module D = Debug.Make (struct let name = "monitor_mem_vms" end) - -open D - -let get_changes rrd_files = - List.iter - (fun filename -> - try - let datasources = Monitor_types.datasources_from_filename filename in - Mcache.log_errors_from filename ; - datasources - |> List.filter_map (function - | Rrd.VM vm_uuid, ds when ds.Ds.ds_name = "memory" -> - Some (vm_uuid, ds) - | _ -> - None (* we are only interested in VM stats *) - ) - |> List.iter (function vm_uuid, ds -> - let value = - match ds.Ds.ds_value with - | Rrd.VT_Int64 v -> - v - | Rrd.VT_Float v -> - Int64.of_float v - | Rrd.VT_Unknown -> - -1L - in - Hashtbl.add Mcache.vm_memory_tmp vm_uuid value - ) - with e -> - if not (Mcache.is_ignored filename) then ( - error "Unable to read memory usage for VM %s: %s" filename - (Printexc.to_string e) ; - Mcache.ignore_errors_from filename - ) - ) - rrd_files ; - (* Check if anything has changed since our last reading. 
*) - Mcache.get_updates_map ~before:Mcache.vm_memory_cached - ~after:Mcache.vm_memory_tmp - -let set_changes ?except () = - Mtxext.execute Mcache.vm_memory_cached_m (fun _ -> - Mcache.transfer_map ?except ~source:Mcache.vm_memory_tmp - ~target:Mcache.vm_memory_cached () - ) - -let update rrd_files = - let is_vm_rrd = - Astring.String.is_prefix ~affix:Xapi_globs.metrics_prefix_mem_vms - in - let rrd_files = List.filter is_vm_rrd rrd_files in - Server_helpers.exec_with_new_task "Updating VM memory usage" (fun __context -> - let host = Helpers.get_localhost ~__context in - let keeps = ref [] in - List.iter - (fun (vm_uuid, memory) -> - try - let vm = Db.VM.get_by_uuid ~__context ~uuid:vm_uuid in - let vmm = Db.VM.get_metrics ~__context ~self:vm in - if Db.VM.get_resident_on ~__context ~self:vm = host then - Db.VM_metrics.set_memory_actual ~__context ~self:vmm ~value:memory - else - Mcache.clear_cache_for_vm ~vm_uuid - with e -> - keeps := vm_uuid :: !keeps ; - error "Unable to update memory usage for VM %s: %s" vm_uuid - (Printexc.to_string e) - ) - (get_changes rrd_files) ; - set_changes ~except:!keeps () - ) diff --git a/ocaml/xapi/nm.ml b/ocaml/xapi/nm.ml index 229b53adbe2..fbc37a5fedc 100644 --- a/ocaml/xapi/nm.ml +++ b/ocaml/xapi/nm.ml @@ -634,28 +634,25 @@ let bring_pif_up ~__context ?(management_interface = false) (pif : API.ref_PIF) rc.API.pIF_ip_configuration_mode = `Static | `IPv6 -> rc.API.pIF_ipv6_configuration_mode = `Static + || rc.API.pIF_ipv6_configuration_mode = `Autoconf in let dns = match (static, rc.API.pIF_DNS) with | false, _ | true, "" -> - ([], []) + None | true, pif_dns -> let nameservers = List.map Unix.inet_addr_of_string - (String.split ',' pif_dns) + (String.split_on_char ',' pif_dns) in let domains = match List.assoc_opt "domain" rc.API.pIF_other_config with - | None -> + | None | Some "" -> [] - | Some domains -> ( - try String.split ',' domains - with _ -> - warn "Invalid DNS search domains: %s" domains ; - [] - ) + | Some domains -> + 
String.split_on_char ',' domains in - (nameservers, domains) + Some (nameservers, domains) in let mtu = determine_mtu rc net_rc in let ethtool_settings, ethtool_offload = diff --git a/ocaml/xapi/pool_db_backup.ml b/ocaml/xapi/pool_db_backup.ml index 2a0ab1eae21..f82e3340c12 100644 --- a/ocaml/xapi/pool_db_backup.ml +++ b/ocaml/xapi/pool_db_backup.ml @@ -192,7 +192,7 @@ let restore_from_xml __context dry_run (xml_filename : string) = (Db_xml.From.file (Datamodel_schema.of_datamodel ()) xml_filename) in version_check db ; - let db_ref = Db_ref.in_memory (ref (ref db)) in + let db_ref = Db_ref.in_memory (Atomic.make db) in let new_context = Context.make ~database:db_ref "restore_db" in prepare_database_for_restore ~old_context:__context ~new_context ; (* write manifest and unmarshalled db directly to db_temporary_restore_path, so its ready for us on restart *) diff --git a/ocaml/xapi/pool_features_helpers.ml b/ocaml/xapi/pool_features_helpers.ml index dda8619013c..36e7e7a0252 100644 --- a/ocaml/xapi/pool_features_helpers.ml +++ b/ocaml/xapi/pool_features_helpers.ml @@ -58,17 +58,16 @@ let rec compute_additional_restrictions all_host_params = function [] | flag :: rest -> let switches = - List.map + List.exists (function | params -> - if List.mem_assoc flag params then - bool_of_string (List.assoc flag params) - else - true + List.assoc_opt flag params + |> Fun.flip Option.bind bool_of_string_opt + |> Option.value ~default:true ) all_host_params in - (flag, string_of_bool (List.fold_left ( || ) false switches)) + (flag, string_of_bool switches) :: compute_additional_restrictions all_host_params rest (* Combine the host-level feature restrictions into pool-level ones, and write diff --git a/ocaml/xapi/qcow_tool_wrapper.ml b/ocaml/xapi/qcow_tool_wrapper.ml new file mode 100644 index 00000000000..30d0eb63811 --- /dev/null +++ b/ocaml/xapi/qcow_tool_wrapper.ml @@ -0,0 +1,59 @@ +(* + * Copyright (C) 2025 Vates. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +open D + +let run_qcow_tool qcow_tool ?input_fd ?output_fd (_progress_cb : int -> unit) + (args : string list) = + info "Executing %s %s" qcow_tool (String.concat " " args) ; + let open Forkhelpers in + match + with_logfile_fd "qcow-tool" (fun log_fd -> + let pid = + safe_close_and_exec input_fd output_fd (Some log_fd) [] qcow_tool args + in + let _, status = waitpid pid in + if status <> Unix.WEXITED 0 then ( + error "qcow-tool failed, returning VDI_IO_ERROR" ; + raise + (Api_errors.Server_error + (Api_errors.vdi_io_error, ["Device I/O errors"]) + ) + ) + ) + with + | Success (out, _) -> + debug "qcow-tool successful export (%s)" out + | Failure (out, _e) -> + error "qcow-tool output: %s" out ; + raise (Api_errors.Server_error (Api_errors.vdi_io_error, [out])) + +let update_task_progress (__context : Context.t) (x : int) = + TaskHelper.set_progress ~__context (float_of_int x /. 100.) 
+ +let receive (progress_cb : int -> unit) (unix_fd : Unix.file_descr) + (path : string) = + let args = [path] in + let qcow_tool = !Xapi_globs.qcow_stream_tool in + run_qcow_tool qcow_tool progress_cb args ~input_fd:unix_fd + +let send ?relative_to (progress_cb : int -> unit) (unix_fd : Unix.file_descr) + (path : string) (_size : Int64.t) = + let args = + [path] @ match relative_to with None -> [] | Some vdi -> ["--diff"; vdi] + in + let qcow_tool = !Xapi_globs.qcow_to_stdout in + run_qcow_tool qcow_tool progress_cb args ~output_fd:unix_fd diff --git a/ocaml/xapi/qcow_tool_wrapper.mli b/ocaml/xapi/qcow_tool_wrapper.mli new file mode 100644 index 00000000000..51c3c626567 --- /dev/null +++ b/ocaml/xapi/qcow_tool_wrapper.mli @@ -0,0 +1,25 @@ +(* + * Copyright (C) 2025 Vates. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +val update_task_progress : Context.t -> int -> unit + +val receive : (int -> unit) -> Unix.file_descr -> string -> unit + +val send : + ?relative_to:string + -> (int -> unit) + -> Unix.file_descr + -> string + -> int64 + -> unit diff --git a/ocaml/xapi/repository.ml b/ocaml/xapi/repository.ml index bf418ee8b03..ea87a715e17 100644 --- a/ocaml/xapi/repository.ml +++ b/ocaml/xapi/repository.ml @@ -33,6 +33,8 @@ let updates_in_cache : (API.ref_host, Yojson.Basic.t) Hashtbl.t = let introduce ~__context ~name_label ~name_description ~binary_url ~source_url ~update ~gpgkey_path = + assert_url_is_not_blocked ~url:binary_url ; + assert_url_is_not_blocked ~url:source_url ; assert_url_is_valid ~url:binary_url ; assert_url_is_valid ~url:source_url ; assert_gpgkey_path_is_valid gpgkey_path ; @@ -306,6 +308,8 @@ let sync ~__context ~self ~token ~token_id ~username ~password = with | Api_errors.Server_error (_, _) as e -> raise e + | Stunnel.Stunnel_verify_error reason -> + raise (Api_errors.Server_error (Api_errors.ssl_verify_error, [reason])) | e -> error "Failed to sync with remote YUM repository: %s" (ExnHelper.string_of_exn e) ; diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index 62df609c53a..91a3c1b4670 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -209,6 +209,23 @@ let assert_url_is_valid ~url = error "Invalid url %s: %s" url (ExnHelper.string_of_exn e) ; raise Api_errors.(Server_error (invalid_base_url, [url])) +let url_matches ~url (patterns : string list) : bool = + List.exists + (fun pattern -> + try + let re = Re.Perl.re pattern |> Re.compile in + Re.execp re url + with exn -> + error "Exception in %s: %s" __FUNCTION__ (Printexc.to_string exn) ; + false + ) + patterns + +let assert_url_is_not_blocked ~url = + let blocklist = !Xapi_globs.repository_url_blocklist in + if url_matches ~url blocklist then + raise Api_errors.(Server_error (blocked_repo_url, [url])) + let 
is_gpgkey_path_valid = function | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '-' -> true diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index 04aae674472..48789c455aa 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -53,9 +53,10 @@ let parameter_count_mismatch_failure func expected received = API.response_of_failure Api_errors.message_parameter_count_mismatch [func; expected; received] -(** WARNING: the context is destroyed when execution is finished if the task is not forwarded, in database and not called asynchronous. *) -let exec_with_context ~__context ~need_complete ?marshaller ?f_forward - ?(called_async = false) ?quiet f = +(** WARNING: DOES NOT DESTROY the context when execution is finished. The + caller must destroy it *) +let exec_with_context ~__context ~need_complete ?marshaller ?f_forward ?quiet f + = (* Execute fn f in specified __context, marshalling result with "marshaller" *) let exec () = (* NB: @@ -95,23 +96,15 @@ let exec_with_context ~__context ~need_complete ?marshaller ?f_forward if need_complete then TaskHelper.failed ~__context e ; raise e in - Locking_helpers.Thread_state.with_named_thread - (TaskHelper.get_name ~__context) (Context.get_task_id __context) (fun () -> - let client = Context.get_client __context in - Debug.with_thread_associated ?client ?quiet - (Context.string_of_task __context) - (fun () -> - (* CP-982: promote tracking debug line to info status *) - if called_async then - info "spawning a new thread to handle the current task%s" - (Context.trackid ~with_brackets:true ~prefix:" " __context) ; - Xapi_stdext_pervasives.Pervasiveext.finally exec (fun () -> - if not called_async then Context.destroy __context - (* else debug "nothing more to process for this thread" *) - ) - ) - () - ) + let@ () = + Locking_helpers.Thread_state.with_named_thread + (TaskHelper.get_name ~__context) + (Context.get_task_id __context) + in + let client = Context.get_client __context in + 
Debug.with_thread_associated ?client ?quiet + (Context.string_of_task __context) + exec () let dispatch_exn_wrapper f = try f () @@ -168,18 +161,22 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name let sync () = let need_complete = not (Context.forwarded_task __context) in - exec_with_context ~__context ~need_complete ~called_async - ?f_forward:forward_op ~marshaller op_fn + let@ __context = Context.finally_destroy_context ~__context in + exec_with_context ~__context ~need_complete ?f_forward:forward_op + ~marshaller op_fn |> marshaller |> Rpc.success in + let async ~need_complete = (* Fork thread in which to execute async call *) + info "spawning a new thread to handle the current task%s" + (Context.trackid ~with_brackets:true ~prefix:" " __context) ; ignore (Thread.create (fun () -> - exec_with_context ~__context ~need_complete ~called_async - ?f_forward:forward_op ~marshaller op_fn + exec_with_context ~__context ~need_complete ?f_forward:forward_op + ~marshaller op_fn ) () ) ; @@ -200,26 +197,27 @@ let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name (* in the following functions, it is our responsibility to complete any tasks we create *) let exec_with_new_task ?http_other_config ?quiet ?subtask_of ?session_id ?task_in_database ?task_description ?origin task_name f = - exec_with_context ?quiet - ~__context: - (Context.make ?http_other_config ?quiet ?subtask_of ?session_id - ?task_in_database ?task_description ?origin task_name - ) ~need_complete:true (fun ~__context -> f __context + let@ __context = + Context.with_context ?http_other_config ?quiet ?subtask_of ?session_id + ?task_in_database ?task_description ?origin task_name + in + exec_with_context ~__context ~need_complete:true (fun ~__context -> + f __context ) let exec_with_forwarded_task ?http_other_config ?session_id ?origin task_id f = - exec_with_context - ~__context: - (Context.from_forwarded_task ?http_other_config ?session_id ?origin - task_id 
- ) ~need_complete:true (fun ~__context -> f __context + let@ __context = + Context.with_forwarded_task ?http_other_config ?session_id ?origin task_id + in + exec_with_context ~__context ~need_complete:true (fun ~__context -> + f __context ) let exec_with_subtask ~__context ?task_in_database task_name f = - let subcontext = - Context.make_subcontext ~__context ?task_in_database task_name + let@ __context = + Context.with_subcontext ~__context ?task_in_database task_name in - exec_with_context ~__context:subcontext ~need_complete:true f + exec_with_context ~__context ~need_complete:true f let forward_extension ~__context rbac call = rbac __context (fun () -> Xapi_extensions.call_extension call) diff --git a/ocaml/xapi/sm_exec.ml b/ocaml/xapi/sm_exec.ml index 1da0c6c7e83..c4e2c46a1a9 100644 --- a/ocaml/xapi/sm_exec.ml +++ b/ocaml/xapi/sm_exec.ml @@ -582,6 +582,7 @@ let parse_sr_get_driver_info driver (xml : Xml.xml) = ; sr_driver_configuration= configuration ; sr_driver_text_features= text_features ; sr_driver_required_cluster_stack= [] + ; sr_smapi_version= SMAPIv1 } let sr_get_driver_info ~dbg driver = diff --git a/ocaml/xapi/smint.ml b/ocaml/xapi/smint.ml index b5c290afcb7..1b4e4d45e47 100644 --- a/ocaml/xapi/smint.ml +++ b/ocaml/xapi/smint.ml @@ -36,6 +36,7 @@ module Feature = struct | Sr_metadata | Sr_trim | Sr_multipath + | Sr_caching | Vdi_create | Vdi_delete | Vdi_attach @@ -75,6 +76,7 @@ module Feature = struct ; ("SR_METADATA", Sr_metadata) ; ("SR_TRIM", Sr_trim) ; ("SR_MULTIPATH", Sr_multipath) + ; ("SR_CACHING", Sr_caching) ; ("SR_STATS", Sr_stats) ; ("VDI_CREATE", Vdi_create) ; ("VDI_DELETE", Vdi_delete) @@ -130,7 +132,7 @@ module Feature = struct Some (feature, 1L) ) | feature :: _ -> - error "SM.feature: unknown feature %s" feature ; + warn "SM.feature: unknown feature %s" feature ; None (** [compat_features features1 features2] finds the compatible features in the input @@ -190,6 +192,7 @@ type sr_driver_info = { ; sr_driver_text_features: string list 
; sr_driver_configuration: (string * string) list ; sr_driver_required_cluster_stack: string list + ; sr_smapi_version: Storage_interface.smapi_version } let query_result_of_sr_driver_info x = @@ -204,6 +207,7 @@ let query_result_of_sr_driver_info x = ; features= x.sr_driver_text_features ; configuration= x.sr_driver_configuration ; required_cluster_stack= x.sr_driver_required_cluster_stack + ; smapi_version= x.sr_smapi_version } type attach_info = { diff --git a/ocaml/xapi/sparse_dd_wrapper.ml b/ocaml/xapi/sparse_dd_wrapper.ml index 0195fc38884..4085933d330 100644 --- a/ocaml/xapi/sparse_dd_wrapper.ml +++ b/ocaml/xapi/sparse_dd_wrapper.ml @@ -92,10 +92,8 @@ let dd_internal progress_cb base prezeroed verify_cert ?(proto = None) infile match proto with | None -> [] - | Some (StreamCommon.Nbd export) -> + | Some (`NBD export) -> ["-dest-proto"; "nbd"; "-nbd-export"; export] - | Some p -> - ["-dest-proto"; StreamCommon.string_of_protocol p] in let verify_args = match verify_cert with diff --git a/ocaml/xapi/storage_access.ml b/ocaml/xapi/storage_access.ml index d38cab783b5..cda399e9d60 100644 --- a/ocaml/xapi/storage_access.ml +++ b/ocaml/xapi/storage_access.ml @@ -109,10 +109,15 @@ exception Message_switch_failure (** Synchronise the SM table with the SMAPIv1 plugins on the disk and the SMAPIv2 plugins mentioned in the configuration file whitelist. 
*) let on_xapi_start ~__context = + (* An SM is either implemented as a plugin - for which we check its + presence, or via an API *) + let is_available rc = + Sys.file_exists rc.API.sM_driver_filename + || Version.String.ge rc.sM_required_api_version "5.0" + in let existing = - List.map - (fun (rf, rc) -> (rc.API.sM_type, (rf, rc))) - (Db.SM.get_all_records ~__context) + Db.SM.get_all_records ~__context + |> List.map (fun (rf, rc) -> (rc.API.sM_type, (rf, rc))) in let explicitly_configured_drivers = List.filter_map @@ -156,6 +161,7 @@ let on_xapi_start ~__context = | Message_switch_failure -> [] (* no more logging *) | e -> + Backtrace.is_important e ; error "Unexpected error querying the message switch: %s" (Printexc.to_string e) ; Debug.log_backtrace e (Backtrace.get e) ; @@ -165,6 +171,9 @@ let on_xapi_start ~__context = in (* Add all the running SMAPIv2 drivers *) let to_keep = to_keep @ running_smapiv2_drivers in + let unavailable = + List.filter (fun (_, (_, rc)) -> not (is_available rc)) existing + in (* Delete all records which aren't configured or in-use *) List.iter (fun ty -> @@ -175,6 +184,13 @@ let on_xapi_start ~__context = try Db.SM.destroy ~__context ~self with _ -> () ) (Listext.List.set_difference (List.map fst existing) to_keep) ; + List.iter + (fun (name, (self, rc)) -> + info "%s: unregistering SM plugin %s (%s) since it is unavailable" + __FUNCTION__ name rc.API.sM_uuid ; + try Db.SM.destroy ~__context ~self with _ -> () + ) + unavailable ; (* Synchronize SMAPIv1 plugins *) @@ -264,7 +280,9 @@ let bind ~__context ~pbd = let service = make_service uuid ty in System_domains.register_service service queue_name ; let info = Client.Query.query dbg in - Storage_mux.register (Storage_interface.Sr.of_string sr_uuid) rpc uuid info ; + Storage_mux_reg.register + (Storage_interface.Sr.of_string sr_uuid) + rpc uuid info ; info with e -> error @@ -281,7 +299,7 @@ let unbind ~__context ~pbd = let ty = Db.SR.get_type ~__context ~self:sr in let sr = 
Db.SR.get_uuid ~__context ~self:sr in info "SR %s will nolonger be implemented by VM %s" sr (Ref.string_of driver) ; - Storage_mux.unregister (Storage_interface.Sr.of_string sr) ; + Storage_mux_reg.unregister (Storage_interface.Sr.of_string sr) ; let service = make_service uuid ty in System_domains.unregister_service service diff --git a/ocaml/xapi/storage_migrate.ml b/ocaml/xapi/storage_migrate.ml index 37ec703709a..1ff03c3d7ed 100644 --- a/ocaml/xapi/storage_migrate.ml +++ b/ocaml/xapi/storage_migrate.ml @@ -15,714 +15,118 @@ module D = Debug.Make (struct let name = "storage_migrate" end) open D - -(** As SXM is such a long running process, we dedicate this to log important - milestones during the SXM process *) -module SXM = Debug.Make (struct - let name = "SXM" -end) - module Listext = Xapi_stdext_std.Listext open Xapi_stdext_pervasives.Pervasiveext module Unixext = Xapi_stdext_unix.Unixext -open Xmlrpc_client open Storage_interface open Storage_task +open Storage_migrate_helper -module State = struct - module Receive_state = struct - type t = { - sr: Sr.t - ; dummy_vdi: Vdi.t - ; leaf_vdi: Vdi.t - ; leaf_dp: dp - ; parent_vdi: Vdi.t - ; remote_vdi: Vdi.t - ; mirror_vm: Vm.t - } - [@@deriving rpcty] - - let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty - - let t_of_rpc x = - match Rpcmarshal.unmarshal t.Rpc.Types.ty x with - | Ok y -> - y - | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Receive_state.t: %s" m) - end - - module Send_state = struct - type remote_info = { - dp: dp - ; vdi: Vdi.t - ; url: string - ; verify_dest: bool [@default false] - } - [@@deriving rpcty] - - type tapdev = Tapctl.tapdev - - let typ_of_tapdev = - Rpc.Types.( - Abstract - { - aname= "tapdev" - ; test_data= [] - ; rpc_of= Tapctl.rpc_of_tapdev - ; of_rpc= (fun x -> Ok (Tapctl.tapdev_of_rpc x)) - } - ) - - type handle = Scheduler.handle - - let typ_of_handle = - Rpc.Types.( - Abstract - { - aname= "handle" - ; test_data= [] - ; rpc_of= Scheduler.rpc_of_handle - ; 
of_rpc= (fun x -> Ok (Scheduler.handle_of_rpc x)) - } - ) - - type t = { - url: string - ; dest_sr: Sr.t - ; remote_info: remote_info option - ; local_dp: dp - ; tapdev: tapdev option - ; mutable failed: bool - ; mutable watchdog: handle option - } - [@@deriving rpcty] - - let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty - - let t_of_rpc x = - match Rpcmarshal.unmarshal t.Rpc.Types.ty x with - | Ok y -> - y - | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Send_state.t: %s" m) - end - - module Copy_state = struct - type t = { - base_dp: dp - ; leaf_dp: dp - ; remote_dp: dp - ; dest_sr: Sr.t - ; copy_vdi: Vdi.t - ; remote_url: string - ; verify_dest: bool [@default false] - } - [@@deriving rpcty] - - let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty - - let t_of_rpc x = - match Rpcmarshal.unmarshal t.Rpc.Types.ty x with - | Ok y -> - y - | Error (`Msg m) -> - failwith (Printf.sprintf "Failed to unmarshal Copy_state.t: %s" m) - end - - let loaded = ref false - - let mutex = Mutex.create () - - type send_table = (string, Send_state.t) Hashtbl.t - - type recv_table = (string, Receive_state.t) Hashtbl.t - - type copy_table = (string, Copy_state.t) Hashtbl.t - - type osend - - type orecv - - type ocopy - - type _ operation = - | Send_op : Send_state.t -> osend operation - | Recv_op : Receive_state.t -> orecv operation - | Copy_op : Copy_state.t -> ocopy operation - - type _ table = - | Send_table : send_table -> osend table - | Recv_table : recv_table -> orecv table - | Copy_table : copy_table -> ocopy table - - let active_send : send_table = Hashtbl.create 10 - - let active_recv : recv_table = Hashtbl.create 10 - - let active_copy : copy_table = Hashtbl.create 10 - - let table_of_op : type a. a operation -> a table = function - | Send_op _ -> - Send_table active_send - | Recv_op _ -> - Recv_table active_recv - | Copy_op _ -> - Copy_table active_copy - - let persist_root = ref "/var/run/nonpersistent" - - let path_of_table : type a. 
a table -> string = function - | Send_table _ -> - Filename.concat !persist_root "storage_mirrors_send.json" - | Recv_table _ -> - Filename.concat !persist_root "storage_mirrors_recv.json" - | Copy_table _ -> - Filename.concat !persist_root "storage_mirrors_copy.json" +module type SMAPIv2_MIRROR = Storage_interface.MIRROR - let rpc_of_table : type a. a table -> Rpc.t = - let open Rpc_std_helpers in - function - | Send_table send_table -> - rpc_of_hashtbl ~rpc_of:Send_state.rpc_of_t send_table - | Recv_table recv_table -> - rpc_of_hashtbl ~rpc_of:Receive_state.rpc_of_t recv_table - | Copy_table copy_table -> - rpc_of_hashtbl ~rpc_of:Copy_state.rpc_of_t copy_table +let s_of_sr = Storage_interface.Sr.string_of - let to_string : type a. a table -> string = - fun table -> rpc_of_table table |> Jsonrpc.to_string +let choose_backend dbg sr = + debug "%s dbg: %s choosing backend for sr :%s" __FUNCTION__ dbg (s_of_sr sr) ; + match Storage_mux_reg.smapi_version_of_sr sr with + | SMAPIv1 -> + (module Storage_smapiv1_migrate.MIRROR : SMAPIv2_MIRROR) + | SMAPIv3 -> + (module Storage_smapiv3_migrate.MIRROR : SMAPIv2_MIRROR) + | SMAPIv2 -> + (* this should never happen *) + failwith "unsupported SMAPI version smapiv2" - let rpc_of_path path = Unixext.string_of_file path |> Jsonrpc.of_string - - let load_one : type a. 
a table -> unit = - fun table -> - let rpc = path_of_table table |> rpc_of_path in - let open Rpc_std_helpers in - match table with - | Send_table table -> - Hashtbl.iter (Hashtbl.replace table) - (hashtbl_of_rpc ~of_rpc:Send_state.t_of_rpc rpc) - | Recv_table table -> - Hashtbl.iter (Hashtbl.replace table) - (hashtbl_of_rpc ~of_rpc:Receive_state.t_of_rpc rpc) - | Copy_table table -> - Hashtbl.iter (Hashtbl.replace table) - (hashtbl_of_rpc ~of_rpc:Copy_state.t_of_rpc rpc) - - let load () = - ignore_exn (fun () -> load_one (Send_table active_send)) ; - ignore_exn (fun () -> load_one (Recv_table active_recv)) ; - ignore_exn (fun () -> load_one (Copy_table active_copy)) ; - loaded := true - - let save_one : type a. a table -> unit = - fun table -> - to_string table |> Unixext.write_string_to_file (path_of_table table) - - let save () = - Unixext.mkdir_rec !persist_root 0o700 ; - save_one (Send_table active_send) ; - save_one (Recv_table active_recv) ; - save_one (Copy_table active_copy) - - let access_table ~save_after f table = - Xapi_stdext_threads.Threadext.Mutex.execute mutex (fun () -> - if not !loaded then load () ; - let result = f table in - if save_after then save () ; - result - ) - - let map_of () = - let contents_of table = - Hashtbl.fold (fun k v acc -> (k, v) :: acc) table [] - in - let send_ops = access_table ~save_after:false contents_of active_send in - let recv_ops = access_table ~save_after:false contents_of active_recv in - let copy_ops = access_table ~save_after:false contents_of active_copy in - (send_ops, recv_ops, copy_ops) - - let add : type a. string -> a operation -> unit = - fun id op -> - let add' : type a. 
string -> a operation -> a table -> unit = - fun id op table -> - match (table, op) with - | Send_table table, Send_op op -> - Hashtbl.replace table id op - | Recv_table table, Recv_op op -> - Hashtbl.replace table id op - | Copy_table table, Copy_op op -> - Hashtbl.replace table id op - in - access_table ~save_after:true - (fun table -> add' id op table) - (table_of_op op) - - let find id table = - access_table ~save_after:false - (fun table -> Hashtbl.find_opt table id) - table - - let remove id table = - access_table ~save_after:true (fun table -> Hashtbl.remove table id) table - - let clear () = - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_send ; - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_recv ; - access_table ~save_after:true (fun table -> Hashtbl.clear table) active_copy - - let remove_local_mirror id = remove id active_send - - let remove_receive_mirror id = remove id active_recv - - let remove_copy id = remove id active_copy - - let find_active_local_mirror id = find id active_send - - let find_active_receive_mirror id = find id active_recv - - let find_active_copy id = find id active_copy - - let mirror_id_of (sr, vdi) = - Printf.sprintf "%s/%s" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - - let of_mirror_id id = - match String.split_on_char '/' id with - | sr :: rest -> - Storage_interface. - (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) - | _ -> - failwith "Bad id" - - let copy_id_of (sr, vdi) = - Printf.sprintf "copy/%s/%s" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - - let of_copy_id id = - match String.split_on_char '/' id with - | op :: sr :: rest when op = "copy" -> - Storage_interface. - (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) - | _ -> - failwith "Bad id" +(** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions +tend to be executed on the receiver side. 
*) +module MigrateRemote = struct + (** [receive_finalize3 dbg mirror_id sr url verify_dest] takes an [sr] parameter + which is the source sr and multiplexes based on the type of that *) + let receive_finalize3 ~dbg ~mirror_id ~sr ~url ~verify_dest = + let (module Migrate_Backend) = choose_backend dbg sr in + Migrate_Backend.receive_finalize3 () ~dbg ~mirror_id ~sr ~url ~verify_dest + + let receive_cancel2 ~dbg ~mirror_id ~sr ~url ~verify_dest = + let (module Migrate_Backend) = choose_backend dbg sr in + Migrate_Backend.receive_cancel2 () ~dbg ~mirror_id ~url ~verify_dest end -let vdi_info x = - match x with - | Some (Vdi_info v) -> - v - | _ -> - failwith "Runtime type error: expecting Vdi_info" - -module Local = StorageAPI (Idl.Exn.GenClient (struct - let rpc call = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"smapiv2" - (Storage_utils.localhost_connection_args ()) - call -end)) - -let tapdisk_of_attach_info (backend : Storage_interface.backend) = - let _, blockdevices, _, nbds = - Storage_interface.implementations_of_backend backend - in - match (blockdevices, nbds) with - | blockdevice :: _, _ -> ( - let path = blockdevice.Storage_interface.path in - try - match Tapctl.of_device (Tapctl.create ()) path with - | tapdev, _, _ -> - Some tapdev - with - | Tapctl.Not_blktap -> - debug "Device %s is not controlled by blktap" path ; - None - | Tapctl.Not_a_device -> - debug "%s is not a device" path ; - None - | _ -> - debug "Device %s has an unknown driver" path ; - None - ) - | _, nbd :: _ -> ( - try - let path, _ = Storage_interface.parse_nbd_uri nbd in - let filename = Unix.realpath path |> Filename.basename in - Scanf.sscanf filename "nbd%d.%d" (fun pid minor -> - Some (Tapctl.tapdev_of ~pid ~minor) - ) - with _ -> - debug "No tapdisk found for NBD backend: %s" nbd.Storage_interface.uri ; - None - ) - | _ -> - debug "No tapdisk found for backend: %s" - (Storage_interface.(rpc_of backend) backend |> Rpc.to_string) ; - None - -let with_activated_disk ~dbg ~sr 
~vdi ~dp ~vm f = - let attached_vdi = - Option.map - (fun vdi -> - let backend = Local.VDI.attach3 dbg dp sr vdi vm false in - (vdi, backend) - ) - vdi - in - finally - (fun () -> - let path_and_nbd = - Option.map - (fun (vdi, backend) -> - let _xendisks, blockdevs, files, nbds = - Storage_interface.implementations_of_backend backend - in - match (files, blockdevs, nbds) with - | {path} :: _, _, _ | _, {path} :: _, _ -> - Local.VDI.activate3 dbg dp sr vdi vm ; - (path, false) - | _, _, nbd :: _ -> - Local.VDI.activate3 dbg dp sr vdi vm ; - let unix_socket_path, export_name = - Storage_interface.parse_nbd_uri nbd - in - ( Attach_helpers.NbdClient.start_nbd_client ~unix_socket_path - ~export_name - , true - ) - | [], [], [] -> - raise - (Storage_interface.Storage_error - (Backend_error - ( Api_errors.internal_error - , [ - "No File, BlockDevice or Nbd implementation in \ - Datapath.attach response: " - ^ (Storage_interface.(rpc_of backend) backend - |> Jsonrpc.to_string - ) - ] - ) - ) - ) - ) - attached_vdi - in - finally - (fun () -> f (Option.map (function path, _ -> path) path_and_nbd)) - (fun () -> - Option.iter - (function - | path, true -> - Attach_helpers.NbdClient.stop_nbd_client ~nbd_device:path - | _ -> - () - ) - path_and_nbd ; - Option.iter (fun vdi -> Local.VDI.deactivate dbg dp sr vdi vm) vdi - ) - ) - (fun () -> - Option.iter - (fun (vdi, _) -> Local.VDI.detach dbg dp sr vdi vm) - attached_vdi - ) - -let perform_cleanup_actions = - List.iter (fun f -> - try f () - with e -> - error "Caught %s while performing cleanup actions" (Printexc.to_string e) - ) - -let progress_callback start len t y = - let new_progress = start +. (y *. 
len) in - Storage_task.set_state t (Task.Pending new_progress) ; - signal (Storage_task.id_of_handle t) - -let remove_from_sm_config vdi_info key = - { - vdi_info with - sm_config= List.filter (fun (k, _) -> k <> key) vdi_info.sm_config - } - -let add_to_sm_config vdi_info key value = - let vdi_info = remove_from_sm_config vdi_info key in - {vdi_info with sm_config= (key, value) :: vdi_info.sm_config} - (** This module [MigrateLocal] consists of the concrete implementations of the migration part of SMAPI. Functions inside this module are sender driven, which means they tend to be executed on the sender side. although there is not a hard rule on what is executed on the sender side, this provides some heuristics. *) module MigrateLocal = struct - (** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) - let copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~dest_vdi ~verify_dest = - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in - debug "copy local=%s/%s url=%s remote=%s/%s verify_dest=%B" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of dest_vdi) - verify_dest ; - (* Check the remote SR exists *) - let srs = Remote.SR.list dbg in - if not (List.mem dest srs) then - failwith - (Printf.sprintf "Remote SR %s not found" - (Storage_interface.Sr.string_of dest) - ) ; - let vdis = Remote.SR.scan dbg dest in - let remote_vdi = - try List.find (fun x -> x.vdi = dest_vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Remote VDI %s not found" - (Storage_interface.Vdi.string_of dest_vdi) - ) - in - let dest_content_id = remote_vdi.content_id in + let stop_internal ~dbg ~id = (* Find the local VDI *) - let vdis = Local.SR.scan dbg sr in - let local_vdi = 
- try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) - in - debug "copy local content_id=%s" local_vdi.content_id ; - debug "copy remote content_id=%s" dest_content_id ; - if local_vdi.virtual_size > remote_vdi.virtual_size then ( - (* This should never happen provided the higher-level logic is working properly *) - error "copy local virtual_size=%Ld > remote virtual_size = %Ld" - local_vdi.virtual_size remote_vdi.virtual_size ; - failwith "local VDI is larger than the remote VDI" - ) ; - let on_fail : (unit -> unit) list ref = ref [] in - let base_vdi = - try - let x = - (List.find (fun x -> x.content_id = dest_content_id) vdis).vdi - in - debug - "local VDI has content_id = %s; we will perform an incremental copy" - dest_content_id ; - Some x - with _ -> - debug "no local VDI has content_id = %s; we will perform a full copy" - dest_content_id ; - None - in - try - let remote_dp = Uuidx.(to_string (make ())) in - let base_dp = Uuidx.(to_string (make ())) in - let leaf_dp = Uuidx.(to_string (make ())) in - let dest_vdi_url = - let url' = Http.Url.of_string url in - Http.Url.set_uri url' - (Printf.sprintf "%s/nbdproxy/%s/%s/%s/%s" (Http.Url.get_uri url') - (Storage_interface.Vm.string_of vm) - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of dest_vdi) - remote_dp - ) - |> Http.Url.to_string - in - debug "%s copy remote NBD URL = %s" __FUNCTION__ dest_vdi_url ; - let id = State.copy_id_of (sr, vdi) in - debug "Persisting state for copy (id=%s)" id ; - State.add id - State.( - Copy_op - Copy_state. 
- { - base_dp - ; leaf_dp - ; remote_dp - ; dest_sr= dest - ; copy_vdi= remote_vdi.vdi - ; remote_url= url - ; verify_dest - } - ) ; - SXM.info "%s: copy initiated local_vdi:%s dest_vdi:%s" __FUNCTION__ - (Storage_interface.Vdi.string_of vdi) - (Storage_interface.Vdi.string_of dest_vdi) ; - finally - (fun () -> - debug "activating RW datapath %s on remote" remote_dp ; - let backend = - Remote.VDI.attach3 dbg remote_dp dest dest_vdi vm true - in - let _, _, _, nbds = - Storage_interface.implementations_of_backend backend - in - let proto = - match nbds with - | [] -> - None - | uri :: _ -> - let _socket, export = Storage_interface.parse_nbd_uri uri in - Some (StreamCommon.Nbd export) - in - Remote.VDI.activate3 dbg remote_dp dest dest_vdi vm ; - with_activated_disk ~dbg ~sr ~vdi:base_vdi ~dp:base_dp ~vm - (fun base_path -> - with_activated_disk ~dbg ~sr ~vdi:(Some vdi) ~dp:leaf_dp ~vm - (fun src -> - let verify_cert = - if verify_dest then Stunnel_client.pool () else None - in - let dd = - Sparse_dd_wrapper.start - ~progress_cb:(progress_callback 0.05 0.9 task) - ~verify_cert ~proto ?base:base_path true (Option.get src) - dest_vdi_url remote_vdi.virtual_size - in - Storage_task.with_cancel task - (fun () -> Sparse_dd_wrapper.cancel dd) - (fun () -> - try Sparse_dd_wrapper.wait dd - with Sparse_dd_wrapper.Cancelled -> - Storage_task.raise_cancelled task - ) - ) + let alm = State.find_active_local_mirror id in + match alm with + | Some alm -> + ( match alm.State.Send_state.remote_info with + | Some remote_info -> ( + let sr, vdi = State.of_mirror_id id in + let vdis = Local.SR.scan dbg sr in + let local_vdi = + match List.find_opt (fun x -> x.vdi = vdi) vdis with + | None -> + failwith_fmt "Local VDI %s not found" + (Storage_interface.Vdi.string_of vdi) + | Some v -> + v + in + let local_vdi = add_to_sm_config local_vdi "mirror" "null" in + let local_vdi = remove_from_sm_config local_vdi "base_mirror" in + (* Disable mirroring on the local machine *) + let snapshot = 
Local.VDI.snapshot dbg sr local_vdi in + Local.VDI.destroy dbg sr snapshot.vdi ; + (* Destroy the snapshot, if it still exists *) + let snap = + List.find_opt + (fun x -> List.assoc_opt "base_mirror" x.sm_config = Some id) + vdis + in + ( match snap with + | Some s -> + debug "Found snapshot VDI: %s" + (Storage_interface.Vdi.string_of s.vdi) ; + Local.VDI.destroy dbg sr s.vdi + | None -> + debug "Snapshot VDI already cleaned up" + ) ; + try + MigrateRemote.receive_cancel2 ~dbg ~mirror_id:id ~sr + ~url:remote_info.url ~verify_dest:remote_info.verify_dest + with _ -> () ) - ) - (fun () -> - Remote.DP.destroy dbg remote_dp false ; - State.remove_copy id + | None -> + () ) ; - SXM.info "%s: copy complete for local_vdi:%s dest_vdi:%s" __FUNCTION__ - (Storage_interface.Vdi.string_of vdi) - (Storage_interface.Vdi.string_of dest_vdi) ; - debug "setting remote content_id <- %s" local_vdi.content_id ; - Remote.VDI.set_content_id dbg dest dest_vdi local_vdi.content_id ; - (* PR-1255: XXX: this is useful because we don't have content_ids by default *) - debug "setting local content_id <- %s" local_vdi.content_id ; - Local.VDI.set_content_id dbg sr local_vdi.vdi local_vdi.content_id ; - Some (Vdi_info remote_vdi) - with e -> - error "Caught %s: performing cleanup actions" (Printexc.to_string e) ; - perform_cleanup_actions !on_fail ; - raise e + State.remove_local_mirror id + | None -> + raise (Storage_interface.Storage_error (Does_not_exist ("mirror", id))) - (** [copy_into_sr] does not requires a dest vdi to be provided, instead, it will - find the nearest vdi on the [dest] sr, and if there is no such vdi, it will - create one. 
*) - let copy_into_sr ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = - debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" - (Storage_interface.Sr.string_of sr) - (Storage_interface.Vdi.string_of vdi) - url - (Storage_interface.Sr.string_of dest) - verify_dest ; - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in - (* Find the local VDI *) - try - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> failwith (Printf.sprintf "Local VDI not found") - in - try - let similar_vdis = Local.VDI.similar_content dbg sr vdi in - let similars = List.map (fun vdi -> vdi.content_id) similar_vdis in - debug "Similar VDIs = [ %s ]" - (String.concat "; " - (List.map - (fun x -> - Printf.sprintf "(vdi=%s,content_id=%s)" - (Storage_interface.Vdi.string_of x.vdi) - x.content_id - ) - similar_vdis - ) - ) ; - let remote_vdis = Remote.SR.scan dbg dest in - (* We drop cbt_metadata VDIs that do not have any actual data *) - let remote_vdis = - List.filter (fun vdi -> vdi.ty <> "cbt_metadata") remote_vdis - in - let nearest = - List.fold_left - (fun acc content_id -> - match acc with - | Some _ -> - acc - | None -> ( - try - Some - (List.find - (fun vdi -> - vdi.content_id = content_id - && vdi.virtual_size <= local_vdi.virtual_size - ) - remote_vdis - ) - with Not_found -> None - ) - ) - None similars - in - debug "Nearest VDI: content_id=%s vdi=%s" - (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) - (Option.fold ~none:"None" - ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) - nearest - ) ; - let remote_base = - match nearest with - | Some vdi -> - debug "Cloning VDI" ; - let vdi_clone = Remote.VDI.clone dbg dest vdi in - debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; - ( if vdi_clone.virtual_size 
<> local_vdi.virtual_size then - let new_size = - Remote.VDI.resize dbg dest vdi_clone.vdi - local_vdi.virtual_size - in - debug "Resize remote clone VDI to %Ld: result %Ld" - local_vdi.virtual_size new_size - ) ; - vdi_clone - | None -> - debug "Creating a blank remote VDI" ; - Remote.VDI.create dbg dest {local_vdi with sm_config= []} - in - let remote_copy = - copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest - ~dest_vdi:remote_base.vdi ~verify_dest - |> vdi_info - in - let snapshot = Remote.VDI.snapshot dbg dest remote_copy in - Remote.VDI.destroy dbg dest remote_copy.vdi ; - Some (Vdi_info snapshot) - with e -> - error "Caught %s: copying snapshots vdi" (Printexc.to_string e) ; - raise (Storage_error (Internal_error (Printexc.to_string e))) - with + let stop ~dbg ~id = + try stop_internal ~dbg ~id with | Storage_error (Backend_error (code, params)) | Api_errors.Server_error (code, params) -> raise (Storage_error (Backend_error (code, params))) | e -> - raise (Storage_error (Internal_error (Printexc.to_string e))) + raise e + + let prepare ~dbg ~sr ~vdi ~dest ~local_vdi ~mirror_id ~mirror_vm ~url + ~verify_dest = + try + let (module Migrate_Backend) = choose_backend dbg sr in + let similars = similar_vdis ~dbg ~sr ~vdi in + Migrate_Backend.receive_start3 () ~dbg ~sr:dest ~vdi_info:local_vdi + ~mirror_id ~similar:similars ~vm:mirror_vm ~url ~verify_dest + with e -> + error "%s Caught error %s while preparing for SXM" __FUNCTION__ + (Printexc.to_string e) ; + raise + (Storage_error (Migration_preparation_failure (Printexc.to_string e))) - let start ~task ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest - = + let start ~task_id ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm ~url ~dest + ~verify_dest = SXM.info "%s sr:%s vdi:%s dp: %s mirror_vm: %s copy_vm: %s url:%s dest:%s \ verify_dest:%B" @@ -735,18 +139,10 @@ module MigrateLocal = struct url (Storage_interface.Sr.string_of dest) verify_dest ; - let remote_url = Http.Url.of_string url in - let module 
Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - (Storage_utils.connection_args_of_uri ~verify_dest url) - end)) in + + let (module Remote) = get_remote_backend url verify_dest in (* Find the local VDI *) - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> failwith "Local VDI not found" - in + let local_vdi, _ = find_vdi ~dbg ~sr ~vdi (module Local) in let mirror_id = State.mirror_id_of (sr, local_vdi.vdi) in debug "%s: Adding to active local mirrors before sending: id=%s" __FUNCTION__ mirror_id ; @@ -760,268 +156,64 @@ module MigrateLocal = struct ; tapdev= None ; failed= false ; watchdog= None + ; live_vm + ; vdi + ; mirror_key= None } in State.add mirror_id (State.Send_op alm) ; debug "%s Added mirror %s to active local mirrors" __FUNCTION__ mirror_id ; (* A list of cleanup actions to perform if the operation should fail. *) - let on_fail : (unit -> unit) list ref = ref [] in + let (module Migrate_Backend) = choose_backend dbg sr in try - let similar_vdis = Local.VDI.similar_content dbg sr vdi in - let similars = - List.filter - (fun x -> x <> "") - (List.map (fun vdi -> vdi.content_id) similar_vdis) - in - debug "Similar VDIs to = [ %s ]" - (String.concat "; " - (List.map - (fun x -> - Printf.sprintf "(vdi=%s,content_id=%s)" - (Storage_interface.Vdi.string_of x.vdi) - x.content_id - ) - similar_vdis - ) - ) ; - let (Mirror.Vhd_mirror result) = - Remote.DATA.MIRROR.receive_start2 dbg dest local_vdi mirror_id similars - mirror_vm + let remote_mirror = + prepare ~dbg ~sr ~vdi ~dest ~local_vdi ~mirror_id ~mirror_vm ~url + ~verify_dest in - (* Enable mirroring on the local machine *) - let mirror_dp = result.Mirror.mirror_datapath in - let uri = - Printf.sprintf "/services/SM/nbd/%s/%s/%s/%s" - (Storage_interface.Vm.string_of mirror_vm) - (Storage_interface.Sr.string_of dest) - (Storage_interface.Vdi.string_of 
result.Mirror.mirror_vdi.vdi) - mirror_dp - in - debug "%s: uri of http request for mirroring is %s" __FUNCTION__ uri ; - let dest_url = Http.Url.set_uri remote_url uri in - let request = - Http.Request.make - ~query:(Http.Url.get_query_params dest_url) - ~version:"1.0" ~user_agent:"smapiv2" Http.Put uri - in - let verify_cert = if verify_dest then Stunnel_client.pool () else None in - let transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in - debug "Searching for data path: %s" dp ; - let attach_info = Local.DP.attach_info dbg sr vdi dp mirror_vm in - on_fail := - (fun () -> Remote.DATA.MIRROR.receive_cancel dbg mirror_id) :: !on_fail ; - let tapdev = - match tapdisk_of_attach_info attach_info with - | Some tapdev -> - let pid = Tapctl.get_tapdisk_pid tapdev in - let path = - Printf.sprintf "/var/run/blktap-control/nbdclient%d" pid - in - with_transport ~stunnel_wait_disconnect:false transport - (with_http request (fun (_response, s) -> - let control_fd = - Unix.socket Unix.PF_UNIX Unix.SOCK_STREAM 0 - in - finally - (fun () -> - Unix.connect control_fd (Unix.ADDR_UNIX path) ; - let msg = dp in - let len = String.length msg in - let written = - Unixext.send_fd_substring control_fd msg 0 len [] s - in - if written <> len then ( - error "Failed to transfer fd to %s" path ; - failwith "Internal error transferring fd to tapdisk" - ) - ) - (fun () -> Unix.close control_fd) - ) - ) ; - tapdev - | None -> - failwith "Not attached" - in - debug "%s Updating active local mirrors: id=%s" __FUNCTION__ mirror_id ; - let alm = - State.Send_state. 
- { - url - ; dest_sr= dest - ; remote_info= - Some - { - dp= mirror_dp - ; vdi= result.Mirror.mirror_vdi.vdi - ; url - ; verify_dest - } - ; local_dp= dp - ; tapdev= Some tapdev - ; failed= false - ; watchdog= None - } - in - - State.add mirror_id (State.Send_op alm) ; - debug "%s Updated mirror_id %s in the active local mirror" __FUNCTION__ - mirror_id ; - - SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ - (string_of_vdi_info local_vdi) ; - let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in - let local_vdi = add_to_sm_config local_vdi "base_mirror" mirror_id in - let snapshot = - try Local.VDI.snapshot dbg sr local_vdi with - | Storage_interface.Storage_error (Backend_error (code, _)) - when code = "SR_BACKEND_FAILURE_44" -> - raise - (Api_errors.Server_error - ( Api_errors.sr_source_space_insufficient - , [Storage_interface.Sr.string_of sr] - ) - ) - | e -> - raise e - in - SXM.info "%s: snapshot created, mirror initiated vdi:%s snapshot_of:%s" - __FUNCTION__ - (Storage_interface.Vdi.string_of snapshot.vdi) - (Storage_interface.Vdi.string_of local_vdi.vdi) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr snapshot.vdi) :: !on_fail ; - (let rec inner () = - let alm_opt = State.find_active_local_mirror mirror_id in - match alm_opt with - | Some alm -> - let stats = Tapctl.stats (Tapctl.create ()) tapdev in - if stats.Tapctl.Stats.nbd_mirror_failed = 1 then ( - error "Tapdisk mirroring has failed" ; - Updates.add (Dynamic.Mirror mirror_id) updates - ) ; - alm.State.Send_state.watchdog <- - Some - (Scheduler.one_shot scheduler (Scheduler.Delta 5) - "tapdisk_watchdog" inner - ) - | None -> - () - in - inner () - ) ; - on_fail := (fun () -> Local.DATA.MIRROR.stop dbg mirror_id) :: !on_fail ; - (* Copy the snapshot to the remote *) - let new_parent = - Storage_task.with_subtask task "copy" (fun () -> - copy_into_vdi ~task ~dbg ~sr ~vdi:snapshot.vdi ~vm:copy_vm ~url - ~dest ~dest_vdi:result.Mirror.copy_diffs_to ~verify_dest - ) - |> vdi_info - 
in - debug "Local VDI %s = remote VDI %s" - (Storage_interface.Vdi.string_of snapshot.vdi) - (Storage_interface.Vdi.string_of new_parent.vdi) ; - debug "Local VDI %s now mirrored to remote VDI: %s" - (Storage_interface.Vdi.string_of local_vdi.vdi) - (Storage_interface.Vdi.string_of result.Mirror.mirror_vdi.vdi) ; - debug "Destroying snapshot on src" ; - Local.VDI.destroy dbg sr snapshot.vdi ; + Migrate_Backend.send_start () ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm + ~mirror_id ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror + ~dest_sr:dest ~verify_dest ; Some (Mirror_id mirror_id) with | Storage_error (Sr_not_attached sr_uuid) -> error " Caught exception %s:%s. Performing cleanup." Api_errors.sr_not_attached sr_uuid ; - perform_cleanup_actions !on_fail ; raise (Api_errors.Server_error (Api_errors.sr_not_attached, [sr_uuid])) + | ( Storage_error (Migration_mirror_fd_failure reason) + | Storage_error (Migration_mirror_snapshot_failure reason) ) as e -> + error "%s: Caught %s: during SMAPIv1 storage migration mirror " + __FUNCTION__ reason ; + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~sr ~url ~verify_dest ; + raise e + | Storage_error (Migration_mirror_failure reason) as e -> + error "%s: Caught :%s: during SMAPIv3 storage migration mirror" + __FUNCTION__ reason ; + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~sr ~url ~verify_dest ; + raise e + | Storage_error (Migration_mirror_copy_failure reason) as e -> + error "%s: Caught %s: during storage migration copy" __FUNCTION__ reason ; + stop ~dbg ~id:mirror_id ; + raise e | e -> - error "Caught %s: performing cleanup actions" (Api_errors.to_string e) ; - perform_cleanup_actions !on_fail ; + error "Caught %s during SXM: " (Api_errors.to_string e) ; + stop ~dbg ~id:mirror_id ; raise e - let stop ~dbg ~id = - (* Find the local VDI *) - let alm = State.find_active_local_mirror id in - match alm with - | Some alm -> - ( match alm.State.Send_state.remote_info with - | Some remote_info -> ( - let sr, vdi = 
State.of_mirror_id id in - let vdis = Local.SR.scan dbg sr in - let local_vdi = - try List.find (fun x -> x.vdi = vdi) vdis - with Not_found -> - failwith - (Printf.sprintf "Local VDI %s not found" - (Storage_interface.Vdi.string_of vdi) - ) - in - let local_vdi = add_to_sm_config local_vdi "mirror" "null" in - let local_vdi = remove_from_sm_config local_vdi "base_mirror" in - (* Disable mirroring on the local machine *) - let snapshot = Local.VDI.snapshot dbg sr local_vdi in - Local.VDI.destroy dbg sr snapshot.vdi ; - (* Destroy the snapshot, if it still exists *) - let snap = - try - Some - (List.find - (fun x -> - List.mem_assoc "base_mirror" x.sm_config - && List.assoc "base_mirror" x.sm_config = id - ) - vdis - ) - with _ -> None - in - ( match snap with - | Some s -> - debug "Found snapshot VDI: %s" - (Storage_interface.Vdi.string_of s.vdi) ; - Local.VDI.destroy dbg sr s.vdi - | None -> - debug "Snapshot VDI already cleaned up" - ) ; - let remote_url = - Storage_utils.connection_args_of_uri - ~verify_dest:remote_info.State.Send_state.verify_dest - remote_info.State.Send_state.url - in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - remote_url - end)) in - try Remote.DATA.MIRROR.receive_cancel dbg id with _ -> () - ) - | None -> - () - ) ; - State.remove_local_mirror id - | None -> - raise (Storage_interface.Storage_error (Does_not_exist ("mirror", id))) - - let stat ~dbg:_ ~id = + let stat ~dbg ~id = let recv_opt = State.find_active_receive_mirror id in let send_opt = State.find_active_local_mirror id in let copy_opt = State.find_active_copy id in + let sr, _vdi = State.of_mirror_id id in let open State in let failed = match send_opt with | Some send_state -> + let (module Migrate_Backend) = choose_backend dbg sr in let failed = - match send_state.Send_state.tapdev with - | Some tapdev -> ( - try - let stats = Tapctl.stats (Tapctl.create ()) tapdev in - 
stats.Tapctl.Stats.nbd_mirror_failed = 1 - with _ -> - debug "Using cached copy of failure status" ; - send_state.Send_state.failed - ) - | None -> - false + Migrate_Backend.has_mirror_failed () ~dbg ~mirror_id:id ~sr in - send_state.Send_state.failed <- failed ; + send_state.failed <- failed ; failed | None -> false @@ -1078,7 +270,7 @@ module MigrateLocal = struct ) send_ops ; List.iter - (fun (id, copy_state) -> + (fun (id, (copy_state : State.Copy_state.t)) -> debug "Copy in progress: %s" id ; List.iter log_and_ignore_exn [ @@ -1089,15 +281,9 @@ module MigrateLocal = struct Local.DP.destroy dbg copy_state.State.Copy_state.base_dp true ) ] ; - let remote_url = - Storage_utils.connection_args_of_uri - ~verify_dest:copy_state.State.Copy_state.verify_dest - copy_state.State.Copy_state.remote_url + let (module Remote) = + get_remote_backend copy_state.remote_url copy_state.verify_dest in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in List.iter log_and_ignore_exn [ (fun () -> @@ -1111,214 +297,21 @@ module MigrateLocal = struct ) copy_ops ; List.iter - (fun (id, _recv_state) -> - debug "Receive in progress: %s" id ; - log_and_ignore_exn (fun () -> Local.DATA.MIRROR.receive_cancel dbg id) + (fun (mirror_id, (recv_state : State.Receive_state.t)) -> + let sr, _vdi = State.of_mirror_id mirror_id in + debug "Receive in progress: %s" mirror_id ; + log_and_ignore_exn (fun () -> + MigrateRemote.receive_cancel2 ~dbg ~mirror_id ~sr + ~url:recv_state.url ~verify_dest:recv_state.verify_dest + ) ) recv_ops ; State.clear () end -(** module [MigrateRemote] is similar to [MigrateLocal], but most of these functions -tend to be executed on the receiver side. 
*) -module MigrateRemote = struct - let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm = - let on_fail : (unit -> unit) list ref = ref [] in - let vdis = Local.SR.scan dbg sr in - (* We drop cbt_metadata VDIs that do not have any actual data *) - let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in - let leaf_dp = Local.DP.create dbg Uuidx.(to_string (make ())) in - try - let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in - let leaf = Local.VDI.create dbg sr vdi_info in - info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; - on_fail := (fun () -> Local.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; - (* dummy VDI is created so that the leaf VDI becomes a differencing disk, - useful for calling VDI.compose later on *) - let dummy = Local.VDI.snapshot dbg sr leaf in - on_fail := (fun () -> Local.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; - debug "%s Created dummy snapshot for mirror receive: %s" __FUNCTION__ - (string_of_vdi_info dummy) ; - let _ : backend = Local.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in - Local.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; - let nearest = - List.fold_left - (fun acc content_id -> - match acc with - | Some _ -> - acc - | None -> ( - try - Some - (List.find - (fun vdi -> - vdi.content_id = content_id - && vdi.virtual_size <= vdi_info.virtual_size - ) - vdis - ) - with Not_found -> None - ) - ) - None similar - in - debug "Nearest VDI: content_id=%s vdi=%s" - (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) - (Option.fold ~none:"None" - ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) - nearest - ) ; - let parent = - match nearest with - | Some vdi -> - debug "Cloning VDI" ; - let vdi = add_to_sm_config vdi "base_mirror" id in - let vdi_clone = Local.VDI.clone dbg sr vdi in - debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; - ( if vdi_clone.virtual_size <> vdi_info.virtual_size then - let new_size = - Local.VDI.resize dbg sr 
vdi_clone.vdi vdi_info.virtual_size - in - debug "Resize local clone VDI to %Ld: result %Ld" - vdi_info.virtual_size new_size - ) ; - vdi_clone - | None -> - debug "Creating a blank remote VDI" ; - Local.VDI.create dbg sr vdi_info - in - debug "Parent disk content_id=%s" parent.content_id ; - State.add id - State.( - Recv_op - Receive_state. - { - sr - ; dummy_vdi= dummy.vdi - ; leaf_vdi= leaf.vdi - ; leaf_dp - ; parent_vdi= parent.vdi - ; remote_vdi= vdi_info.vdi - ; mirror_vm= vm - } - ) ; - let nearest_content_id = Option.map (fun x -> x.content_id) nearest in - Mirror.Vhd_mirror - { - Mirror.mirror_vdi= leaf - ; mirror_datapath= leaf_dp - ; copy_diffs_from= nearest_content_id - ; copy_diffs_to= parent.vdi - ; dummy_vdi= dummy.vdi - } - with e -> - List.iter - (fun op -> - try op () - with e -> - debug "Caught exception in on_fail: %s" (Printexc.to_string e) - ) - !on_fail ; - raise e - - let receive_start ~dbg ~sr ~vdi_info ~id ~similar = - receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") - - let receive_start2 ~dbg ~sr ~vdi_info ~id ~similar ~vm = - receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm - - let receive_finalize ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; - State.remove_receive_mirror id - - let receive_finalize2 ~dbg ~id = - let recv_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter - (fun r -> - SXM.info - "%s Mirror done. 
Compose on the dest sr %s parent %s and leaf %s" - __FUNCTION__ (Sr.string_of r.sr) - (Vdi.string_of r.parent_vdi) - (Vdi.string_of r.leaf_vdi) ; - Local.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; - Local.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; - (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so - there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) - log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr r.dummy_vdi) ; - Local.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" - ) - recv_state ; - State.remove_receive_mirror id - - let receive_cancel ~dbg ~id = - let receive_state = State.find_active_receive_mirror id in - let open State.Receive_state in - Option.iter - (fun r -> - log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; - List.iter - (fun v -> log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v)) - [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] - ) - receive_state ; - State.remove_receive_mirror id -end - -exception Timeout of Mtime.Span.t - -let reqs_outstanding_timeout = Mtime.Span.(150 * s) - -let pp_time () = Fmt.str "%a" Mtime.Span.pp - -(* Tapdisk should time out after 2 mins. We can wait a little longer *) - -let pre_deactivate_hook ~dbg:_ ~dp:_ ~sr ~vdi = - let open State.Send_state in - let id = State.mirror_id_of (sr, vdi) in - let start = Mtime_clock.counter () in - State.find_active_local_mirror id - |> Option.iter (fun s -> - (* We used to pause here and then check the nbd_mirror_failed key. Now, we poll - until the number of outstanding requests has gone to zero, then check the - status. 
This avoids confusing the backend (CA-128460) *) - try - match s.tapdev with - | None -> - () - | Some tapdev -> - let open Tapctl in - let ctx = create () in - let rec wait () = - let elapsed = Mtime_clock.count start in - if Mtime.Span.compare elapsed reqs_outstanding_timeout > 0 then - raise (Timeout elapsed) ; - let st = stats ctx tapdev in - if st.Stats.reqs_outstanding > 0 then ( - Thread.delay 1.0 ; wait () - ) else - (st, elapsed) - in - let st, elapsed = wait () in - debug "Got final stats after waiting %a" pp_time elapsed ; - if st.Stats.nbd_mirror_failed = 1 then ( - error "tapdisk reports mirroring failed" ; - s.failed <- true - ) - with - | Timeout elapsed -> - error - "Timeout out after %a waiting for tapdisk to complete all \ - outstanding requests" - pp_time elapsed ; - s.failed <- true - | e -> - error "Caught exception while finally checking mirror state: %s" - (Printexc.to_string e) ; - s.failed <- true - ) +let pre_deactivate_hook ~dbg ~dp ~sr ~vdi = + let (module Migrate_Backend) = choose_backend dbg sr in + Migrate_Backend.pre_deactivate_hook () ~dbg ~dp ~sr ~vdi let post_deactivate_hook ~sr ~vdi ~dp:_ = let open State.Send_state in @@ -1330,22 +323,15 @@ let post_deactivate_hook ~sr ~vdi ~dp:_ = ~some:(fun ri -> ri.verify_dest) r.remote_info in - let remote_url = - Storage_utils.connection_args_of_uri ~verify_dest r.url - in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" - remote_url - end)) in - debug "Calling receive_finalize2" ; + let (module Remote) = get_remote_backend r.url verify_dest in + debug "Calling receive_finalize3" ; log_and_ignore_exn (fun () -> - Remote.DATA.MIRROR.receive_finalize2 "Mirror-cleanup" id + MigrateRemote.receive_finalize3 ~dbg:"Mirror-cleanup" ~mirror_id:id + ~sr ~url:r.url ~verify_dest ) ; - debug "Finished calling receive_finalize2" ; + debug "Finished calling receive_finalize3" ; State.remove_local_mirror id ; - debug 
"Removed active local mirror: %s" id ; - Option.iter (fun id -> Scheduler.cancel scheduler id) r.watchdog + debug "Removed active local mirror: %s" id ) let nbd_handler req s ?(vm = "0") sr vdi dp = @@ -1355,7 +341,7 @@ let nbd_handler req s ?(vm = "0") sr vdi dp = let vm = Vm.of_string vm in let path = Storage_utils.transform_storage_exn (fun () -> - Local.DATA.MIRROR.import_activate "nbd" dp sr vdi vm + Local.DATA.import_activate "nbd" dp sr vdi vm ) in Http_svr.headers s (Http.http_200_ok () @ ["Transfer-encoding: nbd"]) ; @@ -1378,14 +364,14 @@ let nbd_handler req s ?(vm = "0") sr vdi dp = (** nbd_proxy is a http handler but will turn the http connection into an nbd connection. It proxies the connection between the sender and the generic nbd server, as returned by [get_nbd_server dp sr vdi vm]. *) -let nbd_proxy req s vm sr vdi dp = +let import_nbd_proxy req s vm sr vdi dp = debug "%s: vm=%s sr=%s vdi=%s dp=%s" __FUNCTION__ vm sr vdi dp ; let sr, vdi = Storage_interface.(Sr.of_string sr, Vdi.of_string vdi) in req.Http.Request.close <- true ; let vm = Vm.of_string vm in let path = Storage_utils.transform_storage_exn (fun () -> - Local.DATA.MIRROR.get_nbd_server "nbd" dp sr vdi vm + Local.DATA.get_nbd_server "nbd" dp sr vdi vm ) in debug "%s got nbd server path %s" __FUNCTION__ path ; @@ -1403,6 +389,9 @@ let nbd_proxy req s vm sr vdi dp = ) (fun () -> Unix.close control_fd) +let with_dbg ~name ~dbg f = + Debug_info.with_dbg ~with_thread:true ~module_name:__MODULE__ ~name ~dbg f + let with_task_and_thread ~dbg f = let task = Storage_task.add tasks dbg.Debug_info.log (fun task -> @@ -1434,52 +423,38 @@ let with_task_and_thread ~dbg f = let copy ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = with_task_and_thread ~dbg (fun task -> - MigrateLocal.copy_into_sr ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~vm ~url - ~dest ~verify_dest + Storage_smapiv1_migrate.Copy.copy_into_sr ~task ~dbg:dbg.Debug_info.log + ~sr ~vdi ~vm ~url ~dest ~verify_dest ) -let start ~dbg ~sr ~vdi ~dp 
~mirror_vm ~copy_vm ~url ~dest ~verify_dest = +let start ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm ~url ~dest ~verify_dest + = + with_dbg ~name:__FUNCTION__ ~dbg @@ fun dbg -> with_task_and_thread ~dbg (fun task -> - MigrateLocal.start ~task ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~mirror_vm - ~copy_vm ~url ~dest ~verify_dest + MigrateLocal.start + ~task_id:(Storage_task.id_of_handle task) + ~dbg:dbg.Debug_info.log ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~live_vm ~url + ~dest ~verify_dest ) (* XXX: PR-1255: copy the xenopsd 'raise Exception' pattern *) -let stop ~dbg ~id = - try MigrateLocal.stop ~dbg ~id with - | Storage_error (Backend_error (code, params)) - | Api_errors.Server_error (code, params) -> - raise (Storage_error (Backend_error (code, params))) - | e -> - raise e +let stop = MigrateLocal.stop let list = MigrateLocal.list -let killall = MigrateLocal.killall +let killall ~dbg = + with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + MigrateLocal.killall ~dbg:(Debug_info.to_string di) let stat = MigrateLocal.stat -let receive_start = MigrateRemote.receive_start - -let receive_start2 = MigrateRemote.receive_start2 - -let receive_finalize = MigrateRemote.receive_finalize - -let receive_finalize2 = MigrateRemote.receive_finalize2 - -let receive_cancel = MigrateRemote.receive_cancel - (* The remote end of this call, SR.update_snapshot_info_dest, is implemented in * the SMAPIv1 section of storage_migrate.ml. It needs to access the setters * for snapshot_of, snapshot_time and is_a_snapshot, which we don't want to add * to SMAPI. 
*) let update_snapshot_info_src ~dbg ~sr ~vdi ~url ~dest ~dest_vdi ~snapshot_pairs ~verify_dest = - let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in - let module Remote = StorageAPI (Idl.Exn.GenClient (struct - let rpc = - Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url - end)) in + let (module Remote) = get_remote_backend url verify_dest in let local_vdis = Local.SR.scan dbg sr in let find_vdi ~vdi ~vdi_info_list = try List.find (fun x -> x.vdi = vdi) vdi_info_list diff --git a/ocaml/xapi/storage_migrate_helper.ml b/ocaml/xapi/storage_migrate_helper.ml new file mode 100644 index 00000000000..f4c5d46c39c --- /dev/null +++ b/ocaml/xapi/storage_migrate_helper.ml @@ -0,0 +1,390 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +(** As SXM is such a long running process, we dedicate this to log important + milestones during the SXM process *) +module SXM = Debug.Make (struct + let name = "SXM" +end) + +module Listext = Xapi_stdext_std.Listext +module Unixext = Xapi_stdext_unix.Unixext +open Storage_interface +open Xapi_stdext_pervasives.Pervasiveext +open Xmlrpc_client + +let failwith_fmt fmt = Printf.ksprintf failwith fmt + +module State = struct + module Receive_state = struct + type t = { + sr: Sr.t + ; dummy_vdi: Vdi.t + ; leaf_vdi: Vdi.t + ; leaf_dp: dp + ; parent_vdi: Vdi.t + ; remote_vdi: Vdi.t + ; mirror_vm: Vm.t + ; url: string [@default ""] + ; verify_dest: bool [@default false] + } + [@@deriving rpcty] + + let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty + + let t_of_rpc x = + match Rpcmarshal.unmarshal t.Rpc.Types.ty x with + | Ok y -> + y + | Error (`Msg m) -> + failwith_fmt "Failed to unmarshal Receive_state.t: %s" m + end + + module Send_state = struct + type remote_info = { + dp: dp + ; vdi: Vdi.t + ; url: string + ; verify_dest: bool [@default false] + } + [@@deriving rpcty] + + type tapdev = Tapctl.tapdev + + let typ_of_tapdev = + Rpc.Types.( + Abstract + { + aname= "tapdev" + ; test_data= [] + ; rpc_of= Tapctl.rpc_of_tapdev + ; of_rpc= (fun x -> Ok (Tapctl.tapdev_of_rpc x)) + } + ) + + type handle = Scheduler.handle + + let typ_of_handle = + Rpc.Types.( + Abstract + { + aname= "handle" + ; test_data= [] + ; rpc_of= Scheduler.rpc_of_handle + ; of_rpc= (fun x -> Ok (Scheduler.handle_of_rpc x)) + } + ) + + type t = { + url: string + ; dest_sr: Sr.t + ; remote_info: remote_info option + ; local_dp: dp + ; tapdev: tapdev option + ; mutable failed: bool + ; mutable watchdog: handle option + ; vdi: Vdi.t [@default Vdi.of_string ""] (* source vdi *) + ; live_vm: Vm.t + [@default Vm.of_string "0"] + (* vm to which the source vdi is attached *) + ; mirror_key: Mirror.operation option [@default None] + } + [@@deriving rpcty] + + let rpc_of_t = Rpcmarshal.marshal 
t.Rpc.Types.ty + + let t_of_rpc x = + match Rpcmarshal.unmarshal t.Rpc.Types.ty x with + | Ok y -> + y + | Error (`Msg m) -> + failwith_fmt "Failed to unmarshal Send_state.t: %s" m + end + + module Copy_state = struct + type t = { + base_dp: dp + ; leaf_dp: dp + ; remote_dp: dp + ; dest_sr: Sr.t + ; copy_vdi: Vdi.t + ; remote_url: string + ; verify_dest: bool [@default false] + } + [@@deriving rpcty] + + let rpc_of_t = Rpcmarshal.marshal t.Rpc.Types.ty + + let t_of_rpc x = + match Rpcmarshal.unmarshal t.Rpc.Types.ty x with + | Ok y -> + y + | Error (`Msg m) -> + failwith_fmt "Failed to unmarshal Copy_state.t: %s" m + end + + let loaded = ref false + + let mutex = Mutex.create () + + type send_table = (string, Send_state.t) Hashtbl.t + + type recv_table = (string, Receive_state.t) Hashtbl.t + + type copy_table = (string, Copy_state.t) Hashtbl.t + + type osend + + type orecv + + type ocopy + + type _ operation = + | Send_op : Send_state.t -> osend operation + | Recv_op : Receive_state.t -> orecv operation + | Copy_op : Copy_state.t -> ocopy operation + + type _ table = + | Send_table : send_table -> osend table + | Recv_table : recv_table -> orecv table + | Copy_table : copy_table -> ocopy table + + let active_send : send_table = Hashtbl.create 10 + + let active_recv : recv_table = Hashtbl.create 10 + + let active_copy : copy_table = Hashtbl.create 10 + + let table_of_op : type a. a operation -> a table = function + | Send_op _ -> + Send_table active_send + | Recv_op _ -> + Recv_table active_recv + | Copy_op _ -> + Copy_table active_copy + + let persist_root = ref "/var/run/nonpersistent" + + let path_of_table : type a. a table -> string = function + | Send_table _ -> + Filename.concat !persist_root "storage_mirrors_send.json" + | Recv_table _ -> + Filename.concat !persist_root "storage_mirrors_recv.json" + | Copy_table _ -> + Filename.concat !persist_root "storage_mirrors_copy.json" + + let rpc_of_table : type a. 
a table -> Rpc.t = + let open Rpc_std_helpers in + function + | Send_table send_table -> + rpc_of_hashtbl ~rpc_of:Send_state.rpc_of_t send_table + | Recv_table recv_table -> + rpc_of_hashtbl ~rpc_of:Receive_state.rpc_of_t recv_table + | Copy_table copy_table -> + rpc_of_hashtbl ~rpc_of:Copy_state.rpc_of_t copy_table + + let to_string : type a. a table -> string = + fun table -> rpc_of_table table |> Jsonrpc.to_string + + let rpc_of_path path = Unixext.string_of_file path |> Jsonrpc.of_string + + let load_one : type a. a table -> unit = + fun table -> + let rpc = path_of_table table |> rpc_of_path in + let open Rpc_std_helpers in + match table with + | Send_table table -> + Hashtbl.iter (Hashtbl.replace table) + (hashtbl_of_rpc ~of_rpc:Send_state.t_of_rpc rpc) + | Recv_table table -> + Hashtbl.iter (Hashtbl.replace table) + (hashtbl_of_rpc ~of_rpc:Receive_state.t_of_rpc rpc) + | Copy_table table -> + Hashtbl.iter (Hashtbl.replace table) + (hashtbl_of_rpc ~of_rpc:Copy_state.t_of_rpc rpc) + + let save_one : type a. 
a table -> unit = + fun table -> + to_string table |> Unixext.write_string_to_file (path_of_table table) + + let save () = + Unixext.mkdir_rec !persist_root 0o700 ; + save_one (Send_table active_send) ; + save_one (Recv_table active_recv) ; + save_one (Copy_table active_copy) + + let access_table ~save_after f table = + let load () = + ignore_exn (fun () -> load_one (Send_table active_send)) ; + ignore_exn (fun () -> load_one (Recv_table active_recv)) ; + ignore_exn (fun () -> load_one (Copy_table active_copy)) ; + loaded := true + in + Xapi_stdext_threads.Threadext.Mutex.execute mutex (fun () -> + if not !loaded then load () ; + let result = f table in + if save_after then save () ; + result + ) + + let map_of () = + let contents_of table = + Hashtbl.fold (fun k v acc -> (k, v) :: acc) table [] + in + let send_ops = access_table ~save_after:false contents_of active_send in + let recv_ops = access_table ~save_after:false contents_of active_recv in + let copy_ops = access_table ~save_after:false contents_of active_copy in + (send_ops, recv_ops, copy_ops) + + let add : type a. string -> a operation -> unit = + fun id op -> + let add' : type a. 
string -> a operation -> a table -> unit = + fun id op table -> + match (table, op) with + | Send_table table, Send_op op -> + Hashtbl.replace table id op + | Recv_table table, Recv_op op -> + Hashtbl.replace table id op + | Copy_table table, Copy_op op -> + Hashtbl.replace table id op + in + access_table ~save_after:true + (fun table -> add' id op table) + (table_of_op op) + + let find id table = + access_table ~save_after:false + (fun table -> Hashtbl.find_opt table id) + table + + let remove id table = + access_table ~save_after:true (fun table -> Hashtbl.remove table id) table + + let clear () = + let clear_one (type a) (tbl : (string, a) Hashtbl.t) : unit = + access_table ~save_after:true Hashtbl.clear tbl + in + clear_one active_send ; clear_one active_recv ; clear_one active_copy + + let remove_local_mirror id = remove id active_send + + let remove_receive_mirror id = remove id active_recv + + let remove_copy id = remove id active_copy + + let find_active_local_mirror id = find id active_send + + let find_active_receive_mirror id = find id active_recv + + let find_active_copy id = find id active_copy + + let mirror_id_of (sr, vdi) = + Printf.sprintf "%s/%s" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + + let of_mirror_id id = + match String.split_on_char '/' id with + | sr :: rest -> + Storage_interface. + (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) + | _ -> + failwith "Bad id" + + let copy_id_of (sr, vdi) = + Printf.sprintf "copy/%s/%s" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + + let of_copy_id id = + match String.split_on_char '/' id with + | op :: sr :: rest when op = "copy" -> + Storage_interface. 
+ (Sr.of_string sr, Vdi.of_string (String.concat "/" rest)) + | _ -> + failwith "Bad id" +end + +let vdi_info = function + | Some (Vdi_info v) -> + v + | _ -> + failwith "Runtime type error: expecting Vdi_info" + +let remove_from_sm_config vdi_info key = + { + vdi_info with + sm_config= List.filter (fun (k, _) -> k <> key) vdi_info.sm_config + } + +let add_to_sm_config vdi_info key value = + let vdi_info = remove_from_sm_config vdi_info key in + {vdi_info with sm_config= (key, value) :: vdi_info.sm_config} + +let with_http request f s = + try Http_client.rpc s request (fun response s -> f (response, s)) + with Unix.Unix_error (Unix.ECONNRESET, _, _) -> raise Connection_reset + +module Local = StorageAPI (Idl.Exn.GenClient (struct + let rpc call = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"smapiv2" + (Storage_utils.localhost_connection_args ()) + call +end)) + +module type SMAPIv2 = module type of Local + +let get_remote_backend url verify_dest = + let remote_url = Storage_utils.connection_args_of_uri ~verify_dest url in + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" remote_url + end)) in + (module Remote : SMAPIv2) + +let find_vdi ~dbg ~sr ~vdi (module SMAPIv2 : SMAPIv2) = + let vdis, _ = SMAPIv2.SR.scan2 dbg sr in + match List.find_opt (fun x -> x.vdi = vdi) vdis with + | None -> + failwith_fmt "VDI %s not found" (Storage_interface.Vdi.string_of vdi) + | Some v -> + (v, vdis) + +(** [similar_vdis dbg sr vdi] returns a list of content_ids of vdis + which are similar to the input [vdi] in [sr] *) +let similar_vdis ~dbg ~sr ~vdi = + let similar_vdis = Local.VDI.similar_content dbg sr vdi in + let similars = + List.filter_map + (function + | {content_id; _} when content_id = "" -> + None + | {content_id; _} -> + Some content_id + ) + similar_vdis + in + + D.debug "%s Similar VDIs to = [ %s ]" __FUNCTION__ + (String.concat "; " + (List.map + (fun x -> + Printf.sprintf 
"(vdi=%s,content_id=%s)" + (Storage_interface.Vdi.string_of x.vdi) + x.content_id + ) + similar_vdis + ) + ) ; + similars diff --git a/ocaml/xapi/storage_migrate_helper.mli b/ocaml/xapi/storage_migrate_helper.mli new file mode 100644 index 00000000000..0f3a6ee8e11 --- /dev/null +++ b/ocaml/xapi/storage_migrate_helper.mli @@ -0,0 +1,272 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module SXM : Debug.DEBUG + +open Storage_interface + +val failwith_fmt : ('a, unit, string, 'b) format4 -> 'a + +module State : sig + module Receive_state : sig + type t = { + sr: Storage_interface.sr + ; dummy_vdi: Storage_interface.vdi + ; leaf_vdi: Storage_interface.vdi + ; leaf_dp: string + ; parent_vdi: Storage_interface.vdi + ; remote_vdi: Storage_interface.vdi + ; mirror_vm: Storage_interface.vm + ; url: string + ; verify_dest: bool + } + + val t_sr : (Storage_interface.sr, t) Rpc.Types.field + + val t_dummy_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_leaf_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_leaf_dp : (string, t) Rpc.Types.field + + val t_parent_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_remote_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_mirror_vm : (Storage_interface.vm, t) Rpc.Types.field + + val typ_of : t Rpc.Types.typ + + val t : t Rpc.Types.def + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t + end + + module Send_state : sig + type remote_info = { + dp: string 
+ ; vdi: Storage_interface.vdi + ; url: string + ; verify_dest: bool + } + + val remote_info_dp : (string, remote_info) Rpc.Types.field + + val remote_info_vdi : (Storage_interface.vdi, remote_info) Rpc.Types.field + + val remote_info_url : (string, remote_info) Rpc.Types.field + + val remote_info_verify_dest : (bool, remote_info) Rpc.Types.field + + val typ_of_remote_info : remote_info Rpc.Types.typ + + val remote_info : remote_info Rpc.Types.def + + type tapdev = Tapctl.tapdev + + val typ_of_tapdev : Tapctl.tapdev Rpc.Types.typ + + type handle = Scheduler.handle + + val typ_of_handle : Scheduler.handle Rpc.Types.typ + + type t = { + url: string + ; dest_sr: Storage_interface.sr + ; remote_info: remote_info option + ; local_dp: string + ; tapdev: tapdev option + ; mutable failed: bool + ; mutable watchdog: handle option + ; vdi: Vdi.t [@default Vdi.of_string ""] + ; live_vm: Vm.t [@default Vm.of_string "0"] + ; mirror_key: Mirror.operation option [@default None] + } + + val t_url : (string, t) Rpc.Types.field + + val t_dest_sr : (Storage_interface.sr, t) Rpc.Types.field + + val t_remote_info : (remote_info option, t) Rpc.Types.field + + val t_local_dp : (string, t) Rpc.Types.field + + val t_tapdev : (tapdev option, t) Rpc.Types.field + + val t_failed : (bool, t) Rpc.Types.field + + val t_watchdog : (handle option, t) Rpc.Types.field + + val typ_of : t Rpc.Types.typ + + val t : t Rpc.Types.def + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t + end + + module Copy_state : sig + type t = { + base_dp: string + ; leaf_dp: string + ; remote_dp: string + ; dest_sr: Storage_interface.sr + ; copy_vdi: Storage_interface.vdi + ; remote_url: string + ; verify_dest: bool + } + + val t_base_dp : (string, t) Rpc.Types.field + + val t_leaf_dp : (string, t) Rpc.Types.field + + val t_remote_dp : (string, t) Rpc.Types.field + + val t_dest_sr : (Storage_interface.sr, t) Rpc.Types.field + + val t_copy_vdi : (Storage_interface.vdi, t) Rpc.Types.field + + val t_remote_url : 
(string, t) Rpc.Types.field + + val t_verify_dest : (bool, t) Rpc.Types.field + + val typ_of : t Rpc.Types.typ + + val t : t Rpc.Types.def + + val rpc_of_t : t -> Rpc.t + + val t_of_rpc : Rpc.t -> t + end + + val loaded : bool ref + + val mutex : Mutex.t + + type send_table = (string, Send_state.t) Hashtbl.t + + type recv_table = (string, Receive_state.t) Hashtbl.t + + type copy_table = (string, Copy_state.t) Hashtbl.t + + type osend + + type orecv + + type ocopy + + type _ operation = + | Send_op : Send_state.t -> osend operation + | Recv_op : Receive_state.t -> orecv operation + | Copy_op : Copy_state.t -> ocopy operation + + type _ table = + | Send_table : send_table -> osend table + | Recv_table : recv_table -> orecv table + | Copy_table : copy_table -> ocopy table + + val active_send : send_table + + val active_recv : recv_table + + val active_copy : copy_table + + val table_of_op : 'a operation -> 'a table + + val persist_root : string ref + + val path_of_table : 'a table -> string + + val rpc_of_table : 'a table -> Rpc.t + + val to_string : 'a table -> string + + val rpc_of_path : string -> Rpc.t + + val save_one : 'a table -> unit + + val save : unit -> unit + + val access_table : save_after:bool -> ('a -> 'b) -> 'a -> 'b + + val map_of : + unit + -> (string * Send_state.t) list + * (string * Receive_state.t) list + * (string * Copy_state.t) list + + val add : string -> 'a operation -> unit + + val find : 'a -> ('a, 'b) Hashtbl.t -> 'b option + + val remove : 'a -> ('a, 'b) Hashtbl.t -> unit + + val clear : unit -> unit + + val remove_local_mirror : string -> unit + + val remove_receive_mirror : string -> unit + + val remove_copy : string -> unit + + val find_active_local_mirror : string -> Send_state.t option + + val find_active_receive_mirror : string -> Receive_state.t option + + val find_active_copy : string -> Copy_state.t option + + val mirror_id_of : Storage_interface.sr * Storage_interface.vdi -> string + + val of_mirror_id : string -> 
Storage_interface.sr * Storage_interface.vdi + + val copy_id_of : Storage_interface.sr * Storage_interface.vdi -> string + + val of_copy_id : string -> Storage_interface.sr * Storage_interface.vdi +end + +val vdi_info : + Storage_interface.async_result_t option -> Storage_interface.vdi_info + +val remove_from_sm_config : + Storage_interface.vdi_info -> string -> Storage_interface.vdi_info + +val add_to_sm_config : + Storage_interface.vdi_info -> string -> string -> Storage_interface.vdi_info + +val with_http : + Http.Request.t + -> (Http.Response.t * Unix.file_descr -> 'a) + -> Unix.file_descr + -> 'a + +module type SMAPIv2 = sig + include module type of Storage_interface.StorageAPI (Idl.Exn.GenClient (struct + let rpc call = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"smapiv2" + (Storage_utils.localhost_connection_args ()) + call + end)) +end + +module Local : SMAPIv2 + +val get_remote_backend : string -> bool -> (module SMAPIv2) + +val find_vdi : + dbg:string -> sr:sr -> vdi:vdi -> (module SMAPIv2) -> vdi_info * vdi_info list + +val similar_vdis : dbg:string -> sr:sr -> vdi:vdi -> uuid list diff --git a/ocaml/xapi/storage_mux.ml b/ocaml/xapi/storage_mux.ml index 7acba0c8823..0427f76ca54 100644 --- a/ocaml/xapi/storage_mux.ml +++ b/ocaml/xapi/storage_mux.ml @@ -17,113 +17,20 @@ module Unixext = Xapi_stdext_unix.Unixext module D = Debug.Make (struct let name = "mux" end) open D - -let with_dbg ~name ~dbg f = - Debug_info.with_dbg ~with_thread:true ~module_name:"SMAPIv2" ~name ~dbg f - -type processor = Rpc.call -> Rpc.response - -let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute - open Storage_interface +open Storage_mux_reg +open Storage_utils -let s_of_sr = Sr.string_of - -let s_of_vdi = Vdi.string_of - -let s_of_vm = Vm.string_of - -type plugin = { - processor: processor - ; backend_domain: string - ; query_result: query_result - ; features: Smint.Feature.t list -} - -let plugins : (sr, plugin) Hashtbl.t = Hashtbl.create 10 - -let m = Mutex.create 
() - -let debug_printer rpc call = - (* debug "Rpc.call = %s" (Xmlrpc.string_of_call call); *) - let result = rpc call in - (* debug "Rpc.response = %s" (Xmlrpc.string_of_response result); *) - result - -let register sr rpc d info = - with_lock m (fun () -> - let features = - Smint.Feature.parse_capability_int64 info.Storage_interface.features - in - Hashtbl.replace plugins sr - { - processor= debug_printer rpc - ; backend_domain= d - ; query_result= info - ; features - } ; - debug "register SR %s (currently-registered = [ %s ])" (s_of_sr sr) - (String.concat ", " - (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) - ) - ) - -let unregister sr = - with_lock m (fun () -> - Hashtbl.remove plugins sr ; - debug "unregister SR %s (currently-registered = [ %s ])" (s_of_sr sr) - (String.concat ", " - (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) - ) - ) - -(* This function is entirely unused, but I am not sure if it should be - deleted or not *) -let query_result_of_sr sr = - with_lock m (fun () -> - Option.map (fun x -> x.query_result) (Hashtbl.find_opt plugins sr) - ) - -let sr_has_capability sr capability = - with_lock m (fun () -> - match Hashtbl.find_opt plugins sr with - | Some x -> - Smint.Feature.has_capability capability x.features - | None -> - false - ) - -(* This is the policy: *) -let of_sr sr = - with_lock m (fun () -> - match Hashtbl.find_opt plugins sr with - | Some x -> - x.processor - | None -> - error "No storage plugin for SR: %s (currently-registered = [ %s ])" - (s_of_sr sr) - (String.concat ", " - (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) - ) ; - raise (Storage_error (No_storage_plugin_for_sr (s_of_sr sr))) - ) +let s_of_sr = Storage_interface.Sr.string_of -type 'a sm_result = SMSuccess of 'a | SMFailure of exn +let s_of_vdi = Storage_interface.Vdi.string_of -let multicast f = - Hashtbl.fold - (fun sr plugin acc -> - (sr, try SMSuccess (f sr plugin.processor) with e -> SMFailure e) :: acc - ) - plugins 
[] +let s_of_vm = Storage_interface.Vm.string_of -let success = function SMSuccess _ -> true | _ -> false +let s_of_operation = Storage_interface.Mirror.show_operation -let string_of_sm_result f = function - | SMSuccess x -> - Printf.sprintf "Success: %s" (f x) - | SMFailure e -> - Printf.sprintf "Failure: %s" (Printexc.to_string e) +let with_dbg ~name ~dbg f = + Debug_info.with_dbg ~with_thread:true ~module_name:"SMAPIv2" ~name ~dbg f let partition l = List.partition (fun (_, x) -> success x) l @@ -146,7 +53,7 @@ module Mux = struct List.fold_left (fun acc (sr, result) -> Printf.sprintf "For SR: %s" (s_of_sr sr) - :: string_of_sm_result (fun s -> s) result + :: s_of_sm_result (fun s -> s) result :: acc ) [] results @@ -169,6 +76,7 @@ module Mux = struct ; features= [] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } let diagnostics () ~dbg = @@ -425,11 +333,32 @@ module Mux = struct Storage_migrate.update_snapshot_info_src ~dbg:(Debug_info.to_string di) ~sr ~vdi ~url ~dest ~dest_vdi ~snapshot_pairs + let set_is_a_snapshot _context ~dbg ~sr ~vdi ~is_a_snapshot = + Server_helpers.exec_with_new_task "VDI.set_is_a_snapshot" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let vdi, _ = find_vdi ~__context sr vdi in + Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:is_a_snapshot + ) + + let set_snapshot_time _context ~dbg ~sr ~vdi ~snapshot_time = + let module Date = Clock.Date in + Server_helpers.exec_with_new_task "VDI.set_snapshot_time" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let vdi, _ = find_vdi ~__context sr vdi in + let snapshot_time = Date.of_iso8601 snapshot_time in + Db.VDI.set_snapshot_time ~__context ~self:vdi ~value:snapshot_time + ) + + let set_snapshot_of _context ~dbg ~sr ~vdi ~snapshot_of = + Server_helpers.exec_with_new_task "VDI.set_snapshot_of" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let vdi, _ = find_vdi ~__context sr vdi in + let snapshot_of, _ = find_vdi ~__context sr snapshot_of 
in + Db.VDI.set_snapshot_of ~__context ~self:vdi ~value:snapshot_of + ) + let update_snapshot_info_dest () ~dbg ~sr ~vdi ~src_vdi ~snapshot_pairs = - with_dbg ~name:"SR.update_snapshot_info_dest" ~dbg @@ fun di -> - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in + with_dbg ~name:"SR.update_snapshot_info_dest" ~dbg @@ fun _di -> info "SR.update_snapshot_info_dest dbg:%s sr:%s vdi:%s ~src_vdi:%s \ snapshot_pairs:%s" @@ -443,8 +372,44 @@ module Mux = struct |> String.concat "; " |> Printf.sprintf "[%s]" ) ; - C.SR.update_snapshot_info_dest (Debug_info.to_string di) sr vdi src_vdi - snapshot_pairs + Server_helpers.exec_with_new_task "SR.update_snapshot_info_dest" + ~subtask_of:(Ref.of_string dbg) (fun __context -> + let local_vdis, _ = scan2 () ~dbg ~sr in + let find_sm_vdi ~vdi ~vdi_info_list = + try List.find (fun x -> x.vdi = vdi) vdi_info_list + with Not_found -> + raise (Storage_error (Vdi_does_not_exist (s_of_vdi vdi))) + in + let assert_content_ids_match ~vdi_info1 ~vdi_info2 = + if vdi_info1.content_id <> vdi_info2.content_id then + raise + (Storage_error + (Content_ids_do_not_match + (s_of_vdi vdi_info1.vdi, s_of_vdi vdi_info2.vdi) + ) + ) + in + (* For each (local snapshot vdi, source snapshot vdi) pair: + * - Check that the content_ids are the same + * - Copy snapshot_time from the source VDI to the local VDI + * - Set the local VDI's snapshot_of to vdi + * - Set is_a_snapshot = true for the local snapshot *) + List.iter + (fun (local_snapshot, src_snapshot_info) -> + let local_snapshot_info = + find_sm_vdi ~vdi:local_snapshot ~vdi_info_list:local_vdis + in + assert_content_ids_match ~vdi_info1:local_snapshot_info + ~vdi_info2:src_snapshot_info ; + set_snapshot_time __context ~dbg ~sr ~vdi:local_snapshot + ~snapshot_time:src_snapshot_info.snapshot_time ; + set_snapshot_of __context ~dbg ~sr ~vdi:local_snapshot + ~snapshot_of:vdi ; + set_is_a_snapshot __context ~dbg ~sr ~vdi:local_snapshot + ~is_a_snapshot:true + ) + 
snapshot_pairs + ) end module VDI = struct @@ -679,10 +644,23 @@ module Mux = struct with_dbg ~name:"VDI.deativate" ~dbg @@ fun di -> info "VDI.deactivate dbg:%s dp:%s sr:%s vdi:%s vm:%s" dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in - C.VDI.deactivate (Debug_info.to_string di) dp sr vdi vm + let open DP_info in + match read dp with + | Some {sr; vdi; vm; _} -> + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.VDI.deactivate (Debug_info.to_string di) dp sr vdi vm ; + (*XX The hook should not be called here, nor should storage_mux care about + the SMAPI version of the SR, but as xapi-storage-script cannot call code + xapi, and smapiv1_wrapper has state tracking logic, the hook has to be placed + here for now. *) + if smapi_version_of_sr sr = SMAPIv3 then + Storage_migrate.post_deactivate_hook ~sr ~vdi ~dp + | None -> + info + "dp %s is not associated with a locally attached VDI; nothing to do" + dp let detach () ~dbg ~dp ~sr ~vdi ~vm = with_dbg ~name:"VDI.detach" ~dbg @@ fun di -> @@ -834,44 +812,65 @@ module Mux = struct let copy () ~dbg = with_dbg ~name:"DATA.copy" ~dbg @@ fun dbg -> Storage_migrate.copy ~dbg - module MIRROR = struct - let start () ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest ~verify_dest - = - with_dbg ~name:"DATA.MIRROR.start" ~dbg @@ fun di -> - info - "%s dbg:%s sr: %s vdi: %s dp:%s mirror_vm: %s copy_vm: %s url: %s \ - dest sr: %s verify_dest: %B" - __FUNCTION__ dbg (s_of_sr sr) (s_of_vdi vdi) dp (s_of_vm mirror_vm) - (s_of_vm copy_vm) url (s_of_sr dest) verify_dest ; - Storage_migrate.start ~dbg:di ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url - ~dest ~verify_dest - - let stop () ~dbg ~id = - with_dbg ~name:"DATA.MIRROR.stop" ~dbg @@ fun di -> - info "%s dbg:%s mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.stop ~dbg:di.log ~id + let mirror () ~dbg ~sr ~vdi ~vm ~dest = + with_dbg ~name:"DATA.mirror" ~dbg @@ 
fun di -> + info "%s dbg:%s sr: %s vdi: %s vm:%s remote:%s" __FUNCTION__ dbg + (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) dest ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.mirror (Debug_info.to_string di) sr vdi vm dest - let list () ~dbg = - with_dbg ~name:"DATA.MIRROR.list" ~dbg @@ fun di -> - info "%s dbg: %s" __FUNCTION__ dbg ; - Storage_migrate.list ~dbg:di.log + let stat () ~dbg ~sr ~vdi ~vm ~key = + with_dbg ~name:"DATA.stat" ~dbg @@ fun di -> + info "%s dbg:%s sr: %s vdi: %s vm: %s opeartion_key: %s" __FUNCTION__ dbg + (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) (s_of_operation key) ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.stat (Debug_info.to_string di) sr vdi vm key - let stat () ~dbg ~id = - with_dbg ~name:"DATA.MIRROR.stat" ~dbg @@ fun di -> - info "%s dbg: %s mirror_id: %s" __FUNCTION__ di.log id ; - Storage_migrate.stat ~dbg:di.log ~id + let import_activate () ~dbg ~dp ~sr ~vdi ~vm = + with_dbg ~name:"DATA.import_activate" ~dbg @@ fun di -> + info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) (s_of_vm vm) ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.import_activate (Debug_info.to_string di) dp sr vdi vm + + let get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm = + with_dbg ~name:"DATA.get_nbd_server" ~dbg @@ fun di -> + info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) (s_of_vm vm) ; + let module C = StorageAPI (Idl.Exn.GenClient (struct + let rpc = of_sr sr + end)) in + C.DATA.get_nbd_server (Debug_info.to_string di) dp sr vdi vm + + module MIRROR = struct + type context = unit + + let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ + ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ + ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = + Storage_interface.unimplemented + __FUNCTION__ (* see 
storage_smapi{v1,v3}_migrate.ml *) let receive_start () ~dbg ~sr ~vdi_info ~id ~similar = - with_dbg ~name:"DATA.MIRROR.receive_start" ~dbg @@ fun di -> + with_dbg ~name:"DATA.MIRROR.receive_start" ~dbg @@ fun _di -> info "%s dbg: %s sr: %s vdi_info: %s mirror_id: %s similar: %s" __FUNCTION__ dbg (s_of_sr sr) (string_of_vdi_info vdi_info) id (String.concat ";" similar) ; - Storage_migrate.receive_start ~dbg:di.log ~sr ~vdi_info ~id ~similar + (* This goes straight to storage_smapiv1_migrate for backwards compatability + reasons, new code should not call receive_start any more *) + Storage_smapiv1_migrate.MIRROR.receive_start () ~dbg ~sr ~vdi_info ~id + ~similar let receive_start2 () ~dbg ~sr ~vdi_info ~id ~similar ~vm = - with_dbg ~name:"DATA.MIRROR.receive_start2" ~dbg @@ fun di -> + with_dbg ~name:"DATA.MIRROR.receive_start2" ~dbg @@ fun _di -> info "%s dbg: %s sr: %s vdi_info: %s mirror_id: %s similar: %s vm: %s" __FUNCTION__ dbg (s_of_sr sr) (string_of_vdi_info vdi_info) @@ -879,41 +878,52 @@ module Mux = struct (String.concat ";" similar) (s_of_vm vm) ; info "%s dbg:%s" __FUNCTION__ dbg ; - Storage_migrate.receive_start2 ~dbg:di.log ~sr ~vdi_info ~id ~similar - ~vm + (* This goes straight to storage_smapiv1_migrate for backwards compatability + reasons, new code should not call receive_start any more *) + Storage_smapiv1_migrate.MIRROR.receive_start2 () ~dbg ~sr ~vdi_info ~id + ~similar ~vm + + (** see storage_smapiv{1,3}_migrate.receive_start3 *) + let receive_start3 () ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ ~similar:_ + ~vm:_ = + Storage_interface.unimplemented __FUNCTION__ let receive_finalize () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_finalize" ~dbg @@ fun di -> info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.receive_finalize ~dbg:di.log ~id + Storage_smapiv1_migrate.MIRROR.receive_finalize () ~dbg:di.log ~id let receive_finalize2 () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_finalize2" ~dbg @@ fun di -> info "%s dbg: %s 
mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.receive_finalize2 ~dbg:di.log ~id + Storage_smapiv1_migrate.MIRROR.receive_finalize2 () ~dbg:di.log ~id + + let receive_finalize3 () ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ ~verify_dest:_ = + Storage_interface.unimplemented __FUNCTION__ let receive_cancel () ~dbg ~id = with_dbg ~name:"DATA.MIRROR.receive_cancel" ~dbg @@ fun di -> info "%s dbg: %s mirror_id: %s" __FUNCTION__ dbg id ; - Storage_migrate.receive_cancel ~dbg:di.log ~id + Storage_smapiv1_migrate.MIRROR.receive_cancel () ~dbg:di.log ~id - let import_activate () ~dbg ~dp ~sr ~vdi ~vm = - with_dbg ~name:"DATA.MIRROR.import_activate" ~dbg @@ fun di -> - info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp - (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in - C.DATA.MIRROR.import_activate (Debug_info.to_string di) dp sr vdi vm + let receive_cancel2 () ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = + Storage_interface.unimplemented __FUNCTION__ - let get_nbd_server () ~dbg ~dp ~sr ~vdi ~vm = - with_dbg ~name:"DATA.MIRROR.get_nbd_server" ~dbg @@ fun di -> - info "%s dbg:%s dp:%s sr:%s vdi:%s vm:%s" __FUNCTION__ dbg dp - (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let module C = StorageAPI (Idl.Exn.GenClient (struct - let rpc = of_sr sr - end)) in - C.DATA.MIRROR.get_nbd_server (Debug_info.to_string di) dp sr vdi vm + let pre_deactivate_hook _ctx ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = + Storage_interface.unimplemented __FUNCTION__ + + let has_mirror_failed _ctx ~dbg:_ ~mirror_id:_ ~sr:_ = + Storage_interface.unimplemented __FUNCTION__ + + let list () ~dbg = + with_dbg ~name:"DATA.MIRROR.list" ~dbg @@ fun di -> + info "%s dbg: %s" __FUNCTION__ dbg ; + Storage_migrate.list ~dbg:di.log + + let stat () ~dbg ~id = + with_dbg ~name:"DATA.MIRROR.stat" ~dbg @@ fun di -> + info "%s dbg: %s mirror_id: %s" __FUNCTION__ di.log id ; + Storage_migrate.stat ~dbg:di.log ~id end end diff --git 
a/ocaml/xapi/storage_mux_reg.ml b/ocaml/xapi/storage_mux_reg.ml new file mode 100644 index 00000000000..488fcd9aa89 --- /dev/null +++ b/ocaml/xapi/storage_mux_reg.ml @@ -0,0 +1,125 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** This module contains the code for registering storage plugins (SMAPIv1 and SMAPIv3) +and multiplexing between them according to the sr type *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +type processor = Rpc.call -> Rpc.response + +let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute + +open Storage_interface + +let s_of_sr = Storage_interface.Sr.string_of + +type plugin = { + processor: processor + ; backend_domain: string + ; query_result: query_result + ; features: Smint.Feature.t list +} + +let plugins : (sr, plugin) Hashtbl.t = Hashtbl.create 10 + +let m = Mutex.create () + +let debug_printer rpc call = + (* debug "Rpc.call = %s" (Xmlrpc.string_of_call call); *) + let result = rpc call in + (* debug "Rpc.response = %s" (Xmlrpc.string_of_response result); *) + result + +let register sr rpc d info = + with_lock m (fun () -> + let features = + Smint.Feature.parse_capability_int64 info.Storage_interface.features + in + Hashtbl.replace plugins sr + { + processor= debug_printer rpc + ; backend_domain= d + ; query_result= info + ; features + } ; + D.debug "register SR %s (currently-registered = [ %s ])" (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr 
sr :: acc) plugins []) + ) + ) + +let unregister sr = + with_lock m (fun () -> + Hashtbl.remove plugins sr ; + D.debug "unregister SR %s (currently-registered = [ %s ])" (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) + ) + ) + +let sr_has_capability sr capability = + with_lock m (fun () -> + match Hashtbl.find_opt plugins sr with + | Some x -> + Smint.Feature.has_capability capability x.features + | None -> + false + ) + +(* This is the policy: *) +let of_sr sr = + with_lock m (fun () -> + match Hashtbl.find_opt plugins sr with + | Some x -> + x.processor + | None -> + D.error "No storage plugin for SR: %s (currently-registered = [ %s ])" + (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) + ) ; + raise (Storage_error (No_storage_plugin_for_sr (s_of_sr sr))) + ) + +let smapi_version_of_sr sr = + with_lock m (fun () -> + match Hashtbl.find_opt plugins sr with + | Some x -> + x.query_result.smapi_version + | None -> + D.error "No storage plugin for SR: %s (currently-registered = [ %s ])" + (s_of_sr sr) + (String.concat ", " + (Hashtbl.fold (fun sr _ acc -> s_of_sr sr :: acc) plugins []) + ) ; + raise (Storage_error (No_storage_plugin_for_sr (s_of_sr sr))) + ) + +type 'a sm_result = SMSuccess of 'a | SMFailure of exn + +let s_of_sm_result f = function + | SMSuccess x -> + Printf.sprintf "Success: %s" (f x) + | SMFailure e -> + Printf.sprintf "Failure: %s" (Printexc.to_string e) + +let success = function SMSuccess _ -> true | _ -> false + +let multicast f = + Hashtbl.fold + (fun sr plugin acc -> + (sr, try SMSuccess (f sr plugin.processor) with e -> SMFailure e) :: acc + ) + plugins [] diff --git a/ocaml/xapi/storage_mux_reg.mli b/ocaml/xapi/storage_mux_reg.mli new file mode 100644 index 00000000000..7d4eee95214 --- /dev/null +++ b/ocaml/xapi/storage_mux_reg.mli @@ -0,0 +1,54 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can 
redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type processor = Rpc.call -> Rpc.response + +val with_lock : Mutex.t -> (unit -> 'a) -> 'a + +type plugin = { + processor: processor + ; backend_domain: string + ; query_result: Storage_interface.query_result + ; features: Smint.Feature.t list +} + +val plugins : (Storage_interface.sr, plugin) Hashtbl.t + +val debug_printer : ('a -> 'b) -> 'a -> 'b + +val register : + Storage_interface.sr + -> (Rpc.call -> Rpc.response) + -> string + -> Storage_interface.query_result + -> unit + +val unregister : Storage_interface.sr -> unit + +val sr_has_capability : Storage_interface.sr -> Smint.Feature.capability -> bool + +val of_sr : Storage_interface.sr -> processor + +val smapi_version_of_sr : + Storage_interface.sr -> Storage_interface.smapi_version + +type 'a sm_result = SMSuccess of 'a | SMFailure of exn + +val s_of_sm_result : ('a -> string) -> 'a sm_result -> string + +val success : 'a sm_result -> bool + +val multicast : + (Storage_interface.sr -> processor -> 'a) + -> (Storage_interface.sr * 'a sm_result) list diff --git a/ocaml/xapi/storage_smapiv1.ml b/ocaml/xapi/storage_smapiv1.ml index b71dea3d1c6..0995edc35c4 100644 --- a/ocaml/xapi/storage_smapiv1.ml +++ b/ocaml/xapi/storage_smapiv1.ml @@ -18,8 +18,7 @@ open D module Date = Clock.Date module XenAPI = Client.Client open Storage_interface - -exception No_VDI +open Storage_utils let s_of_vdi = Vdi.string_of @@ -30,26 +29,6 @@ let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute let with_dbg ~name ~dbg f = 
Debug_info.with_dbg ~module_name:"SMAPIv1" ~name ~dbg f -(* Find a VDI given a storage-layer SR and VDI *) -let find_vdi ~__context sr vdi = - let sr = s_of_sr sr in - let vdi = s_of_vdi vdi in - let open Xapi_database.Db_filter_types in - let sr = Db.SR.get_by_uuid ~__context ~uuid:sr in - match - Db.VDI.get_records_where ~__context - ~expr: - (And - ( Eq (Field "location", Literal vdi) - , Eq (Field "SR", Literal (Ref.string_of sr)) - ) - ) - with - | x :: _ -> - x - | _ -> - raise No_VDI - (* Find a VDI reference given a name *) let find_content ~__context ?sr name = (* PR-1255: the backend should do this for us *) @@ -132,32 +111,6 @@ module SMAPIv1 : Server_impl = struct let vdi_rec = Db.VDI.get_record ~__context ~self in vdi_info_of_vdi_rec __context vdi_rec - (* For SMAPIv1, is_a_snapshot, snapshot_time and snapshot_of are stored in - * xapi's database. For SMAPIv2 they should be implemented by the storage - * backend. *) - let set_is_a_snapshot _context ~dbg ~sr ~vdi ~is_a_snapshot = - Server_helpers.exec_with_new_task "VDI.set_is_a_snapshot" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let vdi, _ = find_vdi ~__context sr vdi in - Db.VDI.set_is_a_snapshot ~__context ~self:vdi ~value:is_a_snapshot - ) - - let set_snapshot_time _context ~dbg ~sr ~vdi ~snapshot_time = - Server_helpers.exec_with_new_task "VDI.set_snapshot_time" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let vdi, _ = find_vdi ~__context sr vdi in - let snapshot_time = Date.of_iso8601 snapshot_time in - Db.VDI.set_snapshot_time ~__context ~self:vdi ~value:snapshot_time - ) - - let set_snapshot_of _context ~dbg ~sr ~vdi ~snapshot_of = - Server_helpers.exec_with_new_task "VDI.set_snapshot_of" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let vdi, _ = find_vdi ~__context sr vdi in - let snapshot_of, _ = find_vdi ~__context sr snapshot_of in - Db.VDI.set_snapshot_of ~__context ~self:vdi ~value:snapshot_of - ) - module Query = struct let query _context ~dbg:_ = { @@ -172,6 
+125,7 @@ module SMAPIv1 : Server_impl = struct ; features= [] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv1 } let diagnostics _context ~dbg:_ = @@ -432,46 +386,9 @@ module SMAPIv1 : Server_impl = struct ~dest_vdi:_ ~snapshot_pairs:_ = assert false - let update_snapshot_info_dest _context ~dbg ~sr ~vdi ~src_vdi:_ - ~snapshot_pairs = - Server_helpers.exec_with_new_task "SR.update_snapshot_info_dest" - ~subtask_of:(Ref.of_string dbg) (fun __context -> - let local_vdis = scan __context ~dbg ~sr in - let find_sm_vdi ~vdi ~vdi_info_list = - try List.find (fun x -> x.vdi = vdi) vdi_info_list - with Not_found -> - raise (Storage_error (Vdi_does_not_exist (s_of_vdi vdi))) - in - let assert_content_ids_match ~vdi_info1 ~vdi_info2 = - if vdi_info1.content_id <> vdi_info2.content_id then - raise - (Storage_error - (Content_ids_do_not_match - (s_of_vdi vdi_info1.vdi, s_of_vdi vdi_info2.vdi) - ) - ) - in - (* For each (local snapshot vdi, source snapshot vdi) pair: - * - Check that the content_ids are the same - * - Copy snapshot_time from the source VDI to the local VDI - * - Set the local VDI's snapshot_of to vdi - * - Set is_a_snapshot = true for the local snapshot *) - List.iter - (fun (local_snapshot, src_snapshot_info) -> - let local_snapshot_info = - find_sm_vdi ~vdi:local_snapshot ~vdi_info_list:local_vdis - in - assert_content_ids_match ~vdi_info1:local_snapshot_info - ~vdi_info2:src_snapshot_info ; - set_snapshot_time __context ~dbg ~sr ~vdi:local_snapshot - ~snapshot_time:src_snapshot_info.snapshot_time ; - set_snapshot_of __context ~dbg ~sr ~vdi:local_snapshot - ~snapshot_of:vdi ; - set_is_a_snapshot __context ~dbg ~sr ~vdi:local_snapshot - ~is_a_snapshot:true - ) - snapshot_pairs - ) + let update_snapshot_info_dest _context ~dbg:_ ~sr:_ ~vdi:_ ~src_vdi:_ + ~snapshot_pairs:_ = + assert false end module VDI = struct @@ -561,6 +478,7 @@ module SMAPIv1 : Server_impl = struct ; backend_type= "vbd3" } ; BlockDevice {path= params} + ; Nbd 
{uri= attach_info_v1.Smint.params_nbd} ] ) } @@ -1211,16 +1129,21 @@ module SMAPIv1 : Server_impl = struct let copy _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~url:_ ~dest:_ ~verify_dest:_ = assert false - module MIRROR = struct - let start _context ~dbg:_ ~sr:_ ~vdi:_ ~dp:_ ~mirror_vm:_ ~copy_vm:_ - ~url:_ ~dest:_ ~verify_dest:_ = - assert false + let mirror _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~dest:_ = assert false - let stop _context ~dbg:_ ~id:_ = assert false + let stat _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~key:_ = assert false - let list _context ~dbg:_ = assert false + let import_activate _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false - let stat _context ~dbg:_ ~id:_ = assert false + let get_nbd_server _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false + + module MIRROR = struct + type context = unit + + let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ + ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ + ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = + assert false let receive_start _context ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = assert false @@ -1229,16 +1152,30 @@ module SMAPIv1 : Server_impl = struct ~vm:_ = assert false + let receive_start3 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ + ~similar:_ ~vm:_ ~url:_ ~verify_dest:_ = + assert false + let receive_finalize _context ~dbg:_ ~id:_ = assert false let receive_finalize2 _context ~dbg:_ ~id:_ = assert false + let receive_finalize3 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ + ~verify_dest:_ = + assert false + let receive_cancel _context ~dbg:_ ~id:_ = assert false - let import_activate _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = + let receive_cancel2 _context ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = assert false - let get_nbd_server _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ ~vm:_ = assert false + let pre_deactivate_hook _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = assert false + + let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = assert false + + let list _context ~dbg:_ = 
assert false + + let stat _context ~dbg:_ ~id:_ = assert false end end diff --git a/ocaml/xapi/storage_smapiv1.mli b/ocaml/xapi/storage_smapiv1.mli index 69a0a22aa9f..f991e6f82c3 100644 --- a/ocaml/xapi/storage_smapiv1.mli +++ b/ocaml/xapi/storage_smapiv1.mli @@ -20,7 +20,4 @@ val vdi_read_write : (Sr.t * Vdi.t, bool) Hashtbl.t val vdi_info_of_vdi_rec : Context.t -> API.vDI_t -> Storage_interface.vdi_info -val find_vdi : __context:Context.t -> Sr.t -> Vdi.t -> [`VDI] Ref.t * API.vDI_t -(** Find a VDI given a storage-layer SR and VDI *) - module SMAPIv1 : Server_impl diff --git a/ocaml/xapi/storage_smapiv1_migrate.ml b/ocaml/xapi/storage_smapiv1_migrate.ml new file mode 100644 index 00000000000..c850d61f842 --- /dev/null +++ b/ocaml/xapi/storage_smapiv1_migrate.ml @@ -0,0 +1,900 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +module D = Debug.Make (struct let name = "storage_smapiv1_migrate" end) + +module Unixext = Xapi_stdext_unix.Unixext +open Xapi_stdext_pervasives.Pervasiveext +open Storage_interface +open Xmlrpc_client +open Storage_migrate_helper +open Storage_task +module State = Storage_migrate_helper.State +module SXM = Storage_migrate_helper.SXM + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +let s_of_sr = Storage_interface.Sr.string_of + +let s_of_vdi = Storage_interface.Vdi.string_of + +let s_of_vm = Storage_interface.Vm.string_of + +let with_activated_disk ~dbg ~sr ~vdi ~dp ~vm f = + let attached_vdi = + Option.map + (fun vdi -> + let backend = Local.VDI.attach3 dbg dp sr vdi vm false in + (vdi, backend) + ) + vdi + in + finally + (fun () -> + let path_and_nbd = + Option.map + (fun (vdi, backend) -> + let _xendisks, blockdevs, files, nbds = + Storage_interface.implementations_of_backend backend + in + match (files, blockdevs, nbds) with + | {path} :: _, _, _ | _, {path} :: _, _ -> + Local.VDI.activate3 dbg dp sr vdi vm ; + (path, false) + | _, _, nbd :: _ -> + Local.VDI.activate3 dbg dp sr vdi vm ; + let unix_socket_path, export_name = + Storage_interface.parse_nbd_uri nbd + in + ( Attach_helpers.NbdClient.start_nbd_client ~unix_socket_path + ~export_name + , true + ) + | [], [], [] -> + raise + (Storage_interface.Storage_error + (Backend_error + ( Api_errors.internal_error + , [ + "No File, BlockDevice or Nbd implementation in \ + Datapath.attach response: " + ^ (Storage_interface.(rpc_of backend) backend + |> Jsonrpc.to_string + ) + ] + ) + ) + ) + ) + attached_vdi + in + finally + (fun () -> f (Option.map (function path, _ -> path) path_and_nbd)) + (fun () -> + Option.iter + (function + | path, true -> + Attach_helpers.NbdClient.stop_nbd_client ~nbd_device:path + | _ -> + () + ) + path_and_nbd ; + Option.iter (fun vdi -> Local.VDI.deactivate dbg dp sr vdi vm) vdi + ) + ) + (fun () -> + Option.iter + (fun (vdi, _) -> Local.VDI.detach dbg dp sr vdi vm) 
+ attached_vdi + ) + +let tapdisk_of_attach_info (backend : Storage_interface.backend) = + let _, blockdevices, _, nbds = + Storage_interface.implementations_of_backend backend + in + match (blockdevices, nbds) with + | blockdevice :: _, _ -> ( + let path = blockdevice.Storage_interface.path in + try + match Tapctl.of_device (Tapctl.create ()) path with + | tapdev, _, _ -> + Some tapdev + with + | Tapctl.Not_blktap -> + D.debug "Device %s is not controlled by blktap" path ; + None + | Tapctl.Not_a_device -> + D.debug "%s is not a device" path ; + None + | _ -> + D.debug "Device %s has an unknown driver" path ; + None + ) + | _, nbd :: _ -> ( + try + let path, _ = Storage_interface.parse_nbd_uri nbd in + let filename = Unix.realpath path |> Filename.basename in + Scanf.sscanf filename "nbd%d.%d" (fun pid minor -> + Some (Tapctl.tapdev_of ~pid ~minor) + ) + with _ -> + D.debug "No tapdisk found for NBD backend: %s" nbd.Storage_interface.uri ; + None + ) + | _ -> + D.debug "No tapdisk found for backend: %s" + (Storage_interface.(rpc_of backend) backend |> Rpc.to_string) ; + None + +let progress_callback start len t y = + let new_progress = start +. (y *. 
len) in + Storage_task.set_state t (Task.Pending new_progress) ; + signal (Storage_task.id_of_handle t) + +let perform_cleanup_actions = + List.iter (fun f -> + try f () + with e -> + D.error "Caught %s while performing cleanup actions" + (Printexc.to_string e) + ) + +module Copy = struct + (** [copy_into_vdi] is similar to [copy_into_sr] but requires a [dest_vdi] parameter *) + let copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~dest_vdi ~verify_dest = + let (module Remote) = get_remote_backend url verify_dest in + D.debug "copy local=%s/%s url=%s remote=%s/%s verify_dest=%B" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + url + (Storage_interface.Sr.string_of dest) + (Storage_interface.Vdi.string_of dest_vdi) + verify_dest ; + (* Check the remote SR exists *) + let srs = Remote.SR.list dbg in + if not (List.mem dest srs) then + failwith + (Printf.sprintf "Remote SR %s not found" + (Storage_interface.Sr.string_of dest) + ) ; + + let remote_vdi, _ = find_vdi ~dbg ~sr:dest ~vdi:dest_vdi (module Remote) in + let dest_content_id = remote_vdi.content_id in + (* Find the local VDI *) + let local_vdi, vdis = find_vdi ~dbg ~sr ~vdi (module Local) in + D.debug "copy local content_id=%s" local_vdi.content_id ; + D.debug "copy remote content_id=%s" dest_content_id ; + if local_vdi.virtual_size > remote_vdi.virtual_size then ( + (* This should never happen provided the higher-level logic is working properly *) + D.error "copy local virtual_size=%Ld > remote virtual_size = %Ld" + local_vdi.virtual_size remote_vdi.virtual_size ; + failwith "local VDI is larger than the remote VDI" + ) ; + let on_fail : (unit -> unit) list ref = ref [] in + let base_vdi = + try + let x = + (List.find (fun x -> x.content_id = dest_content_id) vdis).vdi + in + D.debug + "local VDI has content_id = %s; we will perform an incremental copy" + dest_content_id ; + Some x + with _ -> + D.debug "no local VDI has content_id = %s; we will perform a full copy" + 
dest_content_id ; + None + in + try + let remote_dp = Uuidx.(to_string (make ())) in + let base_dp = Uuidx.(to_string (make ())) in + let leaf_dp = Uuidx.(to_string (make ())) in + let dest_vdi_url = + let url' = Http.Url.of_string url in + Http.Url.set_uri url' + (Printf.sprintf "%s/nbdproxy/import/%s/%s/%s/%s" + (Http.Url.get_uri url') + (Storage_interface.Vm.string_of vm) + (Storage_interface.Sr.string_of dest) + (Storage_interface.Vdi.string_of dest_vdi) + remote_dp + ) + |> Http.Url.to_string + in + D.debug "%s copy remote NBD URL = %s" __FUNCTION__ dest_vdi_url ; + let id = State.copy_id_of (sr, vdi) in + D.debug "Persisting state for copy (id=%s)" id ; + State.add id + State.( + Copy_op + Copy_state. + { + base_dp + ; leaf_dp + ; remote_dp + ; dest_sr= dest + ; copy_vdi= remote_vdi.vdi + ; remote_url= url + ; verify_dest + } + ) ; + SXM.info "%s: copy initiated local_vdi:%s dest_vdi:%s" __FUNCTION__ + (Storage_interface.Vdi.string_of vdi) + (Storage_interface.Vdi.string_of dest_vdi) ; + finally + (fun () -> + D.debug "activating RW datapath %s on remote" remote_dp ; + let backend = + Remote.VDI.attach3 dbg remote_dp dest dest_vdi vm true + in + let _, _, _, nbds = + Storage_interface.implementations_of_backend backend + in + let proto = + match nbds with + | [] -> + None + | uri :: _ -> + let _socket, export = Storage_interface.parse_nbd_uri uri in + Some (`NBD export) + in + Remote.VDI.activate3 dbg remote_dp dest dest_vdi vm ; + with_activated_disk ~dbg ~sr ~vdi:base_vdi ~dp:base_dp ~vm + (fun base_path -> + with_activated_disk ~dbg ~sr ~vdi:(Some vdi) ~dp:leaf_dp ~vm + (fun src -> + let verify_cert = + if verify_dest then Stunnel_client.pool () else None + in + let dd = + Sparse_dd_wrapper.start + ~progress_cb:(progress_callback 0.05 0.9 task) + ~verify_cert ~proto ?base:base_path true (Option.get src) + dest_vdi_url remote_vdi.virtual_size + in + Storage_task.with_cancel task + (fun () -> Sparse_dd_wrapper.cancel dd) + (fun () -> + try 
Sparse_dd_wrapper.wait dd + with Sparse_dd_wrapper.Cancelled -> + Storage_task.raise_cancelled task + ) + ) + ) + ) + (fun () -> + Remote.DP.destroy dbg remote_dp false ; + State.remove_copy id + ) ; + SXM.info "%s: copy complete for local_vdi:%s dest_vdi:%s" __FUNCTION__ + (Storage_interface.Vdi.string_of vdi) + (Storage_interface.Vdi.string_of dest_vdi) ; + D.debug "setting remote content_id <- %s" local_vdi.content_id ; + Remote.VDI.set_content_id dbg dest dest_vdi local_vdi.content_id ; + (* PR-1255: XXX: this is useful because we don't have content_ids by default *) + D.debug "setting local content_id <- %s" local_vdi.content_id ; + Local.VDI.set_content_id dbg sr local_vdi.vdi local_vdi.content_id ; + (* Re-find the VDI to get the updated content_id info *) + let remote_vdi, _ = + find_vdi ~dbg ~sr:dest ~vdi:dest_vdi (module Remote) + in + Some (Vdi_info remote_vdi) + with e -> + D.error "Caught %s: performing cleanup actions" (Printexc.to_string e) ; + perform_cleanup_actions !on_fail ; + raise e + + (** [copy_into_sr] does not requires a dest vdi to be provided, instead, it will + find the nearest vdi on the [dest] sr, and if there is no such vdi, it will + create one. 
*) + let copy_into_sr ~task ~dbg ~sr ~vdi ~vm ~url ~dest ~verify_dest = + D.debug "copy sr:%s vdi:%s url:%s dest:%s verify_dest:%B" + (Storage_interface.Sr.string_of sr) + (Storage_interface.Vdi.string_of vdi) + url + (Storage_interface.Sr.string_of dest) + verify_dest ; + let (module Remote) = get_remote_backend url verify_dest in + (* Find the local VDI *) + try + let local_vdi, _ = find_vdi ~dbg ~sr ~vdi (module Local) in + try + let similar_vdis = Local.VDI.similar_content dbg sr vdi in + let similars = List.map (fun vdi -> vdi.content_id) similar_vdis in + D.debug "Similar VDIs = [ %s ]" + (String.concat "; " + (List.map + (fun x -> + Printf.sprintf "(vdi=%s,content_id=%s)" + (Storage_interface.Vdi.string_of x.vdi) + x.content_id + ) + similar_vdis + ) + ) ; + let remote_vdis = Remote.SR.scan dbg dest in + (* We drop cbt_metadata VDIs that do not have any actual data *) + let remote_vdis = + List.filter (fun vdi -> vdi.ty <> "cbt_metadata") remote_vdis + in + let nearest = + List.fold_left + (fun acc content_id -> + match acc with + | Some _ -> + acc + | None -> ( + try + Some + (List.find + (fun vdi -> + vdi.content_id = content_id + && vdi.virtual_size <= local_vdi.virtual_size + ) + remote_vdis + ) + with Not_found -> None + ) + ) + None similars + in + D.debug "Nearest VDI: content_id=%s vdi=%s" + (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) + (Option.fold ~none:"None" + ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) + nearest + ) ; + let remote_base = + match nearest with + | Some vdi -> + D.debug "Cloning VDI" ; + let vdi_clone = Remote.VDI.clone dbg dest vdi in + D.debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; + ( if vdi_clone.virtual_size <> local_vdi.virtual_size then + let new_size = + Remote.VDI.resize dbg dest vdi_clone.vdi + local_vdi.virtual_size + in + D.debug "Resize remote clone VDI to %Ld: result %Ld" + local_vdi.virtual_size new_size + ) ; + vdi_clone + | None -> + D.debug "Creating a blank 
remote VDI" ; + Remote.VDI.create dbg dest {local_vdi with sm_config= []} + in + let remote_copy = + copy_into_vdi ~task ~dbg ~sr ~vdi ~vm ~url ~dest + ~dest_vdi:remote_base.vdi ~verify_dest + |> vdi_info + in + let snapshot = Remote.VDI.snapshot dbg dest remote_copy in + Remote.VDI.destroy dbg dest remote_copy.vdi ; + Some (Vdi_info snapshot) + with e -> + D.error "Caught %s: copying snapshots vdi" (Printexc.to_string e) ; + raise (Storage_error (Internal_error (Printexc.to_string e))) + with + | Storage_error (Backend_error (code, params)) + | Api_errors.Server_error (code, params) -> + raise (Storage_error (Backend_error (code, params))) + | e -> + raise (Storage_error (Internal_error (Printexc.to_string e))) +end + +let mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~live_vm ~mirror_id ~url + ~dest_sr ~verify_dest ~(remote_mirror : Mirror.mirror_receive_result_vhd_t) + = + D.debug + "%s dbg:%s dp:%s sr:%s vdi:%s mirror_vm:%s live_vm:%s mirror_id:%s url:%s \ + dest_sr:%s verify_dest:%B" + __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm mirror_vm) + (s_of_vm live_vm) mirror_id url (s_of_sr dest_sr) verify_dest ; + let remote_vdi = remote_mirror.mirror_vdi.vdi in + let mirror_dp = remote_mirror.mirror_datapath in + + let uri = + Printf.sprintf "/services/SM/nbd/%s/%s/%s/%s" + (Storage_interface.Vm.string_of mirror_vm) + (Storage_interface.Sr.string_of dest_sr) + (Storage_interface.Vdi.string_of remote_vdi) + mirror_dp + in + D.debug "%s: uri of http request for mirroring is %s" __FUNCTION__ uri ; + let dest_url = Http.Url.set_uri (Http.Url.of_string url) uri in + D.debug "%s url of http request for mirroring is %s" __FUNCTION__ + (Http.Url.to_string dest_url) ; + let request = + Http.Request.make + ~query:(Http.Url.get_query_params dest_url) + ~version:"1.0" ~user_agent:"smapiv2" Http.Put uri + in + let verify_cert = if verify_dest then Stunnel_client.pool () else None in + let transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in + D.debug 
"Searching for data path: %s" dp ; + let attach_info = Local.DP.attach_info dbg sr vdi dp mirror_vm in + + let tapdev = + match tapdisk_of_attach_info attach_info with + | Some tapdev -> + let pid = Tapctl.get_tapdisk_pid tapdev in + let path = Printf.sprintf "/var/run/blktap-control/nbdclient%d" pid in + with_transport ~stunnel_wait_disconnect:false transport + (with_http request (fun (_response, s) -> + (* Enable mirroring on the local machine *) + let control_fd = Unix.socket Unix.PF_UNIX Unix.SOCK_STREAM 0 in + finally + (fun () -> + Unix.connect control_fd (Unix.ADDR_UNIX path) ; + let msg = dp in + let len = String.length msg in + let written = + Unixext.send_fd_substring control_fd msg 0 len [] s + in + if written <> len then ( + D.error "Failed to transfer fd to %s" path ; + failwith "Internal error transferring fd to tapdisk" + ) + ) + (fun () -> Unix.close control_fd) + ) + ) ; + tapdev + | None -> + D.error "%s: vdi %s not attached" __FUNCTION__ (Vdi.string_of vdi) ; + raise + (Storage_interface.Storage_error + (Migration_mirror_fd_failure "VDI Not Attached") + ) + | exception e -> + D.error "%s Caught exception %s:. Performing cleanup." __FUNCTION__ + (Printexc.to_string e) ; + raise + (Storage_interface.Storage_error + (Migration_mirror_fd_failure (Printexc.to_string e)) + ) + in + D.debug "%s Updating active local mirrors: id=%s" __FUNCTION__ mirror_id ; + let alm = + State.Send_state. 
+ { + url + ; dest_sr + ; remote_info= + Some + { + dp= remote_mirror.mirror_datapath + ; vdi= remote_mirror.mirror_vdi.vdi + ; url + ; verify_dest + } + ; local_dp= dp + ; tapdev= Some tapdev + ; failed= false + ; watchdog= None + ; vdi + ; live_vm + ; mirror_key= None + } + in + State.add mirror_id (State.Send_op alm) ; + D.debug "%s Updated mirror_id %s in the active local mirror" __FUNCTION__ + mirror_id ; + tapdev + +let mirror_snapshot ~dbg ~sr ~dp ~mirror_id ~local_vdi = + D.debug "%s dbg:%s sr:%s dp:%s mirror_id:%s local_vdi:%s" __FUNCTION__ dbg + (s_of_sr sr) dp mirror_id + (string_of_vdi_info local_vdi) ; + SXM.info "%s About to snapshot VDI = %s" __FUNCTION__ + (string_of_vdi_info local_vdi) ; + let local_vdi = add_to_sm_config local_vdi "mirror" ("nbd:" ^ dp) in + let local_vdi = add_to_sm_config local_vdi "base_mirror" mirror_id in + let snapshot = + try Local.VDI.snapshot dbg sr local_vdi with + | Storage_interface.Storage_error (Backend_error (code, _)) + when code = "SR_BACKEND_FAILURE_44" -> + raise + (Storage_interface.Storage_error + (Migration_mirror_snapshot_failure + (Printf.sprintf "%s:%s" Api_errors.sr_source_space_insufficient + (Storage_interface.Sr.string_of sr) + ) + ) + ) + | e -> + raise + (Storage_interface.Storage_error + (Migration_mirror_snapshot_failure (Printexc.to_string e)) + ) + in + + SXM.info "%s: snapshot created, mirror initiated vdi:%s snapshot_of:%s" + __FUNCTION__ + (Storage_interface.Vdi.string_of snapshot.vdi) + (Storage_interface.Vdi.string_of local_vdi.vdi) ; + + snapshot + +let mirror_checker mirror_id tapdev = + let rec inner () = + let alm_opt = State.find_active_local_mirror mirror_id in + match alm_opt with + | Some alm -> + let stats = Tapctl.stats (Tapctl.create ()) tapdev in + if stats.Tapctl.Stats.nbd_mirror_failed = 1 then ( + D.error "Tapdisk mirroring has failed" ; + Updates.add (Dynamic.Mirror mirror_id) updates + ) ; + alm.State.Send_state.watchdog <- + Some + (Scheduler.one_shot scheduler 
(Scheduler.Delta 5) "tapdisk_watchdog" + inner + ) + | None -> + () + in + inner () + +let mirror_copy ~task ~dbg ~sr ~snapshot ~copy_vm ~url ~dest_sr ~remote_mirror + ~verify_dest = + (* Copy the snapshot to the remote *) + try + Storage_task.with_subtask task "copy" (fun () -> + Copy.copy_into_vdi ~task ~dbg ~sr ~vdi:snapshot.vdi ~vm:copy_vm ~url + ~dest:dest_sr ~dest_vdi:remote_mirror.Mirror.copy_diffs_to + ~verify_dest + ) + |> vdi_info + with e -> + raise (Storage_error (Migration_mirror_copy_failure (Printexc.to_string e))) + +let mirror_cleanup ~dbg ~sr ~snapshot = + D.debug "Destroying snapshot on src" ; + Local.VDI.destroy dbg sr snapshot.vdi + +module MIRROR : SMAPIv2_MIRROR = struct + type context = unit + + let send_start _ctx ~dbg ~task_id ~dp ~sr ~vdi ~mirror_vm ~mirror_id + ~local_vdi ~copy_vm ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest = + D.debug + "%s dbg: %s dp: %s sr: %s vdi:%s mirror_vm:%s mirror_id: %s live_vm: %s \ + url:%s dest_sr:%s verify_dest:%B" + __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm mirror_vm) + mirror_id (s_of_vm live_vm) url (s_of_sr dest_sr) verify_dest ; + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + + let read_write = true in + (* DP set up is only essential for MIRROR.start/stop due to their open ended pattern. + It's not necessary for copy which will take care of that itself. 
*) + ignore (Local.VDI.attach3 dbg dp sr vdi (Vm.of_string "0") read_write) ; + Local.VDI.activate3 dbg dp sr vdi (Vm.of_string "0") ; + match remote_mirror with + | Mirror.SMAPIv3_mirror _ -> + (* this should never happen *) + raise + (Storage_error + (Migration_mirror_failure + "Incorrect remote mirror format for SMAPIv1" + ) + ) + | Mirror.Vhd_mirror mirror_res -> + let tapdev = + mirror_pass_fds ~dbg ~dp ~sr ~vdi ~mirror_vm ~live_vm ~mirror_id ~url + ~dest_sr ~verify_dest ~remote_mirror:mirror_res + in + + let snapshot = mirror_snapshot ~dbg ~sr ~dp ~mirror_id ~local_vdi in + + mirror_checker mirror_id tapdev ; + let task = Storage_task.(handle_of_id tasks) task_id in + let new_parent = + mirror_copy ~task ~dbg ~sr ~snapshot ~copy_vm ~url ~dest_sr + ~remote_mirror:mirror_res ~verify_dest + in + + D.debug "Local VDI %s = remote VDI %s" + (Storage_interface.Vdi.string_of snapshot.vdi) + (Storage_interface.Vdi.string_of new_parent.vdi) ; + D.debug "Local VDI %s now mirrored to remote VDI: %s" + (Storage_interface.Vdi.string_of local_vdi.vdi) + (Storage_interface.Vdi.string_of mirror_res.Mirror.mirror_vdi.vdi) ; + mirror_cleanup ~dbg ~sr ~snapshot + + let receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm + (module SMAPI : SMAPIv2) = + let on_fail : (unit -> unit) list ref = ref [] in + let vdis = SMAPI.SR.scan dbg sr in + (* We drop cbt_metadata VDIs that do not have any actual data *) + let vdis = List.filter (fun vdi -> vdi.ty <> "cbt_metadata") vdis in + let leaf_dp = SMAPI.DP.create dbg Uuidx.(to_string (make ())) in + try + let vdi_info = {vdi_info with sm_config= [("base_mirror", id)]} in + let leaf = SMAPI.VDI.create dbg sr vdi_info in + D.info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; + on_fail := (fun () -> SMAPI.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; + (* dummy VDI is created so that the leaf VDI becomes a differencing disk, + useful for calling VDI.compose later on *) + let dummy = SMAPI.VDI.snapshot dbg sr leaf in 
+ on_fail := (fun () -> SMAPI.VDI.destroy dbg sr dummy.vdi) :: !on_fail ; + D.debug "%s Created dummy snapshot for mirror receive: %s" __FUNCTION__ + (string_of_vdi_info dummy) ; + let _ : backend = SMAPI.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in + SMAPI.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; + let nearest = + List.fold_left + (fun acc content_id -> + match acc with + | Some _ -> + acc + | None -> ( + try + Some + (List.find + (fun vdi -> + vdi.content_id = content_id + && vdi.virtual_size <= vdi_info.virtual_size + ) + vdis + ) + with Not_found -> None + ) + ) + None similar + in + D.debug "Nearest VDI: content_id=%s vdi=%s" + (Option.fold ~none:"None" ~some:(fun x -> x.content_id) nearest) + (Option.fold ~none:"None" + ~some:(fun x -> Storage_interface.Vdi.string_of x.vdi) + nearest + ) ; + let parent = + match nearest with + | Some vdi -> + D.debug "Cloning VDI" ; + let vdi = add_to_sm_config vdi "base_mirror" id in + let vdi_clone = SMAPI.VDI.clone dbg sr vdi in + D.debug "Clone: %s" (Storage_interface.Vdi.string_of vdi_clone.vdi) ; + ( if vdi_clone.virtual_size <> vdi_info.virtual_size then + let new_size = + SMAPI.VDI.resize dbg sr vdi_clone.vdi vdi_info.virtual_size + in + D.debug "Resize clone VDI to %Ld: result %Ld" + vdi_info.virtual_size new_size + ) ; + vdi_clone + | None -> + D.debug "Creating a blank remote VDI" ; + SMAPI.VDI.create dbg sr vdi_info + in + D.debug "Parent disk content_id=%s" parent.content_id ; + (* The state tracking here does not need to be changed, however, it will be + stored in memory on different hosts. If receive_start is called, by an older + host, this State.add is run on the destination host. On the other hand, if + receive_start3 is called, this will be stored in memory on the source host. + receive_finalize3 and receive_cancel2 handles this similarly. *) + State.add id + State.( + Recv_op + Receive_state. 
+ { + sr + ; dummy_vdi= dummy.vdi + ; leaf_vdi= leaf.vdi + ; leaf_dp + ; parent_vdi= parent.vdi + ; remote_vdi= vdi_info.vdi + ; mirror_vm= vm + ; url= "" + ; verify_dest= false + } + ) ; + let nearest_content_id = Option.map (fun x -> x.content_id) nearest in + Mirror.Vhd_mirror + { + Mirror.mirror_vdi= leaf + ; mirror_datapath= leaf_dp + ; copy_diffs_from= nearest_content_id + ; copy_diffs_to= parent.vdi + ; dummy_vdi= dummy.vdi + } + with e -> + List.iter + (fun op -> + try op () + with e -> + D.debug "Caught exception in on_fail: %s" (Printexc.to_string e) + ) + !on_fail ; + raise e + + let receive_start _ctx ~dbg ~sr ~vdi_info ~id ~similar = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s" __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + id ; + receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm:(Vm.of_string "0") + (module Local) + + let receive_start2 _ctx ~dbg ~sr ~vdi_info ~id ~similar ~vm = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s" __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + id ; + receive_start_common ~dbg ~sr ~vdi_info ~id ~similar ~vm (module Local) + + let receive_start3 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar ~vm ~url + ~verify_dest = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s vm: %s url: %s verify_dest: %B" + __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + mirror_id (s_of_vm vm) url verify_dest ; + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + receive_start_common ~dbg ~sr ~vdi_info ~id:mirror_id ~similar ~vm + (module Remote) + + let receive_finalize _ctx ~dbg ~id = + D.debug "%s dbg:%s id: %s" __FUNCTION__ dbg id ; + let recv_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter (fun r -> Local.DP.destroy dbg r.leaf_dp false) recv_state ; + State.remove_receive_mirror id + + let receive_finalize_common ~dbg ~mirror_id (module SMAPI : SMAPIv2) = + let recv_state = State.find_active_receive_mirror mirror_id 
in + let open State.Receive_state in + Option.iter + (fun r -> + SXM.info + "%s Mirror done. Compose on the dest sr %s parent %s and leaf %s" + __FUNCTION__ (Sr.string_of r.sr) + (Vdi.string_of r.parent_vdi) + (Vdi.string_of r.leaf_vdi) ; + SMAPI.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; + SMAPI.VDI.compose dbg r.sr r.parent_vdi r.leaf_vdi ; + (* On SMAPIv3, compose would have removed the now invalid dummy vdi, so + there is no need to destroy it anymore, while this is necessary on SMAPIv1 SRs. *) + D.log_and_ignore_exn (fun () -> SMAPI.VDI.destroy dbg r.sr r.dummy_vdi) ; + SMAPI.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" + ) + recv_state ; + State.remove_receive_mirror mirror_id + + let receive_finalize2 _ctx ~dbg ~id = + D.debug "%s dbg:%s id: %s" __FUNCTION__ dbg id ; + receive_finalize_common ~dbg ~mirror_id:id (module Local) + + let receive_finalize3 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg + mirror_id (s_of_sr sr) url verify_dest ; + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + receive_finalize_common ~dbg ~mirror_id (module Remote) + + let receive_cancel _ctx ~dbg ~id = + D.debug "%s dbg:%s mirror_id:%s" __FUNCTION__ dbg id ; + let receive_state = State.find_active_receive_mirror id in + let open State.Receive_state in + Option.iter + (fun r -> + D.log_and_ignore_exn (fun () -> Local.DP.destroy dbg r.leaf_dp false) ; + List.iter + (fun v -> + D.log_and_ignore_exn (fun () -> Local.VDI.destroy dbg r.sr v) + ) + [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] + ) + receive_state ; + State.remove_receive_mirror id + + exception Timeout of Mtime.Span.t + + let reqs_outstanding_timeout = Mtime.Span.(150 * s) + + let pp_time () = Fmt.str "%a" Mtime.Span.pp + + (* Tapdisk should time out after 2 mins. 
We can wait a little longer *) + + let pre_deactivate_hook _ctx ~dbg ~dp ~sr ~vdi = + D.debug "%s dbg:%s dp:%s sr:%s vdi:%s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) ; + let open State.Send_state in + let id = State.mirror_id_of (sr, vdi) in + let start = Mtime_clock.counter () in + State.find_active_local_mirror id + |> Option.iter (fun s -> + (* We used to pause here and then check the nbd_mirror_failed key. Now, we poll + until the number of outstanding requests has gone to zero, then check the + status. This avoids confusing the backend (CA-128460) *) + try + match s.tapdev with + | None -> + () + | Some tapdev -> + let open Tapctl in + let ctx = create () in + let rec wait () = + let elapsed = Mtime_clock.count start in + if Mtime.Span.compare elapsed reqs_outstanding_timeout > 0 + then + raise (Timeout elapsed) ; + let st = stats ctx tapdev in + if st.Stats.reqs_outstanding > 0 then ( + Thread.delay 1.0 ; wait () + ) else + (st, elapsed) + in + let st, elapsed = wait () in + D.debug "Got final stats after waiting %a" pp_time elapsed ; + if st.Stats.nbd_mirror_failed = 1 then ( + D.error "tapdisk reports mirroring failed" ; + s.failed <- true + ) ; + Option.iter + (fun id -> Scheduler.cancel scheduler id) + s.watchdog + with + | Timeout elapsed -> + D.error + "Timeout out after %a waiting for tapdisk to complete all \ + outstanding requests while migrating vdi %s of domain %s" + pp_time elapsed (s_of_vdi vdi) (s_of_vm s.live_vm) ; + s.failed <- true + | e -> + D.error + "Caught exception while finally checking mirror state: %s \ + when migrating vdi %s of domain %s" + (Printexc.to_string e) (s_of_vdi vdi) (s_of_vm s.live_vm) ; + s.failed <- true + ) + + let has_mirror_failed _ctx ~dbg:_ ~mirror_id ~sr:_ = + match State.find_active_local_mirror mirror_id with + | Some {tapdev= Some tapdev; failed; _} -> ( + try + let stats = Tapctl.stats (Tapctl.create ()) tapdev in + stats.Tapctl.Stats.nbd_mirror_failed = 1 + with _ -> + D.debug "Using cached copy of 
failure status" ; + failed + ) + | _ -> + false + + let list _ctx = Storage_interface.unimplemented __FUNCTION__ + + let stat _ctx = Storage_interface.unimplemented __FUNCTION__ + + let receive_cancel2 _ctx ~dbg ~mirror_id ~url ~verify_dest = + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let receive_state = State.find_active_receive_mirror mirror_id in + let open State.Receive_state in + Option.iter + (fun r -> + D.log_and_ignore_exn (fun () -> Remote.DP.destroy dbg r.leaf_dp false) ; + List.iter + (fun v -> + D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr v) + ) + [r.dummy_vdi; r.leaf_vdi; r.parent_vdi] + ) + receive_state ; + State.remove_receive_mirror mirror_id +end diff --git a/ocaml/xapi/storage_smapiv1_migrate.mli b/ocaml/xapi/storage_smapiv1_migrate.mli new file mode 100644 index 00000000000..a1021858e46 --- /dev/null +++ b/ocaml/xapi/storage_smapiv1_migrate.mli @@ -0,0 +1,89 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +val with_activated_disk : + dbg:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi option + -> dp:string + -> vm:Storage_interface.vm + -> (string option -> 'a) + -> 'a + +val tapdisk_of_attach_info : Storage_interface.backend -> Tapctl.tapdev option + +module Copy : sig + val copy_into_vdi : + task:Storage_task.Storage_task.task_handle + -> dbg:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi + -> vm:Storage_interface.vm + -> url:string + -> dest:Storage_interface.sr + -> dest_vdi:Storage_interface.vdi + -> verify_dest:bool + -> Storage_interface.async_result_t option + + val copy_into_sr : + task:Storage_task.Storage_task.task_handle + -> dbg:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi + -> vm:Storage_interface.vm + -> url:string + -> dest:Storage_interface.sr + -> verify_dest:bool + -> Storage_interface.async_result_t option +end + +val mirror_pass_fds : + dbg:string + -> dp:string + -> sr:Storage_interface.sr + -> vdi:Storage_interface.vdi + -> mirror_vm:Storage_interface.vm + -> live_vm:Storage_interface.vm + -> mirror_id:string + -> url:string + -> dest_sr:Storage_interface.sr + -> verify_dest:bool + -> remote_mirror:Storage_interface.Mirror.mirror_receive_result_vhd_t + -> Tapctl.tapdev + +val mirror_snapshot : + dbg:string + -> sr:Storage_interface.sr + -> dp:string + -> mirror_id:string + -> local_vdi:Storage_interface.vdi_info + -> Storage_interface.vdi_info + +val mirror_checker : string -> Tapctl.tapdev -> unit + +val mirror_copy : + task:Storage_task.Storage_task.task_handle + -> dbg:string + -> sr:Storage_interface.sr + -> snapshot:Storage_interface.vdi_info + -> copy_vm:Storage_interface.vm + -> url:string + -> dest_sr:Storage_interface.sr + -> remote_mirror:Storage_interface.Mirror.mirror_receive_result_vhd_t + -> verify_dest:bool + -> Storage_interface.vdi_info + +module MIRROR : SMAPIv2_MIRROR diff --git 
a/ocaml/xapi/storage_smapiv1_wrapper.ml b/ocaml/xapi/storage_smapiv1_wrapper.ml index 8d6de8e8e84..86879780fba 100644 --- a/ocaml/xapi/storage_smapiv1_wrapper.ml +++ b/ocaml/xapi/storage_smapiv1_wrapper.ml @@ -1142,24 +1142,60 @@ functor (s_of_vdi vdi) url (s_of_sr dest) ; Impl.DATA.copy context ~dbg ~sr ~vdi ~vm ~url ~dest - module MIRROR = struct - let start context ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm ~url ~dest = - info "DATA.MIRROR.start dbg:%s sr:%s vdi:%s url:%s dest:%s" dbg - (s_of_sr sr) (s_of_vdi vdi) url (s_of_sr dest) ; - Impl.DATA.MIRROR.start context ~dbg ~sr ~vdi ~dp ~mirror_vm ~copy_vm - ~url ~dest + let mirror _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~dest:_ = + Storage_interface.unimplemented __FUNCTION__ + + let stat _context ~dbg:_ ~sr:_ ~vdi:_ ~vm:_ ~key:_ = + Storage_interface.unimplemented __FUNCTION__ + + (* tapdisk supports three kind of nbd servers, the old style nbdserver, + the new style nbd server and a real nbd server. The old and new style nbd servers + are "special" nbd servers that accept fds passed via SCM_RIGHTS and handle + connection based on that fd. The real nbd server is a "normal" nbd server + that accepts nbd connections from nbd clients, and it does not support fd + passing. 
*) + let get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style = + info "%s DATA.get_nbd_server dbg:%s dp:%s sr:%s vdi:%s vm:%s" + __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; + let attach_info = DP.attach_info context ~dbg:"nbd" ~sr ~vdi ~dp ~vm in + match Storage_smapiv1_migrate.tapdisk_of_attach_info attach_info with + | Some tapdev -> + let minor = Tapctl.get_minor tapdev in + let pid = Tapctl.get_tapdisk_pid tapdev in + let path = + match style with + | `newstyle -> + Printf.sprintf "/var/run/blktap-control/nbdserver-new%d.%d" + pid minor + | `oldstyle -> + Printf.sprintf "/var/run/blktap-control/nbdserver%d.%d" pid + minor + | `real -> + Printf.sprintf "/var/run/blktap-control/nbd%d.%d" pid minor + in + debug "%s nbd server path is %s" __FUNCTION__ path ; + path + | None -> + raise + (Storage_interface.Storage_error + (Backend_error + (Api_errors.internal_error, ["No tapdisk attach info found"]) + ) + ) + + let import_activate context ~dbg ~dp ~sr ~vdi ~vm = + get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`oldstyle - let stop context ~dbg ~id = - info "DATA.MIRROR.stop dbg:%s id:%s" dbg id ; - Impl.DATA.MIRROR.stop context ~dbg ~id + let get_nbd_server context ~dbg ~dp ~sr ~vdi ~vm = + get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`real - let list context ~dbg = - info "DATA.MIRROR.active dbg:%s" dbg ; - Impl.DATA.MIRROR.list context ~dbg + module MIRROR = struct + type context = unit - let stat context ~dbg ~id = - info "DATA.MIRROR.stat dbg:%s id:%s" dbg id ; - Impl.DATA.MIRROR.stat context ~dbg ~id + let send_start _ctx ~dbg:_ ~task_id:_ ~dp:_ ~sr:_ ~vdi:_ ~mirror_vm:_ + ~mirror_id:_ ~local_vdi:_ ~copy_vm:_ ~live_vm:_ ~url:_ + ~remote_mirror:_ ~dest_sr:_ ~verify_dest:_ = + Storage_interface.unimplemented __FUNCTION__ let receive_start context ~dbg ~sr ~vdi_info ~id ~similar = info "DATA.MIRROR.receive_start dbg:%s sr:%s id:%s similar:[%s]" dbg @@ -1176,6 +1212,11 @@ functor Impl.DATA.MIRROR.receive_start2 
context ~dbg ~sr ~vdi_info ~id ~similar ~vm + let receive_start3 _context ~dbg:_ ~sr:_ ~vdi_info:_ ~mirror_id:_ + ~similar:_ ~vm:_ = + (* See Storage_smapiv1_migrate.receive_start3 *) + Storage_interface.unimplemented __FUNCTION__ + let receive_finalize context ~dbg ~id = info "DATA.MIRROR.receive_finalize dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_finalize context ~dbg ~id @@ -1184,54 +1225,28 @@ functor info "DATA.MIRROR.receive_finalize2 dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_finalize2 context ~dbg ~id + let receive_finalize3 _context ~dbg:_ ~mirror_id:_ ~sr:_ ~url:_ + ~verify_dest:_ = + (* see storage_smapiv{1,3}_migrate *) + Storage_interface.unimplemented __FUNCTION__ + let receive_cancel context ~dbg ~id = info "DATA.MIRROR.receive_cancel dbg:%s id:%s" dbg id ; Impl.DATA.MIRROR.receive_cancel context ~dbg ~id - (* tapdisk supports three kind of nbd servers, the old style nbdserver, - the new style nbd server and a real nbd server. The old and new style nbd servers - are "special" nbd servers that accept fds passed via SCM_RIGHTS and handle - connection based on that fd. The real nbd server is a "normal" nbd server - that accepts nbd connections from nbd clients, and it does not support fd - passing. 
*) - let get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style = - info "%s DATA.MIRROR.get_nbd_server dbg:%s dp:%s sr:%s vdi:%s vm:%s" - __FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm vm) ; - let attach_info = - DP.attach_info context ~dbg:"nbd" ~sr ~vdi ~dp ~vm - in - match Storage_migrate.tapdisk_of_attach_info attach_info with - | Some tapdev -> - let minor = Tapctl.get_minor tapdev in - let pid = Tapctl.get_tapdisk_pid tapdev in - let path = - match style with - | `newstyle -> - Printf.sprintf "/var/run/blktap-control/nbdserver-new%d.%d" - pid minor - | `oldstyle -> - Printf.sprintf "/var/run/blktap-control/nbdserver%d.%d" pid - minor - | `real -> - Printf.sprintf "/var/run/blktap-control/nbd%d.%d" pid minor - in - debug "%s nbd server path is %s" __FUNCTION__ path ; - path - | None -> - raise - (Storage_interface.Storage_error - (Backend_error - ( Api_errors.internal_error - , ["No tapdisk attach info found"] - ) - ) - ) + let receive_cancel2 _context ~dbg:_ ~mirror_id:_ ~url:_ ~verify_dest:_ = + Storage_interface.unimplemented __FUNCTION__ + + let pre_deactivate_hook _context ~dbg:_ ~dp:_ ~sr:_ ~vdi:_ = + Storage_interface.unimplemented __FUNCTION__ + + let has_mirror_failed _context ~dbg:_ ~mirror_id:_ ~sr:_ = + Storage_interface.unimplemented __FUNCTION__ - let import_activate context ~dbg ~dp ~sr ~vdi ~vm = - get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`oldstyle + let list _context ~dbg:_ = Storage_interface.unimplemented __FUNCTION__ - let get_nbd_server context ~dbg ~dp ~sr ~vdi ~vm = - get_nbd_server_common context ~dbg ~dp ~sr ~vdi ~vm ~style:`real + let stat _context ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ end end diff --git a/ocaml/xapi/storage_smapiv3_migrate.ml b/ocaml/xapi/storage_smapiv3_migrate.ml new file mode 100644 index 00000000000..774239c0804 --- /dev/null +++ b/ocaml/xapi/storage_smapiv3_migrate.ml @@ -0,0 +1,337 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free 
software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +module Unixext = Xapi_stdext_unix.Unixext +module State = Storage_migrate_helper.State +module SXM = Storage_migrate_helper.SXM +open Storage_interface +open Storage_task +open Xmlrpc_client +open Storage_migrate_helper + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +let s_of_sr = Storage_interface.Sr.string_of + +let s_of_vdi = Storage_interface.Vdi.string_of + +let s_of_vm = Storage_interface.Vm.string_of + +let export_nbd_proxy ~remote_url ~mirror_vm ~sr ~vdi ~dp ~verify_dest = + D.debug "%s spawning exporting nbd proxy" __FUNCTION__ ; + let path = + Printf.sprintf "/var/run/nbdproxy/export/%s" (Vm.string_of mirror_vm) + in + let proxy_srv = Fecomms.open_unix_domain_sock_server path in + try + let uri = + Printf.sprintf "/services/SM/nbdproxy/import/%s/%s/%s/%s" + (Vm.string_of mirror_vm) (Sr.string_of sr) (Vdi.string_of vdi) dp + in + + let dest_url = Http.Url.set_uri (Http.Url.of_string remote_url) uri in + D.debug "%s now waiting for connection at %s" __FUNCTION__ path ; + let nbd_client, _addr = Unix.accept proxy_srv in + D.debug "%s connection accepted" __FUNCTION__ ; + let request = + Http.Request.make + ~query:(Http.Url.get_query_params dest_url) + ~version:"1.0" ~user_agent:"export_nbd_proxy" Http.Put uri + in + D.debug "%s making request to dest %s" __FUNCTION__ + (Http.Url.to_string dest_url) ; + let verify_cert = if verify_dest then Stunnel_client.pool () else None in + let 
transport = Xmlrpc_client.transport_of_url ~verify_cert dest_url in + with_transport ~stunnel_wait_disconnect:false transport + (with_http request (fun (_response, s) -> + D.debug "%s starting proxy" __FUNCTION__ ; + Unixext.proxy (Unix.dup s) (Unix.dup nbd_client) + ) + ) ; + Unix.close proxy_srv + with e -> + D.debug "%s did not get connection due to %s, closing" __FUNCTION__ + (Printexc.to_string e) ; + Unix.close proxy_srv ; + raise e + +let mirror_wait ~dbg ~sr ~vdi ~vm ~mirror_id mirror_key = + let rec mirror_wait_rec key = + let {failed; complete; progress} : Mirror.status = + Local.DATA.stat dbg sr vdi vm key + in + if complete then ( + Option.fold ~none:() + ~some:(fun p -> D.info "%s progress is %f" __FUNCTION__ p) + progress ; + D.info "%s qemu mirror %s completed" mirror_id __FUNCTION__ + ) else if failed then ( + Option.iter + (fun (snd_state : State.Send_state.t) -> snd_state.failed <- true) + (State.find_active_local_mirror mirror_id) ; + D.info "%s qemu mirror %s failed" mirror_id __FUNCTION__ ; + State.find_active_local_mirror mirror_id + |> Option.iter (fun (s : State.Send_state.t) -> s.failed <- true) ; + Updates.add (Dynamic.Mirror mirror_id) updates ; + raise + (Storage_interface.Storage_error + (Migration_mirror_failure "Mirror failed during syncing") + ) + ) else ( + Option.fold ~none:() + ~some:(fun p -> D.info "%s progress is %f" __FUNCTION__ p) + progress ; + mirror_wait_rec key + ) + in + + match mirror_key with + | Storage_interface.Mirror.CopyV1 _ -> + () + | Storage_interface.Mirror.MirrorV1 _ -> + D.debug "%s waiting for mirroring to be done" __FUNCTION__ ; + mirror_wait_rec mirror_key + +module MIRROR : SMAPIv2_MIRROR = struct + type context = unit + + let send_start _ctx ~dbg ~task_id:_ ~dp ~sr ~vdi ~mirror_vm ~mirror_id + ~local_vdi:_ ~copy_vm:_ ~live_vm ~url ~remote_mirror ~dest_sr ~verify_dest + = + D.debug + "%s dbg: %s dp: %s sr: %s vdi:%s mirror_vm:%s mirror_id: %s live_vm: %s \ + url:%s dest_sr:%s verify_dest:%B" + 
__FUNCTION__ dbg dp (s_of_sr sr) (s_of_vdi vdi) (s_of_vm mirror_vm) + mirror_id (s_of_vm live_vm) url (s_of_sr dest_sr) verify_dest ; + ignore (Local.VDI.attach3 dbg dp sr vdi (Vm.of_string "0") true) ; + (* TODO we are not activating the VDI here because SMAPIv3 does not support + activating the VDI again on dom 0 when it is already activated on the live_vm. + This means that if the VM shutsdown while SXM is in progress the + mirroring for SMAPIv3 will fail.*) + let nbd_proxy_path = + Printf.sprintf "/var/run/nbdproxy/export/%s" (Vm.string_of mirror_vm) + in + match remote_mirror with + | Mirror.Vhd_mirror _ -> + raise + (Storage_error + (Migration_preparation_failure + "Incorrect remote mirror format for SMAPIv3" + ) + ) + | Mirror.SMAPIv3_mirror {nbd_export; mirror_datapath; mirror_vdi} -> ( + try + let nbd_uri = + Uri.make ~scheme:"nbd+unix" ~host:"" ~path:nbd_export + ~query:[("socket", [nbd_proxy_path])] + () + |> Uri.to_string + in + let _ : Thread.t = + Thread.create + (fun () -> + export_nbd_proxy ~remote_url:url ~mirror_vm ~sr:dest_sr + ~vdi:mirror_vdi.vdi ~dp:mirror_datapath ~verify_dest + ) + () + in + + D.info "%s nbd_proxy_path: %s nbd_url %s" __FUNCTION__ nbd_proxy_path + nbd_uri ; + let mk = Local.DATA.mirror dbg sr vdi live_vm nbd_uri in + + D.debug "%s Updating active local mirrors: id=%s" __FUNCTION__ mirror_id ; + let alm = + State.Send_state. 
+ { + url + ; dest_sr + ; remote_info= + Some + {dp= mirror_datapath; vdi= mirror_vdi.vdi; url; verify_dest} + ; local_dp= dp + ; tapdev= None + ; failed= false + ; watchdog= None + ; vdi + ; live_vm + ; mirror_key= Some mk + } + in + State.add mirror_id (State.Send_op alm) ; + D.debug "%s Updated mirror_id %s in the active local mirror" + __FUNCTION__ mirror_id ; + mirror_wait ~dbg ~sr ~vdi ~vm:live_vm ~mirror_id mk + with e -> + D.error "%s caught exception during mirror: %s" __FUNCTION__ + (Printexc.to_string e) ; + raise + (Storage_interface.Storage_error + (Migration_mirror_failure (Printexc.to_string e)) + ) + ) + + let receive_start _ctx ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ = + Storage_interface.unimplemented __FUNCTION__ + + let receive_start2 _ctx ~dbg:_ ~sr:_ ~vdi_info:_ ~id:_ ~similar:_ ~vm:_ = + Storage_interface.unimplemented __FUNCTION__ + + let receive_start3 _ctx ~dbg ~sr ~vdi_info ~mirror_id ~similar:_ ~vm ~url + ~verify_dest = + D.debug "%s dbg: %s sr: %s vdi: %s id: %s vm: %s url: %s verify_dest: %B" + __FUNCTION__ dbg (s_of_sr sr) + (string_of_vdi_info vdi_info) + mirror_id (s_of_vm vm) url verify_dest ; + let module Remote = StorageAPI (Idl.Exn.GenClient (struct + let rpc = + Storage_utils.rpc ~srcstr:"smapiv2" ~dststr:"dst_smapiv2" + (Storage_utils.connection_args_of_uri ~verify_dest url) + end)) in + let on_fail : (unit -> unit) list ref = ref [] in + try + (* We drop cbt_metadata VDIs that do not have any actual data *) + let (vdi_info : vdi_info) = + {vdi_info with sm_config= [("base_mirror", mirror_id)]} + in + let leaf_dp = Remote.DP.create dbg Uuidx.(to_string (make ())) in + let leaf = Remote.VDI.create dbg sr vdi_info in + D.info "Created leaf VDI for mirror receive: %s" (string_of_vdi_info leaf) ; + on_fail := (fun () -> Remote.VDI.destroy dbg sr leaf.vdi) :: !on_fail ; + let backend = Remote.VDI.attach3 dbg leaf_dp sr leaf.vdi vm true in + let nbd_export = + match nbd_export_of_attach_info backend with + | None -> + raise + 
(Storage_error + (Migration_preparation_failure "Cannot parse nbd uri") + ) + | Some export -> + export + in + D.debug "%s activating dp %s sr: %s vdi: %s vm: %s" __FUNCTION__ leaf_dp + (s_of_sr sr) (s_of_vdi leaf.vdi) (s_of_vm vm) ; + Remote.VDI.activate3 dbg leaf_dp sr leaf.vdi vm ; + let qcow2_res = + {Mirror.mirror_vdi= leaf; mirror_datapath= leaf_dp; nbd_export} + in + let remote_mirror = Mirror.SMAPIv3_mirror qcow2_res in + D.debug + "%s updating receiving state lcoally to id: %s vm: %s vdi_info: %s" + __FUNCTION__ mirror_id (s_of_vm vm) + (string_of_vdi_info vdi_info) ; + State.add mirror_id + State.( + Recv_op + Receive_state. + { + sr + ; leaf_vdi= qcow2_res.mirror_vdi.vdi + ; leaf_dp= qcow2_res.mirror_datapath + ; remote_vdi= vdi_info.vdi + ; mirror_vm= vm + ; dummy_vdi= + Vdi.of_string "dummy" + (* No dummy_vdi is needed when migrating from SMAPIv3 SRs, having a + "dummy" VDI here is fine as cleanup code for SMAPIv3 will not + access dummy_vdi, and all the clean up functions will ignore + exceptions when trying to clean up the dummy VDIs even if they + do access dummy_vdi. 
The same applies to parent_vdi *) + ; parent_vdi= Vdi.of_string "dummy" + ; url + ; verify_dest + } + ) ; + remote_mirror + with e -> + List.iter + (fun op -> + try op () + with e -> + D.warn "Caught exception in on_fail: %s performing cleaning up" + (Printexc.to_string e) + ) + !on_fail ; + raise e + + let receive_finalize _ctx ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ + + let receive_finalize2 _ctx ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ + + let receive_finalize3 _ctx ~dbg ~mirror_id ~sr ~url ~verify_dest = + D.debug "%s dbg:%s id: %s sr: %s url: %s verify_dest: %B" __FUNCTION__ dbg + mirror_id (s_of_sr sr) url verify_dest ; + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let open State.Receive_state in + let recv_state = State.find_active_receive_mirror mirror_id in + Option.iter + (fun r -> + Remote.DP.destroy2 dbg r.leaf_dp r.sr r.leaf_vdi r.mirror_vm false ; + Remote.VDI.remove_from_sm_config dbg r.sr r.leaf_vdi "base_mirror" + ) + recv_state ; + State.remove_receive_mirror mirror_id + + let receive_cancel _ctx ~dbg:_ ~id:_ = + Storage_interface.unimplemented __FUNCTION__ + + let list _ctx = Storage_interface.unimplemented __FUNCTION__ + + let stat _ctx = Storage_interface.unimplemented __FUNCTION__ + + let receive_cancel2 _ctx ~dbg ~mirror_id ~url ~verify_dest = + D.debug "%s dbg:%s mirror_id:%s url:%s verify_dest:%B" __FUNCTION__ dbg + mirror_id url verify_dest ; + let (module Remote) = + Storage_migrate_helper.get_remote_backend url verify_dest + in + let receive_state = State.find_active_receive_mirror mirror_id in + let open State.Receive_state in + Option.iter + (fun r -> + D.log_and_ignore_exn (fun () -> Remote.DP.destroy dbg r.leaf_dp false) ; + D.log_and_ignore_exn (fun () -> Remote.VDI.destroy dbg r.sr r.leaf_vdi) + ) + receive_state ; + State.remove_receive_mirror mirror_id + + let has_mirror_failed _ctx ~dbg ~mirror_id ~sr = + match State.find_active_local_mirror 
mirror_id with + | Some ({mirror_key= Some mk; vdi; live_vm; _} : State.Send_state.t) -> + let {failed; _} : Mirror.status = + Local.DATA.stat dbg sr vdi live_vm mk + in + failed + | _ -> + false + + (* TODO currently we make the pre_deactivate_hook for SMAPIv3 a noop while for + SMAPIv1 it will do a final check of the state of the mirror and report error + if there is a mirror failure. We leave this for SMAPIv3 because the Data.stat + call, which checks for the state of the mirror stops working once the domain + has been paused, which happens before VDI.deactivate, hence we cannot do this check in + pre_deactivate_hook. Instead we work around this by doing mirror check in mirror_wait + as we repeatedly poll the state of the mirror job. In the future we might + want to invent a different hook that can be called to do a final check just + before the VM is paused. *) + let pre_deactivate_hook _ctx ~dbg ~dp ~sr ~vdi = + D.debug "%s dbg: %s dp: %s sr: %s vdi: %s" __FUNCTION__ dbg dp (s_of_sr sr) + (s_of_vdi vdi) +end diff --git a/ocaml/xapi/storage_smapiv3_migrate.mli b/ocaml/xapi/storage_smapiv3_migrate.mli new file mode 100644 index 00000000000..d47b82cd86c --- /dev/null +++ b/ocaml/xapi/storage_smapiv3_migrate.mli @@ -0,0 +1,17 @@ +(* + * Copyright (c) Cloud Software Group + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *) + +module type SMAPIv2_MIRROR = Storage_interface.MIRROR + +module MIRROR : SMAPIv2_MIRROR diff --git a/ocaml/xapi/storage_utils.ml b/ocaml/xapi/storage_utils.ml index dd7d6b6e63d..8c2398619ff 100644 --- a/ocaml/xapi/storage_utils.ml +++ b/ocaml/xapi/storage_utils.ml @@ -14,6 +14,10 @@ open Storage_interface +let s_of_sr = Storage_interface.Sr.string_of + +let s_of_vdi = Storage_interface.Vdi.string_of + let string_of_vdi_type vdi_type = Rpc.string_of_rpc (API.rpc_of_vdi_type vdi_type) @@ -173,3 +177,24 @@ let transform_storage_exn f = (Api_errors.Server_error (Api_errors.internal_error, [Printexc.to_string e]) ) + +exception No_VDI + +let find_vdi ~__context sr vdi = + let sr = s_of_sr sr in + let vdi = s_of_vdi vdi in + let open Xapi_database.Db_filter_types in + let sr = Db.SR.get_by_uuid ~__context ~uuid:sr in + match + Db.VDI.get_records_where ~__context + ~expr: + (And + ( Eq (Field "location", Literal vdi) + , Eq (Field "SR", Literal (Ref.string_of sr)) + ) + ) + with + | x :: _ -> + x + | _ -> + raise No_VDI diff --git a/ocaml/xapi/storage_utils.mli b/ocaml/xapi/storage_utils.mli index 50e3a80e7f8..d0a98704c8b 100644 --- a/ocaml/xapi/storage_utils.mli +++ b/ocaml/xapi/storage_utils.mli @@ -64,3 +64,12 @@ val rpc : val transform_storage_exn : (unit -> 'a) -> 'a (** [transform_storage_exn f] runs [f], rethrowing any storage error as a nice XenAPI error *) + +exception No_VDI + +val find_vdi : + __context:Context.t + -> Storage_interface.sr + -> Storage_interface.vdi + -> [`VDI] Ref.t * API.vDI_t +(** Find a VDI given a storage-layer SR and VDI *) diff --git a/ocaml/xapi/vm_evacuation.ml b/ocaml/xapi/vm_evacuation.ml index 11a7560af8a..080da5a01ea 100644 --- a/ocaml/xapi/vm_evacuation.ml +++ b/ocaml/xapi/vm_evacuation.ml @@ -117,7 +117,7 @@ let ensure_no_vms ~__context ~rpc ~session_id ~evacuate_timeout = let shutdown vms = log_and_ignore_exn (fun () -> clean_shutdown vms) ; (* We can unplug the PBD if a VM is suspended or halted, but not if - * it is 
running or paused, i.e. "live" *) + * it is running or paused, i.e. "live" *) vms |> List.filter (fun self -> Xapi_vm_lifecycle_helpers.is_live ~__context ~self diff --git a/ocaml/xapi/vm_sysprep.ml b/ocaml/xapi/vm_sysprep.ml new file mode 100644 index 00000000000..abc9a2f2742 --- /dev/null +++ b/ocaml/xapi/vm_sysprep.ml @@ -0,0 +1,299 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module D = Debug.Make (struct let name = __MODULE__ end) + +open D +open Client +open Xapi_stdext_unix + +let ( // ) = Filename.concat + +let finally = Xapi_stdext_pervasives.Pervasiveext.finally + +let genisoimage = !Xapi_globs.genisoimage_path + +type error = + | API_not_enabled + | Other of string + | VM_CDR_not_found + | VM_CDR_eject + | VM_CDR_insert + | VM_misses_feature + | VM_not_running + | VM_sysprep_timeout + | XML_too_large + +exception Sysprep of error + +let _fail_fmt fmt = Printf.ksprintf (fun msg -> raise (Sysprep (Other msg))) fmt + +let fail error = raise (Sysprep error) + +let internal_error = Helpers.internal_error + +let prng = Random.State.make_self_init () + +let call = Helpers.call_api_functions + +(* A local ISO SR; we create an ISO that holds an unattend.xml file that + is than passed as CD to a VM *) +module SR = struct + let dir = "/var/opt/iso" + + (* We create a deterministic unique name label to protect us against a + user using the same name *) + let name hostname = + let digest str = + Digest.(string str |> to_hex) 
|> fun hex -> String.sub hex 0 4 + in + Printf.sprintf "SYSPREP-%s-%s" hostname (digest hostname) + + let find_opt ~__context ~label = + let check sr = + match Db.SR.get_record ~__context ~self:sr with + | API.{sR_type= "iso"; _} -> + true + | _ -> + false + in + Db.SR.get_by_name_label ~__context ~label |> List.find_opt check +end + +(** This is called on xapi startup. Opportunity to set up or clean up. + We destroy all VDIs that are unused. *) +let on_startup ~__context = + let host = Helpers.get_localhost ~__context in + let hostname = Db.Host.get_hostname ~__context ~self:host in + match SR.find_opt ~__context ~label:(SR.name hostname) with + | Some sr when !Xapi_globs.vm_sysprep_enabled -> ( + Db.SR.get_VDIs ~__context ~self:sr + |> List.iter @@ fun self -> + match Db.VDI.get_record ~__context ~self with + | API.{vDI_VBDs= []; _} -> + call ~__context @@ fun rpc session_id -> + Client.VDI.destroy ~rpc ~session_id ~self + | _ -> + () + ) + | _ -> + () (* none found or not enabled *) + +(** create a name with a random infix. We need random names for + temporary directories to avoid collisions of concurrent API calls *) +let temp_name prefix suffix = + let rnd = Random.State.bits prng land 0xFFFFFF in + Printf.sprintf "%s%06x%s" prefix rnd suffix + +let temp_dir = Filename.get_temp_dir_name () + +(** [mkdtmp] creates a directory in [dir] and returns its path. If [dir] + does not yet exist it is created. It is a an error if [dir] exists + and is not a directory. 
*) +let mkdtemp ?(dir = temp_dir) ?(perms = 0o700) prefix suffix = + ( match Sys.file_exists dir with + | true when not (Sys.is_directory dir) -> + internal_error "s: %s is not a directory" __FUNCTION__ dir + | true -> + () + | false -> + Unixext.mkdir_rec dir perms + ) ; + let rec try_upto = function + | n when n < 0 -> + internal_error "%s: can't create directory %S" __FUNCTION__ dir + | n -> ( + let path = Filename.concat dir (temp_name prefix suffix) in + try Sys.mkdir path perms ; path with Sys_error _ -> try_upto (n - 1) + ) + in + try_upto 20 + +(** Crteate a temporary directory, and pass its path to [f]. Once [f] + returns the directory is removed again *) +let with_temp_dir ?(dir = temp_dir) ?(perms = 0o700) prefix suffix f = + let dir = mkdtemp ~dir ~perms prefix suffix in + finally (fun () -> f dir) (fun () -> Unixext.rm_rec dir) + +(** name of the ISO we will use for a VM; this is not a path *) +let iso_basename ~vm_uuid = + let now = Ptime_clock.now () |> Ptime.to_rfc3339 in + Printf.sprintf "sysprep-%s-%s.iso" vm_uuid now + +(** Create an ISO in [SR.dir] with content [unattend]. [SR.dir] is + created if it not already exists. 
Returns the path of the ISO image *) +let make_iso ~vm_uuid ~unattend = + try + let basename = iso_basename ~vm_uuid in + let iso = SR.dir // basename in + Xapi_stdext_unix.Unixext.mkdir_rec SR.dir 0o755 ; + with_temp_dir ~dir:"/var/tmp/xapi" "sysprep-" "-iso" (fun temp_dir -> + let path = temp_dir // "unattend.xml" in + SecretString.write_to_file path unattend ; + debug "%s: written to %s" __FUNCTION__ path ; + let args = ["-r"; "-J"; "-o"; iso; temp_dir] in + Forkhelpers.execute_command_get_output genisoimage args |> ignore ; + (iso, basename) + ) + with e -> + Backtrace.is_important e ; + let msg = Printexc.to_string e in + Helpers.internal_error "%s failed: %s" __FUNCTION__ msg + +(** create a local ISO SR when necessary and update it such that it + recognises any ISO we added or removed *) +let update_sr ~__context = + let host = Helpers.get_localhost ~__context in + let hostname = Db.Host.get_hostname ~__context ~self:host in + let label = SR.name hostname in + let mib n = Int64.(n * 1024 * 1024 |> of_int) in + let sr = + match SR.find_opt ~__context ~label with + | Some sr -> + sr + | None -> + let device_config = [("location", SR.dir); ("legacy_mode", "true")] in + call ~__context @@ fun rpc session_id -> + Client.SR.create ~rpc ~session_id ~host ~name_label:label ~device_config + ~content_type:"iso" ~_type:"iso" ~name_description:"Sysprep ISOs" + ~shared:false ~sm_config:[] ~physical_size:(mib 512) + in + call ~__context @@ fun rpc session_id -> + Client.SR.scan ~rpc ~session_id ~sr ; + sr + +(** Find the VBD for the CD drive on [vm] *) +let find_cdr_vbd ~__context ~vm = + let vbds = Db.VM.get_VBDs ~__context ~self:vm in + let vbds' = + List.map (fun self -> (self, Db.VBD.get_record ~__context ~self)) vbds + in + let is_cd (_rf, rc) = + let open API in + rc.vBD_type = `CD && rc.vBD_empty + in + let uuid = Db.VM.get_uuid ~__context ~self:vm in + match List.filter is_cd vbds' with + | [] -> + fail VM_CDR_not_found + | [(rf, rc)] -> + debug "%s: for VM %s 
using VBD %s" __FUNCTION__ uuid rc.API.vBD_uuid ; + rf + | (rf, rc) :: _ -> + debug "%s: for VM %s using VBD %s" __FUNCTION__ uuid rc.API.vBD_uuid ; + warn "%s: for VM %s found additions VBDs" __FUNCTION__ uuid ; + rf + +(** Find the VDI that contains the unattend.xml based on its name. This + should be unique *) +let find_vdi ~__context ~label = + match Db.VDI.get_by_name_label ~__context ~label with + | [] -> + internal_error "%s: can't find VDI for %s" __FUNCTION__ label + | [vdi] -> + vdi + | vdi :: _ -> + warn "%s: more than one VDI with label %s" __FUNCTION__ label ; + vdi + +(* Ejecting the CD/VDI/ISO may fail with a timeout *) +let eject ~rpc ~session_id ~vbd ~iso = + try + Client.VBD.eject ~rpc ~session_id ~vbd ; + Sys.remove iso + with exn -> + Sys.remove iso ; + (* still remove ISO to protect it *) + warn "%s: ejecting CD failed: %s" __FUNCTION__ (Printexc.to_string exn) ; + fail VM_CDR_eject + +(** notify the VM with [domid] to run sysprep and where to find the + file. *) +let trigger ~rpc ~session_id ~domid ~uuid ~timeout ~vbd ~iso = + let open Ezxenstore_core.Xenstore in + let control = Printf.sprintf "/local/domain/%Ld/control/sysprep" domid in + let domain = Printf.sprintf "/local/domain/%Ld" domid in + with_xs (fun xs -> + xs.Xs.write (control // "filename") "D://unattend.xml" ; + xs.Xs.write (control // "vdi-uuid") uuid ; + xs.Xs.write (control // "action") "sysprep" ; + debug "%s: notified domain %Ld" __FUNCTION__ domid ; + try + finally + (fun () -> + (* wait for sysprep to start, then domain to dissapear *) + Ezxenstore_core.Watch.( + wait_for ~xs ~timeout:5.0 + (value_to_become (control // "action") "running") + ) ; + debug "%s: sysprep is running; waiting for sysprep to finish" + __FUNCTION__ ; + Ezxenstore_core.Watch.( + wait_for ~xs ~timeout (key_to_disappear (control // "action")) + ) + ) + (fun () -> eject ~rpc ~session_id ~vbd ~iso) ; + debug "%s waiting for domain to dissapear" __FUNCTION__ ; + Ezxenstore_core.Watch.(wait_for ~xs 
~timeout (key_to_disappear domain)) ; + true + with Ezxenstore_core.Watch.Timeout _ -> + debug "%s: sysprep timeout" __FUNCTION__ ; + false + ) + +(* This function is executed on the host where [vm] is running *) +let sysprep ~__context ~vm ~unattend ~timeout = + debug "%s (timeout %f)" __FUNCTION__ timeout ; + if not !Xapi_globs.vm_sysprep_enabled then + fail API_not_enabled ; + let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in + let domid = Db.VM.get_domid ~__context ~self:vm in + let control = Printf.sprintf "/local/domain/%Ld/control" domid in + if domid <= 0L then + fail VM_not_running ; + if SecretString.length unattend > 32 * 1024 then + fail XML_too_large ; + Ezxenstore_core.Xenstore.with_xs (fun xs -> + let open Ezxenstore_core.Xenstore in + match xs.Xs.read (control // "feature-sysprep") with + | "1" -> + debug "%s: VM %s supports sysprep" __FUNCTION__ vm_uuid + | _ -> + debug "%s: VM %s does not support sysprep" __FUNCTION__ vm_uuid ; + fail VM_misses_feature + | exception _ -> + debug "%s: VM %s does not support sysprep" __FUNCTION__ vm_uuid ; + fail VM_misses_feature + ) ; + let iso, label = make_iso ~vm_uuid ~unattend in + debug "%s: created ISO %s" __FUNCTION__ iso ; + let _sr = update_sr ~__context in + let vbd = find_cdr_vbd ~__context ~vm in + let vdi = find_vdi ~__context ~label in + let uuid = Db.VDI.get_uuid ~__context ~self:vdi in + debug "%s: inserting Sysprep VDI for VM %s" __FUNCTION__ vm_uuid ; + call ~__context @@ fun rpc session_id -> + ( try Client.VBD.insert ~rpc ~session_id ~vdi ~vbd + with e -> + debug "%s: failed to insert CD, removing ISO %s: %s" __FUNCTION__ iso + (Printexc.to_string e) ; + Sys.remove iso ; + fail VM_CDR_insert + ) ; + Thread.delay !Xapi_globs.vm_sysprep_wait ; + match trigger ~rpc ~session_id ~domid ~uuid ~timeout ~vbd ~iso with + | true -> + () + | false -> + fail VM_sysprep_timeout diff --git a/ocaml/xapi/vm_sysprep.mli b/ocaml/xapi/vm_sysprep.mli new file mode 100644 index 00000000000..76cdfb7f621 --- 
/dev/null +++ b/ocaml/xapi/vm_sysprep.mli @@ -0,0 +1,40 @@ +(* + * Copyright (c) Cloud Software Group, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +(** error message that may be passed to API clients *) +type error = + | API_not_enabled + | Other of string + | VM_CDR_not_found + | VM_CDR_eject + | VM_CDR_insert + | VM_misses_feature + | VM_not_running + | VM_sysprep_timeout + | XML_too_large + +exception Sysprep of error + +val on_startup : __context:Context.t -> unit +(** clean up on toolstart start up *) + +val sysprep : + __context:Context.t + -> vm:API.ref_VM + -> unattend:SecretString.t + -> timeout:float + -> unit +(** Execute sysprep on [vm] using script [unattend]. This requires + driver support from the VM and is checked. [unattend] must + not exceed 32kb. 
Raised [Failure] that must be handled, *) diff --git a/ocaml/xapi/workload_balancing.ml b/ocaml/xapi/workload_balancing.ml index 27fa184da84..7108032dbf7 100644 --- a/ocaml/xapi/workload_balancing.ml +++ b/ocaml/xapi/workload_balancing.ml @@ -329,8 +329,19 @@ let wlb_request ~__context ~host ~port ~auth ~meth ~params ~handler ~enable_log with | Remote_requests.Timed_out -> raise_timeout timeout - | Http_client.Http_request_rejected _ | Http_client.Http_error _ -> - raise_authentication_failed () + | Http_client.Http_error (code, _) as e -> ( + error "%s: Caught %s when contacting WLB" __FUNCTION__ + (Printexc.to_string e) ; + match code with + | "401" | "403" -> + raise_authentication_failed () + | _ -> + raise_connection_reset () + ) + | Http_client.Http_request_rejected _ as e -> + error "%s: Caught %s when contacting WLB" __FUNCTION__ + (Printexc.to_string e) ; + raise_connection_reset () | Xmlrpc_client.Connection_reset -> raise_connection_reset () | Stunnel.Stunnel_verify_error reason -> diff --git a/ocaml/xapi/xapi.ml b/ocaml/xapi/xapi.ml index 8560f8947d5..56561d76e06 100644 --- a/ocaml/xapi/xapi.ml +++ b/ocaml/xapi/xapi.ml @@ -327,6 +327,31 @@ let server_run_in_emergency_mode () = in wait_to_die () ; exit 0 +let remove_blocked_repositories ~__context () = + try + let blocklist = !Xapi_globs.repository_url_blocklist in + let repos = Db.Repository.get_all ~__context in + let pool = Helpers.get_pool ~__context in + let is_repo_blocked repo = + let binary_url = Db.Repository.get_binary_url ~__context ~self:repo in + let source_url = Db.Repository.get_source_url ~__context ~self:repo in + Repository_helpers.url_matches ~url:binary_url blocklist + || Repository_helpers.url_matches ~url:source_url blocklist + in + let remove_repo repo = + debug "%s Removing repository %s due to it being blocked" __FUNCTION__ + (Ref.string_of repo) ; + try + Xapi_pool.remove_repository ~__context ~self:pool ~value:repo ; + Db.Repository.destroy ~__context ~self:repo + with e -> + 
debug "%s Failed to remove repository for %s: %s" __FUNCTION__ + (Ref.string_of repo) (Printexc.to_string e) + in + List.filter (fun x -> is_repo_blocked x) repos + |> List.iter (fun x -> remove_repo x) + with e -> error "Exception in %s: %s" __FUNCTION__ (Printexc.to_string e) + let bring_up_management_if ~__context () = try let management_if = @@ -760,9 +785,9 @@ let check_network_reset () = ) ; (* Remove trigger file *) Unix.unlink Xapi_globs.network_reset_trigger - with _ -> () - -(* TODO: catch specific exception for missing fields in reset_file and inform user *) + with e -> + D.error "%s: exception: %s (%s)" __FUNCTION__ (Printexc.to_string e) + (Printexc.get_backtrace ()) (** Make sure our license is set correctly *) let handle_licensing () = @@ -1115,6 +1140,10 @@ let server_init () = , [Startup.OnlyMaster] , Xapi_db_upgrade.hi_level_db_upgrade_rules ~__context ) + ; ( "removing blocked repositories" + , [Startup.OnlyMaster] + , remove_blocked_repositories ~__context + ) ; ( "bringing up management interface" , [] , bring_up_management_if ~__context @@ -1351,6 +1380,10 @@ let server_init () = , cache_metadata_vdis ) ; ("Stats reporting thread", [], Xapi_stats.start) + ; ( "Remove local ISO SR" + , [Startup.OnThread] + , fun () -> Vm_sysprep.on_startup ~__context + ) ] ; if !debug_dummy_data then Startup.run ~__context diff --git a/ocaml/xapi/xapi_bond.ml b/ocaml/xapi/xapi_bond.ml index 72d762ff193..f0265bd50a4 100644 --- a/ocaml/xapi/xapi_bond.ml +++ b/ocaml/xapi/xapi_bond.ml @@ -427,9 +427,9 @@ let create ~__context ~network ~members ~mAC ~mode ~properties = in let disallow_unplug = (* this is always true if one of the PIFs is a cluster_host.PIF *) - List.fold_left - (fun a m -> Db.PIF.get_disallow_unplug ~__context ~self:m || a) - false members + List.exists + (fun m -> Db.PIF.get_disallow_unplug ~__context ~self:m) + members in (* Validate constraints: *) (* 1. 
Members must not be in a bond already *) diff --git a/ocaml/xapi/xapi_cluster.ml b/ocaml/xapi/xapi_cluster.ml index 1968e5f0774..33f9573b1e1 100644 --- a/ocaml/xapi/xapi_cluster.ml +++ b/ocaml/xapi/xapi_cluster.ml @@ -96,7 +96,7 @@ let create ~__context ~pIF ~cluster_stack ~pool_auto_join ~token_timeout ~pending_forget:[] ~pool_auto_join ~token_timeout ~token_timeout_coefficient ~current_operations:[] ~allowed_operations:[] ~cluster_config:[] ~other_config:[] - ~is_quorate:false ~quorum:0L ~live_hosts:0L ; + ~is_quorate:false ~quorum:0L ~live_hosts:0L ~expected_hosts:0L ; Db.Cluster_host.create ~__context ~ref:cluster_host_ref ~uuid:cluster_host_uuid ~cluster:cluster_ref ~host ~enabled:true ~pIF ~current_operations:[] ~allowed_operations:[] ~other_config:[] diff --git a/ocaml/xapi/xapi_cluster_helpers.ml b/ocaml/xapi/xapi_cluster_helpers.ml index a4d30bcedaa..1afdefb2864 100644 --- a/ocaml/xapi/xapi_cluster_helpers.ml +++ b/ocaml/xapi/xapi_cluster_helpers.ml @@ -24,19 +24,23 @@ let is_allowed_concurrently ~op:_ ~current_ops:_ = false let report_concurrent_operations_error ~current_ops ~ref_str = - let current_ops_str = + let current_ops_ref_str, current_ops_str = let op_to_str = Record_util.cluster_operation_to_string in + let ( >> ) f g x = g (f x) in match current_ops with | [] -> failwith "No concurrent operation to report" - | [(_, cop)] -> - op_to_str cop + | [(op_ref, cop)] -> + (op_ref, op_to_str cop) | l -> - "{" ^ String.concat "," (List.map op_to_str (List.map snd l)) ^ "}" + ( Printf.sprintf "{%s}" (String.concat "," (List.map fst l)) + , Printf.sprintf "{%s}" + (String.concat "," (List.map (snd >> op_to_str) l)) + ) in Some ( Api_errors.other_operation_in_progress - , ["Cluster." ^ current_ops_str; ref_str] + , ["Cluster"; ref_str; current_ops_str; current_ops_ref_str] ) (** Take an internal Cluster record and a proposed operation. 
Return None iff the operation diff --git a/ocaml/xapi/xapi_cluster_host_helpers.ml b/ocaml/xapi/xapi_cluster_host_helpers.ml index 59e5141da73..abdaa58c285 100644 --- a/ocaml/xapi/xapi_cluster_host_helpers.ml +++ b/ocaml/xapi/xapi_cluster_host_helpers.ml @@ -22,19 +22,23 @@ let is_allowed_concurrently ~op:_ ~current_ops:_ = false let report_concurrent_operations_error ~current_ops ~ref_str = - let current_ops_str = + let current_ops_ref_str, current_ops_str = let op_to_str = Record_util.cluster_host_operation_to_string in + let ( >> ) f g x = g (f x) in match current_ops with | [] -> failwith "No concurrent operation to report" - | [(_, cop)] -> - op_to_str cop + | [(op_ref, cop)] -> + (op_ref, op_to_str cop) | l -> - "{" ^ String.concat "," (List.map op_to_str (List.map snd l)) ^ "}" + ( Printf.sprintf "{%s}" (String.concat "," (List.map fst l)) + , Printf.sprintf "{%s}" + (String.concat "," (List.map (snd >> op_to_str) l)) + ) in Some ( Api_errors.other_operation_in_progress - , ["Cluster_host." ^ current_ops_str; ref_str] + , ["Cluster_host"; ref_str; current_ops_str; current_ops_ref_str] ) (** Take an internal Cluster_host record and a proposed operation. Return None iff the operation diff --git a/ocaml/xapi/xapi_clustering.ml b/ocaml/xapi/xapi_clustering.ml index efaac876d69..2553bc40202 100644 --- a/ocaml/xapi/xapi_clustering.ml +++ b/ocaml/xapi/xapi_clustering.ml @@ -338,7 +338,7 @@ let assert_cluster_host_quorate ~__context ~self = * achieved quorum yet if we have just booted and haven't seen enough hosts. * Do this via an API call rather than reading a field in the database, because the field in the * database could be out of date. 
- * *) + *) let result = Cluster_client.LocalClient.diagnostics (rpc ~__context) "assert_cluster_host_quorate" @@ -516,6 +516,8 @@ module Watcher = struct Db.Cluster.set_quorum ~__context ~self:cluster ~value:(Int64.of_int diag.quorum) ; Db.Cluster.set_live_hosts ~__context ~self:cluster + ~value:(Int64.of_int diag.total_votes) ; + Db.Cluster.set_expected_hosts ~__context ~self:cluster ~value:(Int64.of_int diag.total_votes) | Error (InternalError message) | Error (Unix_error message) -> warn "%s Cannot query diagnostics due to %s, not performing update" diff --git a/ocaml/xapi/xapi_db_upgrade.ml b/ocaml/xapi/xapi_db_upgrade.ml index f4102782916..e1f0eba63fc 100644 --- a/ocaml/xapi/xapi_db_upgrade.ml +++ b/ocaml/xapi/xapi_db_upgrade.ml @@ -930,7 +930,6 @@ let upgrade_ca_fingerprints = try let* certificate = Xapi_stdext_unix.Unixext.string_of_file filename - |> Cstruct.of_string |> X509.Certificate.decode_pem in let sha1 = diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index a7412790019..cd38814d7e2 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -525,6 +525,59 @@ let rec next ~__context = else rpc_of_events relevant +type time = Xapi_database.Db_cache_types.Time.t + +type entry = {table: string; obj: string; time: time} + +type acc = { + creates: entry list + ; mods: entry list + ; deletes: entry list + ; last: time +} + +let collect_events (subs, tables, last_generation) acc table = + let open Xapi_database in + let open Db_cache_types in + let table_value = TableSet.find table tables in + let prepend_recent obj stat _ ({creates; mods; last; _} as entries) = + let Stat.{created; modified; deleted} = stat in + if Subscription.object_matches subs table obj then + let last = max last (max modified deleted) in + let creates = + if created > last_generation then + {table; obj; time= created} :: creates + else + creates + in + let mods = + if modified > last_generation && not (created > last_generation) then + {table; obj; time= 
modified} :: mods + else + mods + in + {entries with creates; mods; last} + else + entries + in + let prepend_deleted obj stat ({deletes; last; _} as entries) = + let Stat.{created; modified; deleted} = stat in + if Subscription.object_matches subs table obj then + let last = max last (max modified deleted) in + let deletes = + if created <= last_generation then + {table; obj; time= deleted} :: deletes + else + deletes + in + {entries with deletes; last} + else + entries + in + acc + |> Table.fold_over_recent last_generation prepend_recent table_value + |> Table.fold_over_deleted last_generation prepend_deleted table_value + let from_inner __context session subs from from_t timer batching = let open Xapi_database in let open From in @@ -541,9 +594,8 @@ let from_inner __context session subs from from_t timer batching = in List.filter (fun table -> Subscription.table_matches subs table) all in - let last_generation = ref from in let last_msg_gen = ref from_t in - let grab_range t = + let grab_range ~since t = let tableset = Db_cache_types.Database.tableset (Db_ref.get_database t) in let msg_gen, messages = if Subscription.table_matches subs "message" then @@ -551,75 +603,23 @@ let from_inner __context session subs from from_t timer batching = else (0L, []) in - ( msg_gen - , messages - , tableset - , List.fold_left - (fun acc table -> - (* Fold over the live objects *) - let acc = - Db_cache_types.Table.fold_over_recent !last_generation - (fun objref {Db_cache_types.Stat.created; modified; deleted} _ - (creates, mods, deletes, last) -> - if Subscription.object_matches subs table objref then - let last = max last (max modified deleted) in - (* mtime guaranteed to always be larger than ctime *) - ( ( if created > !last_generation then - (table, objref, created) :: creates - else - creates - ) - , ( if - modified > !last_generation - && not (created > !last_generation) - then - (table, objref, modified) :: mods - else - mods - ) - , (* Only have a mod event if we don't 
have a created event *) - deletes - , last - ) - else - (creates, mods, deletes, last) - ) - (Db_cache_types.TableSet.find table tableset) - acc - in - (* Fold over the deleted objects *) - Db_cache_types.Table.fold_over_deleted !last_generation - (fun objref {Db_cache_types.Stat.created; modified; deleted} - (creates, mods, deletes, last) -> - if Subscription.object_matches subs table objref then - let last = max last (max modified deleted) in - (* mtime guaranteed to always be larger than ctime *) - if created > !last_generation then - (creates, mods, deletes, last) - (* It was created and destroyed since the last update *) - else - (creates, mods, (table, objref, deleted) :: deletes, last) - (* It might have been modified, but we can't tell now *) - else - (creates, mods, deletes, last) - ) - (Db_cache_types.TableSet.find table tableset) - acc - ) - ([], [], [], !last_generation) - tables - ) + let events = + let initial = {creates= []; mods= []; deletes= []; last= since} in + let folder = collect_events (subs, tableset, since) in + List.fold_left folder initial tables + in + (msg_gen, messages, tableset, events) in (* Each event.from should have an independent subscription record *) - let msg_gen, messages, tableset, (creates, mods, deletes, last) = + let msg_gen, messages, tableset, events = with_call session subs (fun sub -> let grab_nonempty_range = - Throttle.Batching.with_recursive_loop batching @@ fun self arg -> - let ( (msg_gen, messages, _tableset, (creates, mods, deletes, last)) - as result - ) = - Db_lock.with_lock (fun () -> grab_range (Db_backend.make ())) + Throttle.Batching.with_recursive_loop batching @@ fun self since -> + let result = + Db_lock.with_lock (fun () -> grab_range ~since (Db_backend.make ())) in + let msg_gen, messages, _tables, events = result in + let {creates; mods; deletes; last} = events in if creates = [] && mods = [] @@ -627,73 +627,85 @@ let from_inner __context session subs from from_t timer batching = && messages = [] && 
not (Clock.Timer.has_expired timer) then ( - last_generation := last ; - (* Cur_id was bumped, but nothing relevent fell out of the db. Therefore the *) + (* cur_id was bumped, but nothing relevent fell out of the database. + Therefore the last ID the client got is equivalent to the current one. *) sub.cur_id <- last ; - (* last id the client got is equivalent to the current one *) last_msg_gen := msg_gen ; wait2 sub last timer ; - (self [@tailcall]) arg + (* The next iteration will fold over events starting after + the last database event that matched a subscription. *) + let next = last in + (self [@tailcall]) next ) else result in - grab_nonempty_range () + grab_nonempty_range from ) in - last_generation := last ; - let event_of op ?snapshot (table, objref, time) = + let {creates; mods; deletes; last} = events in + let event_of op ?snapshot {table; obj; time} = { id= Int64.to_string time ; ts= "0.0" ; ty= String.lowercase_ascii table ; op - ; reference= objref + ; reference= obj ; snapshot } in - let events = - List.fold_left - (fun acc x -> - let ev = event_of `del x in - if Subscription.event_matches subs ev then ev :: acc else acc - ) - [] deletes - in - let events = - List.fold_left - (fun acc (table, objref, mtime) -> - let serialiser = Eventgen.find_get_record table in - try - let xml = serialiser ~__context ~self:objref () in - let ev = event_of `_mod ?snapshot:xml (table, objref, mtime) in - if Subscription.event_matches subs ev then ev :: acc else acc - with _ -> acc - ) - events mods + let events_of ~kind ?(with_snapshot = true) entries acc = + let rec go events ({table; obj; time= _} as entry) = + try + let snapshot = + let serialiser = Eventgen.find_get_record table in + if with_snapshot then + serialiser ~__context ~self:obj () + else + None + in + let event = event_of kind ?snapshot entry in + if Subscription.event_matches subs event then + event :: events + else + events + with _ -> + (* CA-91931: An exception may be raised here if an object's + 
lifetime is too short. + + The problem is that "collect_events" and "events_of" work + on different versions of the database, so some `add and + `mod events can be lost if the corresponding object is + deleted before a snapshot is taken. + + In practice, this has only been seen with the "task" + object - which can be rapidly created and destroyed using + helper functions. + + These exceptions have been suppressed since [bc0cc5a9]. *) + events + in + List.fold_left go acc entries in let events = - List.fold_left - (fun acc (table, objref, ctime) -> - let serialiser = Eventgen.find_get_record table in - try - let xml = serialiser ~__context ~self:objref () in - let ev = event_of `add ?snapshot:xml (table, objref, ctime) in - if Subscription.event_matches subs ev then ev :: acc else acc - with _ -> acc - ) - events creates + [] (* Accumulate the events for objects stored in the database. *) + |> events_of ~kind:`del ~with_snapshot:false deletes + |> events_of ~kind:`_mod mods + |> events_of ~kind:`add creates in let events = + (* Messages require a special casing as their contents are not + stored in the database. 
*) List.fold_left (fun acc mev -> let event = + let table = "message" in match mev with | Message.Create (_ref, message) -> event_of `add ?snapshot:(Some (API.rpc_of_message_t message)) - ("message", Ref.string_of _ref, 0L) + {table; obj= Ref.string_of _ref; time= 0L} | Message.Del _ref -> - event_of `del ("message", Ref.string_of _ref, 0L) + event_of `del {table; obj= Ref.string_of _ref; time= 0L} in event :: acc ) diff --git a/ocaml/xapi/xapi_extensions.ml b/ocaml/xapi/xapi_extensions.ml index 301a0a5e686..e19dc88933d 100644 --- a/ocaml/xapi/xapi_extensions.ml +++ b/ocaml/xapi/xapi_extensions.ml @@ -79,6 +79,7 @@ let call_extension rpc = | Api_errors.Server_error (code, params) -> API.response_of_failure code params | e -> + Backtrace.is_important e ; error "Unexpected exception calling extension %s: %s" rpc.Rpc.name (Printexc.to_string e) ; Debug.log_backtrace e (Backtrace.get e) ; diff --git a/ocaml/xapi/xapi_fuse.ml b/ocaml/xapi/xapi_fuse.ml index 48d0737a613..8c2b5b56d3d 100644 --- a/ocaml/xapi/xapi_fuse.ml +++ b/ocaml/xapi/xapi_fuse.ml @@ -52,6 +52,8 @@ let light_fuse_and_run ?(fuse_length = !Constants.fuse_time) () = in let new_fuse_length = max 5. (fuse_length -. delay_so_far) in debug "light_fuse_and_run: current RRDs have been saved" ; + ignore + (Thread.create Tracing_export.(flush_and_exit ~max_wait:new_fuse_length) ()) ; ignore (Thread.create (fun () -> diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 89665a13494..848203dcf99 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -151,6 +151,10 @@ let _dbv = "dbv" let _db_schema = "db_schema" +let _xapi_build_version = "xapi_build" + +let _xen_version = "xen" + (* When comparing two host versions, always treat a host that has platform_version defined as newer * than any host that does not have platform_version defined. 
* Substituting this default when a host does not have platform_version defined will be acceptable, @@ -368,6 +372,8 @@ let sync_bios_strings = "sync_bios_strings" let sync_chipset_info = "sync_chipset_info" +let sync_ssh_status = "sync_ssh_status" + let sync_pci_devices = "sync_pci_devices" let sync_gpus = "sync_gpus" @@ -434,6 +440,10 @@ let xapi_clusterd_port = ref 8896 *) let local_yum_repo_port = ref 8000 +(* The maximum number of start attempts for HA best-effort VMs. Each attempt is + spaced 20 seconds apart. *) +let ha_best_effort_max_retries = ref 2 + (* When a host is known to be shutting down or rebooting, we add it's reference in here. This can be used to force the Host_metrics.live flag to false. *) let hosts_which_are_shutting_down : API.ref_host list ref = ref [] @@ -500,6 +510,16 @@ let rpu_allowed_vm_operations = ; `update_allowed_operations ] +module Vdi_operations = struct + type t = API.vdi_operations + + (* this is more efficient than just 'let compare = Stdlib.compare', + because the compiler can specialize it to [t] without calling any runtime functions *) + let compare (a : t) (b : t) = Stdlib.compare a b +end + +module Vdi_operations_set = Set.Make (Vdi_operations) + (* Until the Ely release, the vdi_operations enum had stayed unchanged * since 2009 or earlier, but then Ely and some subsequent releases * added new members to the enum. *) @@ -517,6 +537,7 @@ let pre_ely_vdi_operations = ; `generate_config ; `blocked ] + |> Vdi_operations_set.of_list (* We might consider restricting this further. 
*) let rpu_allowed_vdi_operations = pre_ely_vdi_operations @@ -618,27 +639,11 @@ let auth_type_PAM = "PAM" let event_hook_auth_on_xapi_initialize_succeeded = ref false -(** {2 CPUID feature masking} *) - -let cpu_info_vendor_key = "vendor" - -let cpu_info_features_key = "features" - -let cpu_info_features_pv_key = "features_pv" - -let cpu_info_features_hvm_key = "features_hvm" - -let cpu_info_features_pv_host_key = "features_pv_host" - -let cpu_info_features_hvm_host_key = "features_hvm_host" - (** Metrics *) let metrics_root = "/dev/shm/metrics" -let metrics_prefix_mem_host = "xcp-rrdd-mem_host" - -let metrics_prefix_mem_vms = "xcp-rrdd-mem_vms" +let metrics_prefix_mem = "xcp-rrdd-squeezed" let metrics_prefix_pvs_proxy = "pvsproxy-" @@ -744,7 +749,7 @@ let ha_default_timeout_base = ref 60. let guest_liveness_timeout = ref 300. (** The default time, in µs, in which tapdisk3 will keep polling the vbd ring buffer in expectation for extra requests from the guest *) -let default_vbd3_polling_duration = ref 1000 +let default_vbd3_polling_duration = ref 8000 (** The default % of idle dom0 cpu above which tapdisk3 will keep polling the vbd ring buffer *) let default_vbd3_polling_idle_threshold = ref 50 @@ -806,6 +811,10 @@ let sparse_dd = ref "sparse_dd" let vhd_tool = ref "vhd-tool" +let qcow_to_stdout = ref "/opt/xensource/libexec/qcow2-to-stdout.py" + +let qcow_stream_tool = ref "qcow-stream-tool" + let fence = ref "fence" let host_bugreport_upload = ref "host-bugreport-upload" @@ -921,6 +930,13 @@ let gen_pool_secret_script = ref "/usr/bin/pool_secret_wrapper" let repository_domain_name_allowlist = ref [] +(* + This blocklist aims to prevent the creation of any repository whose URL matches an entry in the blocklist. + Additionally, if an existing repository contains a URL that matches an entry in the blocklist, + it should be removed automatically after xapi is restarted. 
+*) +let repository_url_blocklist = ref [] + let yum_cmd = ref "/usr/bin/yum" let dnf_cmd = ref "/usr/bin/dnf" @@ -999,11 +1015,15 @@ let winbind_cache_time = ref 60 let winbind_machine_pwd_timeout = ref (2. *. 7. *. 24. *. 3600.) +let winbind_dns_sync_interval = ref 3600. + let winbind_update_closest_kdc_interval = ref (3600. *. 22.) (* every 22 hours *) let winbind_kerberos_encryption_type = ref Kerberos_encryption_types.Winbind.All +let winbind_set_machine_account_kerberos_encryption_type = ref false + let winbind_allow_kerberos_auth_fallback = ref false let winbind_keep_configuration = ref false @@ -1039,10 +1059,14 @@ let trace_log_dir = ref "/var/log/dt/zipkinv2/json" let export_interval = ref 30. +let export_chunk_size = ref 10000 + let max_spans = ref 10000 let max_traces = ref 10000 +let max_span_depth = ref 100 + let use_xmlrpc = ref true let compress_tracing_files = ref true @@ -1081,11 +1105,18 @@ let reuse_pool_sessions = ref false let validate_reusable_pool_session = ref false (* Validate a reusable session before each use. 
This is slower and should not be required *) +let vm_sysprep_enabled = ref false +(* enable VM.sysprep API *) + +let vm_sysprep_wait = ref 5.0 (* seconds *) + let test_open = ref 0 let xapi_requests_cgroup = "/sys/fs/cgroup/cpu/control.slice/xapi.service/request" +let genisoimage_path = ref "/usr/bin/genisoimage" + (* Event.{from,next} batching delays *) let make_batching name ~delay_before ~delay_between = let name = Printf.sprintf "%s_delay" name in @@ -1199,6 +1230,7 @@ let xapi_globs_spec = ; ("winbind_debug_level", Int winbind_debug_level) ; ("winbind_cache_time", Int winbind_cache_time) ; ("winbind_machine_pwd_timeout", Float winbind_machine_pwd_timeout) + ; ("winbind_dns_sync_interval", Float winbind_dns_sync_interval) ; ( "winbind_update_closest_kdc_interval" , Float winbind_update_closest_kdc_interval ) @@ -1218,6 +1250,7 @@ let xapi_globs_spec = ; ("max_observer_file_size", Int max_observer_file_size) ; ("test-open", Int test_open) (* for consistency with xenopsd *) ; ("local_yum_repo_port", Int local_yum_repo_port) + ; ("ha_best_effort_max_retries", Int ha_best_effort_max_retries) ] let xapi_globs_spec_with_descriptions = @@ -1287,6 +1320,16 @@ let gpumon_stop_timeout = ref 10.0 let reboot_required_hfxs = ref "/run/reboot-required.hfxs" +let console_timeout_profile_path = ref "/etc/profile.d/console_timeout.sh" + +let job_for_disable_ssh = ref "Disable SSH" + +let ssh_service = ref "sshd" + +let ssh_monitor_service = ref "xapi-ssh-monitor" + +let ssh_auto_mode_default = ref true + (* Fingerprint of default patch key *) let citrix_patch_key = "NERDNTUzMDMwRUMwNDFFNDI4N0M4OEVCRUFEMzlGOTJEOEE5REUyNg==" @@ -1310,18 +1353,14 @@ let gen_list_option name desc of_string string_of opt = let sm_plugins = ref [] let accept_sm_plugin name = - List.( - fold_left ( || ) false - (map - (function - | `All -> - true - | `Sm x -> - String.lowercase_ascii x = String.lowercase_ascii name - ) - !sm_plugins + List.exists + (function + | `All -> + true + | `Sm x -> + 
String.lowercase_ascii x = String.lowercase_ascii name ) - ) + !sm_plugins let nvidia_multi_vgpu_enabled_driver_versions = ref ["430.42"; "430.62"; "440.00+"] @@ -1546,6 +1585,14 @@ let other_options = , "Encryption types to use when operating as Kerberos client \ [strong|legacy|all]" ) + ; ( "winbind_set_machine_account_kerberos_encryption_type" + , Arg.Set winbind_set_machine_account_kerberos_encryption_type + , (fun () -> + string_of_bool !winbind_set_machine_account_kerberos_encryption_type + ) + , "Whether set machine account encryption type \ + (msDS-SupportedEncryptionTypes) on domain controller" + ) ; ( "winbind_allow_kerberos_auth_fallback" , Arg.Set winbind_allow_kerberos_auth_fallback , (fun () -> string_of_bool !winbind_allow_kerberos_auth_fallback) @@ -1578,6 +1625,11 @@ let other_options = (fun s -> s) (fun s -> s) repository_domain_name_allowlist + ; gen_list_option "repository-url-blocklist" + "space-separated list of blocked URL patterns in base URL in repository." + (fun s -> s) + (fun s -> s) + repository_url_blocklist ; ( "repository-gpgcheck" , Arg.Set repository_gpgcheck , (fun () -> string_of_bool !repository_gpgcheck) @@ -1632,6 +1684,11 @@ let other_options = , (fun () -> string_of_float !export_interval) , "The interval for exports in Tracing" ) + ; ( "export-chunk-size" + , Arg.Set_int export_chunk_size + , (fun () -> string_of_int !export_chunk_size) + , "The span chunk size for exports in Tracing" + ) ; ( "max-spans" , Arg.Set_int max_spans , (fun () -> string_of_int !max_spans) @@ -1723,6 +1780,27 @@ let other_options = , (fun () -> string_of_bool !validate_reusable_pool_session) , "Enable validation of reusable pool sessions before use" ) + ; ( "ssh-auto-mode" + , Arg.Bool (fun b -> ssh_auto_mode_default := b) + , (fun () -> string_of_bool !ssh_auto_mode_default) + , "Defaults to true; overridden to false via \ + /etc/xapi.conf.d/ssh-auto-mode.conf(e.g., in XenServer 8)" + ) + ; ( "vm-sysprep-enabled" + , Arg.Set vm_sysprep_enabled + 
, (fun () -> string_of_bool !vm_sysprep_enabled) + , "Enable VM.sysprep API" + ) + ; ( "vm-sysprep-wait" + , Arg.Set_float vm_sysprep_wait + , (fun () -> string_of_float !vm_sysprep_wait) + , "Time in seconds to wait for VM to recognise inserted CD" + ) + ; ( "max-span-depth" + , Arg.Set_int max_span_depth + , (fun () -> string_of_int !max_span_depth) + , "The maximum depth to which spans are recorded in a trace in Tracing" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. @@ -1769,6 +1847,8 @@ module Resources = struct ) ; ("sparse_dd", sparse_dd, "Path to sparse_dd") ; ("vhd-tool", vhd_tool, "Path to vhd-tool") + ; ("qcow_to_stdout", qcow_to_stdout, "Path to qcow-to-stdout script") + ; ("qcow_stream_tool", qcow_stream_tool, "Path to qcow-stream-tool") ; ("fence", fence, "Path to fence binary, used for HA host fencing") ; ( "host-bugreport-upload" , host_bugreport_upload @@ -1914,6 +1994,7 @@ module Resources = struct , pvsproxy_close_cache_vdi , "Path to close-cache-vdi.sh" ) + ; ("genisoimage", genisoimage_path, "Path to genisoimage") ] let essential_files = diff --git a/ocaml/xapi/xapi_guest_agent.ml b/ocaml/xapi/xapi_guest_agent.ml index 5e70c1556cc..0de1ab163aa 100644 --- a/ocaml/xapi/xapi_guest_agent.ml +++ b/ocaml/xapi/xapi_guest_agent.ml @@ -33,12 +33,6 @@ end) NB each key is annotated with whether it appears in windows and/or linux *) let pv_drivers_version = [ - ("drivers/xenevtchn", "xenevtchn") - ; (* windows *) - ("drivers/xenvbd", "xenvbd") - ; (* windows *) - ("drivers/xennet", "xennet") - ; (* windows *) ("attr/PVAddons/MajorVersion", "major") ; (* linux + windows *) ("attr/PVAddons/MinorVersion", "minor") @@ -100,7 +94,7 @@ let ( // ) = Filename.concat * * Add support for SR-IOV VF, so there are two kinds of vif_type, either to be * `vif` or `net-sriov-vf` - * *) + *) let networks path vif_type (list : string -> string list) = (* Find all ipv6 addresses under a path. 
*) let find_ipv6 path prefix = @@ -224,6 +218,7 @@ type guest_metrics_t = { ; other: m ; memory: m ; device_id: m + ; services: m ; last_updated: float ; can_use_hotplug_vbd: API.tristate_type ; can_use_hotplug_vif: API.tristate_type @@ -235,6 +230,29 @@ let dead_domains : IntSet.t ref = ref IntSet.empty let mutex = Mutex.create () +(* Parse data/service which has the following structure: + data/service// = + data/service// = + ... + data/service// = + Read and convert to [(/, )] pair list. + The list is intended to store in VM_guest_metrics.services at last *) +let get_guest_services (lookup : string -> string option) + (list : string -> string list) = + let base_path = "data/service" in + let services = list base_path in + services + |> List.concat_map (fun service -> + let sub_path = base_path // service in + list sub_path + |> List.map (fun key -> + let full_path_key = sub_path // key in + let db_key = service // key in + let value = lookup full_path_key in + (db_key, Option.value ~default:"" value) + ) + ) + (* In the following functions, 'lookup' reads a key from xenstore and 'list' reads a directory from xenstore. Both are relative to the guest's domainpath. 
*) let get_initial_guest_metrics (lookup : string -> string option) @@ -270,7 +288,45 @@ let get_initial_guest_metrics (lookup : string -> string option) | None -> [] in - let pv_drivers_version = to_map pv_drivers_version + (* enumerate all driver versions from xenstore, which are stored like + drivers/0 = "XenServer XENBUS 9.1.9.105 " + drivers/1 = "XenServer XENVBD 9.1.8.79 " + drivers/2 = "XenServer XENVIF 9.1.12.101 " + drivers/3 = "XenServer XENIFACE 9.1.10.87 " + drivers/4 = "XenServer XENNET 9.1.7.65 " + + (see the format specified in xenstore-paths) + *) + let get_windows_driver_versions () = + (* Only look into directories that are numbers (indices) *) + let filter_dirs subdirs = + List.filter_map + (fun x -> + match int_of_string_opt x with + | Some _ -> + Some ("drivers/" ^ x, x) + | None -> + None + ) + subdirs + in + let versions = list "drivers" |> filter_dirs |> to_map in + List.filter_map + (fun (_, version_string) -> + try + Scanf.sscanf version_string "%s@ %s@ %s@ %s@\n" + (fun vendor driver_name version attr -> + Some + ( String.lowercase_ascii driver_name + , String.concat " " [vendor; version; attr] + ) + ) + with _ -> None + ) + versions + in + let pv_drivers_version = + to_map pv_drivers_version @ get_windows_driver_versions () and os_version = to_map os_version and netbios_name = match to_map dns_domain with @@ -289,6 +345,7 @@ let get_initial_guest_metrics (lookup : string -> string option) ; networks "xenserver/attr" "net-sriov-vf" list ] ) + and services = get_guest_services lookup list and other = List.append (to_map (other all_control)) ts and memory = to_map memory and last_updated = Unix.gettimeofday () in @@ -310,6 +367,7 @@ let get_initial_guest_metrics (lookup : string -> string option) ; other ; memory ; device_id + ; services ; last_updated ; can_use_hotplug_vbd ; can_use_hotplug_vif @@ -326,7 +384,8 @@ let create_and_set_guest_metrics (lookup : string -> string option) ~os_version:initial_gm.os_version 
~netbios_name:initial_gm.netbios_name ~pV_drivers_version:initial_gm.pv_drivers_version ~pV_drivers_up_to_date:pV_drivers_detected ~memory:[] ~disks:[] - ~networks:initial_gm.networks ~pV_drivers_detected ~other:initial_gm.other + ~networks:initial_gm.networks ~services:initial_gm.services + ~pV_drivers_detected ~other:initial_gm.other ~last_updated:(Date.of_unix_time initial_gm.last_updated) ~other_config:[] ~live:true ~can_use_hotplug_vbd:initial_gm.can_use_hotplug_vbd @@ -356,6 +415,7 @@ let all (lookup : string -> string option) (list : string -> string list) ; other ; memory ; device_id + ; services ; last_updated ; can_use_hotplug_vbd ; can_use_hotplug_vif @@ -390,6 +450,7 @@ let all (lookup : string -> string option) (list : string -> string list) ; other= [] ; memory= [] ; device_id= [] + ; services= [] ; last_updated= 0.0 ; can_use_hotplug_vbd= `unspecified ; can_use_hotplug_vif= `unspecified @@ -407,6 +468,7 @@ let all (lookup : string -> string option) (list : string -> string list) ; other ; memory ; device_id + ; services ; last_updated ; can_use_hotplug_vbd ; can_use_hotplug_vif @@ -420,12 +482,13 @@ let all (lookup : string -> string option) (list : string -> string list) || guest_metrics_cached.networks <> networks || guest_metrics_cached.other <> other || guest_metrics_cached.device_id <> device_id + || guest_metrics_cached.services <> services ) || guest_metrics_cached.can_use_hotplug_vbd <> can_use_hotplug_vbd || guest_metrics_cached.can_use_hotplug_vif <> can_use_hotplug_vif (* Nb. we're ignoring the memory updates as far as the VM_guest_metrics API object is concerned. 
We are putting them into an RRD instead *) (* || - guest_metrics_cached.memory <> memory)*) + guest_metrics_cached.memory <> memory)*) then ( let gm = let existing = Db.VM.get_guest_metrics ~__context ~self in @@ -452,6 +515,8 @@ let all (lookup : string -> string option) (list : string -> string list) ~value:netbios_name ; if guest_metrics_cached.networks <> networks then Db.VM_guest_metrics.set_networks ~__context ~self:gm ~value:networks ; + if guest_metrics_cached.services <> services then + Db.VM_guest_metrics.set_services ~__context ~self:gm ~value:services ; if guest_metrics_cached.other <> other then ( Db.VM_guest_metrics.set_other ~__context ~self:gm ~value:other ; Helpers.call_api_functions ~__context (fun rpc session_id -> diff --git a/ocaml/xapi/xapi_ha.ml b/ocaml/xapi/xapi_ha.ml index b452aaa8221..fda471b1868 100644 --- a/ocaml/xapi/xapi_ha.ml +++ b/ocaml/xapi/xapi_ha.ml @@ -508,6 +508,14 @@ module Monitor = struct let liveset_uuids = List.sort compare (uuids_of_liveset liveset) in + let to_refs uuids = + List.map + (fun uuid -> + Db.Host.get_by_uuid ~__context ~uuid:(Uuidx.to_string uuid) + ) + uuids + in + let last_live_set = to_refs !last_liveset_uuids in if !last_liveset_uuids <> liveset_uuids then ( warn "Liveset looks different; assuming we need to rerun the \ @@ -515,19 +523,13 @@ module Monitor = struct plan_out_of_date := true ; last_liveset_uuids := liveset_uuids ) ; - let liveset_refs = - List.map - (fun uuid -> - Db.Host.get_by_uuid ~__context ~uuid:(Uuidx.to_string uuid) - ) - liveset_uuids - in + let live_set = to_refs liveset_uuids in if local_failover_decisions_are_ok () then ( try Xapi_ha_vm_failover.restart_auto_run_vms ~__context - liveset_refs to_tolerate + ~last_live_set ~live_set to_tolerate with e -> - log_backtrace () ; + log_backtrace e ; error "Caught unexpected exception when executing restart plan: \ %s" @@ -539,9 +541,7 @@ module Monitor = struct (* Next update the Host_metrics.live value to spot hosts coming back *) let 
all_hosts = Db.Host.get_all ~__context in let livemap = - List.map - (fun host -> (host, List.mem host liveset_refs)) - all_hosts + List.map (fun host -> (host, List.mem host live_set)) all_hosts in List.iter (fun (host, live) -> @@ -704,8 +704,7 @@ module Monitor = struct in if plan_too_old || !plan_out_of_date then ( let changed = - Xapi_ha_vm_failover.update_pool_status ~__context - ~live_set:liveset_refs () + Xapi_ha_vm_failover.update_pool_status ~__context ~live_set () in (* Extremely bad: something managed to break our careful plan *) if changed && not !plan_out_of_date then @@ -832,7 +831,7 @@ module Monitor = struct ) ) with e -> - log_backtrace () ; + log_backtrace e ; debug "Exception in HA monitor thread: %s" (ExnHelper.string_of_exn e) ; Thread.delay !Xapi_globs.ha_monitor_interval @@ -1745,6 +1744,11 @@ let disable_internal __context = ) errors ) ; + (* CA-408230: mark current operation, `ha_enable or `ha_disable, as done, + as otherwise it will fail to update_allowed_operations for metadata_vdis + and statefile_vdis *) + let task_id = Ref.string_of (Context.get_task_id __context) in + Db.Pool.remove_from_current_operations ~__context ~self:pool ~key:task_id ; (* Update the allowed operations on the statefile VDIs for tidiness *) List.iter (fun vdi -> Xapi_vdi.update_allowed_operations ~__context ~self:vdi) diff --git a/ocaml/xapi/xapi_ha_vm_failover.ml b/ocaml/xapi/xapi_ha_vm_failover.ml index 5cbb946b150..998088d8f59 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.ml +++ b/ocaml/xapi/xapi_ha_vm_failover.ml @@ -112,7 +112,7 @@ end = struct k x | Task (task, next) -> (* similar reasoning as above, when we get the result we need to chain the computations, - * refer to http://okmij.org/ftp/Computation/free-monad.html for a deeper theoretical explanation *) + * refer to http://okmij.org/ftp/Computation/free-monad.html for a deeper theoretical explanation *) Task (task, fun x -> next x >>= k) end @@ -1223,9 +1223,6 @@ let 
assert_configuration_change_preserves_ha_plan ~__context c = "assert_configuration_change_preserves_ha_plan: plan exists after \ change" | Plan_exists_excluding_non_agile_VMs | No_plan_exists -> - debug - "assert_configuration_change_preserves_ha_plan: proposed change \ - breaks plan" ; raise (Api_errors.Server_error (Api_errors.ha_operation_would_break_failover_plan, []) @@ -1259,9 +1256,26 @@ let restart_failed : (API.ref_VM, unit) Hashtbl.t = Hashtbl.create 10 (* We also limit the rate we attempt to retry starting the VM. *) let last_start_attempt : (API.ref_VM, float) Hashtbl.t = Hashtbl.create 10 +module VMRefOrd = struct + type t = [`VM] Ref.t + + let compare = Ref.compare +end + +module VMMap = Map.Make (VMRefOrd) + +(* When a host is up, it will be added in the HA live set. But it may be still + in disabled state so that starting best-effort VMs on it would fail. + Meanwhile we don't want to retry on starting them forever. + This data is to remember the best-effort VMs which failed to start due to + this and the key of the map is the VM ref. And its value is the count of the + attempts of starting. This is to avoid retrying forever and can be adjusted + according to how long the host becomes enabled since it is in HA live set. 
*) +let tried_best_eff_vms = ref VMMap.empty + (* Takes the current live_set and number of hosts we're planning to handle, updates the host records in the database and restarts any offline protected VMs *) -let restart_auto_run_vms ~__context live_set n = +let restart_auto_run_vms ~__context ~last_live_set ~live_set n = (* ensure we have live=false on the host_metrics for those hosts not in the live_set; and force state to Halted for all VMs that are "running" or "paused" with resident_on set to one of the hosts that is now dead *) @@ -1396,8 +1410,9 @@ let restart_auto_run_vms ~__context live_set n = let open TaskChains.Infix in (* execute the plan *) Helpers.call_api_functions ~__context (fun rpc session_id -> - (* Helper function to start a VM somewhere. If the HA overcommit protection stops us then disable it and try once more. - Returns true if the VM was restarted and false otherwise. *) + (* Helper function to start a VM somewhere. If the HA overcommit + protection stops us then disable it and try once more. Returns true if + the VM was restarted and false otherwise. *) let restart_vm vm ?host () = let go () = ( if Xapi_fist.simulate_restart_failure () then @@ -1562,36 +1577,95 @@ let restart_auto_run_vms ~__context live_set n = in gc_table last_start_attempt ; gc_table restart_failed ; - (* Consider restarting the best-effort VMs we *think* have failed (but we might get this wrong -- - ok since this is 'best-effort'). NOTE we do not use the restart_vm function above as this will mark the - pool as overcommitted if an HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN is received (although this should never - happen it's better safe than sorry) *) - map_parallel - ~order_f:(fun vm -> order_f (vm, Db.VM.get_record ~__context ~self:vm)) - (fun vm -> + (* Consider restarting the best-effort VMs we *think* have failed (but we + might get this wrong -- ok since this is 'best-effort'). 
NOTE we do + not use the restart_vm function above as this will mark the pool as + overcommitted if an HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN is received + (although this should never happen it's better safe than sorry) *) + let is_best_effort r = + r.API.vM_ha_restart_priority = Constants.ha_restart_best_effort + && r.API.vM_power_state = `Halted + in + let resets = + !reset_vms + |> List.map (fun self -> (self, Db.VM.get_record ~__context ~self)) + in + let revalidate_tried m = + let valid, invalid = + VMMap.bindings m + |> List.partition_map (fun (self, _) -> + match Db.VM.get_record ~__context ~self with + | r -> + Left (self, r) + | exception _ -> + Right self + ) + in + let to_retry, to_remove = + List.partition (fun (_, r) -> is_best_effort r) valid + in + let m' = + List.map fst to_remove + |> List.rev_append invalid + |> List.fold_left (fun acc vm -> VMMap.remove vm acc) m + in + (to_retry, m') + in + let best_effort_vms = + (* Carefully decide which best-effort VMs should attempt to start. *) + let all_prot_is_ok = List.for_all (fun (_, r) -> r = Ok ()) started in + let is_better = List.compare_lengths live_set last_live_set > 0 in + ( match (all_prot_is_ok, is_better, last_live_set = live_set) with + | true, true, _ -> + (* Try to start all the best-effort halted VMs when HA is being + enabled or some hosts are transiting to HA live. + The DB has been updated by Xapi_vm_lifecycle.force_state_reset. + Read again. *) + tried_best_eff_vms := VMMap.empty ; + Db.VM.get_all_records ~__context + | true, false, true -> + (* Retry for best-effort VMs which attempted but failed last time. *) + let to_retry, m = revalidate_tried !tried_best_eff_vms in + tried_best_eff_vms := m ; + List.rev_append to_retry resets + | true, false, false | false, _, _ -> + (* Try to start only the reset VMs. They were observed as residing + on the non-live hosts in this run. + Give up starting tried VMs as the HA situation changes. 
*) + tried_best_eff_vms := VMMap.empty ; + resets + ) + |> List.filter (fun (_, r) -> is_best_effort r) + in + map_parallel ~order_f + (fun (vm, _) -> ( vm - , if - Db.VM.get_power_state ~__context ~self:vm = `Halted - && Db.VM.get_ha_restart_priority ~__context ~self:vm - = Constants.ha_restart_best_effort - then - TaskChains.task (fun () -> - Client.Client.Async.VM.start ~rpc ~session_id ~vm - ~start_paused:false ~force:true - ) - else - TaskChains.ok Rpc.Null + , TaskChains.task (fun () -> + Client.Client.Async.VM.start ~rpc ~session_id ~vm + ~start_paused:false ~force:true + ) ) ) - !reset_vms + best_effort_vms |> List.iter (fun (vm, result) -> match result with | Error e -> + tried_best_eff_vms := + VMMap.update vm + (Option.fold ~none:(Some 1) ~some:(fun n -> + if n < !Xapi_globs.ha_best_effort_max_retries then + Some (n + 1) + else + None + ) + ) + !tried_best_eff_vms ; error "Failed to restart best-effort VM %s (%s): %s" (Db.VM.get_uuid ~__context ~self:vm) (Db.VM.get_name_label ~__context ~self:vm) (ExnHelper.string_of_exn e) | Ok _ -> + tried_best_eff_vms := VMMap.remove vm !tried_best_eff_vms ; () ) ) diff --git a/ocaml/xapi/xapi_ha_vm_failover.mli b/ocaml/xapi/xapi_ha_vm_failover.mli index 20eb3b6b844..abf6374822a 100644 --- a/ocaml/xapi/xapi_ha_vm_failover.mli +++ b/ocaml/xapi/xapi_ha_vm_failover.mli @@ -18,7 +18,11 @@ val all_protected_vms : __context:Context.t -> (API.ref_VM * API.vM_t) list val restart_auto_run_vms : - __context:Context.t -> API.ref_host list -> int -> unit + __context:Context.t + -> last_live_set:API.ref_host list + -> live_set:API.ref_host list + -> int + -> unit (** Take a set of live VMs and attempt to restart all protected VMs which have failed *) val compute_evacuation_plan : diff --git a/ocaml/xapi/xapi_hooks.ml b/ocaml/xapi/xapi_hooks.ml index 2f9edaff073..a7ba2d75548 100644 --- a/ocaml/xapi/xapi_hooks.ml +++ b/ocaml/xapi/xapi_hooks.ml @@ -102,13 +102,11 @@ let execute_hook ~__context ~script_name ~args ~reason = ) scripts 
-let execute_vm_hook ~__context ~reason ~vm = - let vmuuid = Db.VM.get_uuid ~__context ~self:vm in - execute_hook ~__context ~args:["-vmuuid"; vmuuid] ~reason +let execute_vm_hook ~__context ~reason ~vm_uuid = + execute_hook ~__context ~args:["-vmuuid"; vm_uuid] ~reason -let execute_host_hook ~__context ~reason ~host = - let uuid = Db.Host.get_uuid ~__context ~self:host in - execute_hook ~__context ~args:["-hostuuid"; uuid] ~reason +let execute_host_hook ~__context ~reason ~host_uuid = + execute_hook ~__context ~args:["-hostuuid"; host_uuid] ~reason let execute_pool_hook ~__context ~reason = execute_hook ~__context ~args:[] ~reason @@ -116,8 +114,9 @@ let execute_pool_hook ~__context ~reason = let host_pre_declare_dead ~__context ~host ~reason = info "Running host pre declare dead hook for %s" (Ref.string_of host) ; (* this could use power fencing *) + let host_uuid = Db.Host.get_uuid ~__context ~self:host in execute_host_hook ~__context ~script_name:scriptname__host_pre_declare_dead - ~reason ~host ; + ~reason ~host_uuid ; if String.equal reason reason__dbdestroy then log_and_ignore_exn (fun () -> (* declare it as dead to the clustering daemon if any *) @@ -132,11 +131,10 @@ let host_pre_declare_dead ~__context ~host ~reason = () ) -let xapi_pre_shutdown ~__context ~host ~reason = - info "%s Running xapi pre shutdown hooks for %s" __FUNCTION__ - (Ref.string_of host) ; +let xapi_pre_shutdown ~__context ~host_uuid ~reason = + info "%s Running xapi pre shutdown hooks for %s" __FUNCTION__ host_uuid ; execute_host_hook ~__context ~script_name:scriptname__xapi_pre_shutdown - ~reason ~host + ~reason ~host_uuid (* Called when host died -- !! 
hook code in here to abort outstanding forwarded ops *) let internal_host_dead_hook __context host = @@ -159,8 +157,9 @@ let internal_host_dead_hook __context host = let host_post_declare_dead ~__context ~host ~reason = (* Cancel outstanding tasks first-- should release necessary locks *) internal_host_dead_hook __context host ; + let host_uuid = Db.Host.get_uuid ~__context ~self:host in execute_host_hook ~__context ~script_name:scriptname__host_post_declare_dead - ~reason ~host + ~reason ~host_uuid let pool_ha_overcommitted_hook ~__context = execute_pool_hook ~__context ~script_name:scriptname__pool_ha_overcommitted diff --git a/ocaml/xapi/xapi_host.ml b/ocaml/xapi/xapi_host.ml index 31a1ded1a53..fa10169efc0 100644 --- a/ocaml/xapi/xapi_host.ml +++ b/ocaml/xapi/xapi_host.ml @@ -74,7 +74,7 @@ let set_power_on_mode ~__context ~self ~power_on_mode ~power_on_config = + HA is enabled and this host has broken storage or networking which would cause protected VMs to become non-agile *) -let assert_safe_to_reenable ~__context ~self = +let assert_safe_to_reenable ~__context ~self ~user_request = assert_startup_complete () ; Repository_helpers.assert_no_host_pending_mandatory_guidance ~__context ~host:self ; @@ -87,6 +87,14 @@ let assert_safe_to_reenable ~__context ~self = (Api_errors.Server_error (Api_errors.host_disabled_until_reboot, [Ref.string_of self]) ) ; + let host_auto_enable = + try bool_of_string (Localdb.get Constants.host_auto_enable) with _ -> true + in + if (not host_auto_enable) && not user_request then + raise + (Api_errors.Server_error + (Api_errors.host_disabled_indefinitely, [Ref.string_of self]) + ) ; if Db.Pool.get_ha_enabled ~__context ~self:(Helpers.get_pool ~__context) then ( let pbds = Db.Host.get_PBDs ~__context ~self in let unplugged_pbds = @@ -119,6 +127,8 @@ let pool_size_is_restricted ~__context = not (Pool_features.is_enabled ~__context Features.Pool_size) let bugreport_upload ~__context ~host:_ ~url ~options = + if url = "" then + raise 
Api_errors.(Server_error (invalid_value, ["url"; ""])) ; let proxy = if List.mem_assoc "http_proxy" options then List.assoc "http_proxy" options @@ -285,13 +295,18 @@ let compute_evacuation_plan_no_wlb ~__context ~host ?(ignore_ha = false) () = the source host. So as long as host versions aren't decreasing, we're allowed to migrate VMs between hosts. *) debug "evacuating host version: %s" - (Helpers.version_string_of ~__context (Helpers.LocalObject host)) ; + (Helpers.get_software_versions ~__context (Helpers.LocalObject host) + |> Helpers.versions_string_of + ) ; let target_hosts = List.filter (fun target -> debug "host %s version: %s" (Db.Host.get_hostname ~__context ~self:target) - (Helpers.version_string_of ~__context (Helpers.LocalObject target)) ; + Helpers.( + get_software_versions ~__context (LocalObject target) + |> versions_string_of + ) ; Helpers.host_versions_not_decreasing ~__context ~host_from:(Helpers.LocalObject host) ~host_to:(Helpers.LocalObject target) @@ -487,7 +502,8 @@ let compute_evacuation_plan_wlb ~__context ~self = if Db.Host.get_control_domain ~__context ~self:target_host <> v && Db.Host.get_uuid ~__context ~self:resident_h = target_uuid - then (* resident host and migration host are the same. Reject this plan *) + (* resident host and migration host are the same. Reject this plan *) + then raise (Api_errors.Server_error ( Api_errors.wlb_malformed_response @@ -649,8 +665,9 @@ let evacuate ~__context ~host ~network ~evacuate_batch_size = raise (Api_errors.Server_error (code, params)) in - (* execute [n] asynchronous API calls [api_fn] for [xs] and wait for them to - finish before executing the next batch. 
*) + (* execute [plans_length] asynchronous API calls [api_fn] for [xs] in batches + of [n] at a time, scheduling a new call as soon as one of the tasks from + the previous batch is completed *) let batch ~__context n api_fn xs = let finally = Xapi_stdext_pervasives.Pervasiveext.finally in let destroy = Client.Client.Task.destroy in @@ -675,27 +692,55 @@ let evacuate ~__context ~host ~network ~evacuate_batch_size = fail task "unexpected status of migration task" in - let rec loop xs = - match take n xs with - | [], _ -> - () - | head, tail -> - Helpers.call_api_functions ~__context @@ fun rpc session_id -> - let tasks = List.map (api_fn ~rpc ~session_id) head in - finally - (fun () -> - Tasks.wait_for_all ~rpc ~session_id ~tasks ; - List.iter assert_success tasks ; - let tail_length = List.length tail |> float in - let progress = 1.0 -. (tail_length /. plans_length) in - TaskHelper.set_progress ~__context progress + Helpers.call_api_functions ~__context @@ fun rpc session_id -> + ( match take n xs with + | [], _ -> + () + | head, tasks_left -> + let tasks_left = ref tasks_left in + let initial_task_batch = List.map (api_fn ~rpc ~session_id) head in + let tasks_pending = + ref + (List.fold_left + (fun task_set' task -> Tasks.TaskSet.add task task_set') + Tasks.TaskSet.empty initial_task_batch ) - (fun () -> - List.iter (fun self -> destroy ~rpc ~session_id ~self) tasks - ) ; - loop tail - in - loop xs ; + in + + let single_task_progress = 1.0 /. plans_length in + let on_each_task_completion completed_task_count completed_task = + (* Clean up the completed task *) + assert_success completed_task ; + destroy ~rpc ~session_id ~self:completed_task ; + tasks_pending := Tasks.TaskSet.remove completed_task !tasks_pending ; + + (* Update progress *) + let progress = + Int.to_float completed_task_count *. 
single_task_progress + in + TaskHelper.set_progress ~__context progress ; + + (* Schedule a new task, if there are any left *) + match !tasks_left with + | [] -> + [] + | task_to_schedule :: left -> + tasks_left := left ; + let new_task = api_fn ~rpc ~session_id task_to_schedule in + tasks_pending := Tasks.TaskSet.add new_task !tasks_pending ; + [new_task] + in + finally + (fun () -> + Tasks.wait_for_all_with_callback ~rpc ~session_id + ~tasks:initial_task_batch ~callback:on_each_task_completion + ) + (fun () -> + Tasks.TaskSet.iter + (fun self -> destroy ~rpc ~session_id ~self) + !tasks_pending + ) + ) ; TaskHelper.set_progress ~__context 1.0 in @@ -764,26 +809,29 @@ let restart_agent ~__context ~host:_ = ) let shutdown_agent ~__context = - debug "Host.restart_agent: Host agent will shutdown in 1s!!!!" ; - let localhost = Helpers.get_localhost ~__context in - Xapi_hooks.xapi_pre_shutdown ~__context ~host:localhost + debug "Host.shutdown_agent: Host agent will shutdown in 1s!!!!" ; + let host_uuid = Helpers.get_localhost_uuid () in + Xapi_hooks.xapi_pre_shutdown ~__context ~host_uuid ~reason:Xapi_hooks.reason__clean_shutdown ; Xapi_fuse.light_fuse_and_dont_restart ~fuse_length:1. 
() -let disable ~__context ~host = +let disable ~__context ~host ~auto_enable = if Db.Host.get_enabled ~__context ~self:host then ( info "Host.enabled: setting host %s (%s) to disabled because of user request" (Ref.string_of host) (Db.Host.get_hostname ~__context ~self:host) ; Db.Host.set_enabled ~__context ~self:host ~value:false ; - Xapi_host_helpers.user_requested_host_disable := true + Xapi_host_helpers.user_requested_host_disable := true ; + if not auto_enable then + Localdb.put Constants.host_auto_enable "false" ) let enable ~__context ~host = if not (Db.Host.get_enabled ~__context ~self:host) then ( - assert_safe_to_reenable ~__context ~self:host ; + assert_safe_to_reenable ~__context ~self:host ~user_request:true ; Xapi_host_helpers.user_requested_host_disable := false ; + Localdb.put Constants.host_auto_enable "true" ; info "Host.enabled: setting host %s (%s) to enabled because of user request" (Ref.string_of host) (Db.Host.get_hostname ~__context ~self:host) ; @@ -978,7 +1026,8 @@ let is_host_alive ~__context ~host = let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~external_auth_type ~external_auth_service_name ~external_auth_configuration ~license_params ~edition ~license_server ~local_cache_sr ~chipset_info - ~ssl_legacy:_ ~last_software_update ~last_update_hash = + ~ssl_legacy:_ ~last_software_update ~last_update_hash ~ssh_enabled + ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode = (* fail-safe. We already test this on the joining host, but it's racy, so multiple concurrent pool-join might succeed. 
Note: we do it in this order to avoid a problem checking restrictions during the initial setup of the database *) @@ -1042,7 +1091,8 @@ let create ~__context ~uuid ~name_label ~name_description:_ ~hostname ~address ~multipathing:false ~uefi_certificates:"" ~editions:[] ~pending_guidances:[] ~tls_verification_enabled ~last_software_update ~last_update_hash ~recommended_guidances:[] ~latest_synced_updates_applied:`unknown - ~pending_guidances_recommended:[] ~pending_guidances_full:[] ; + ~pending_guidances_recommended:[] ~pending_guidances_full:[] ~ssh_enabled + ~ssh_enabled_timeout ~ssh_expiry ~console_idle_timeout ~ssh_auto_mode ; (* If the host we're creating is us, make sure its set to live *) Db.Host_metrics.set_last_updated ~__context ~self:metrics ~value:(Date.now ()) ; Db.Host_metrics.set_live ~__context ~self:metrics ~value:host_is_us ; @@ -1583,19 +1633,17 @@ let install_server_certificate ~__context ~host ~certificate ~private_key replace_host_certificate ~__context ~type':`host ~host write_cert_fs let _new_host_cert ~dbg ~path : X509.Certificate.t = - let ip_as_string, ip = - match Networking_info.get_management_ip_addr ~dbg with - | None -> + let name, dns_names, ips = + match Networking_info.get_host_certificate_subjects ~dbg with + | Error cause -> + let msg = Networking_info.management_ip_error_to_string cause in Helpers.internal_error ~log_err:true ~err_fun:D.error - "%s: failed to get management IP" __LOC__ - | Some ip -> - ip + "%s: failed to generate certificate subjects because %s" __LOC__ msg + | Ok (name, dns_names, ips) -> + (name, dns_names, ips) in - let dns_names = Networking_info.dns_names () in - let cn = match dns_names with [] -> ip_as_string | dns :: _ -> dns in - let ips = [ip] in let valid_for_days = !Xapi_globs.cert_expiration_days in - Gencertlib.Selfcert.host ~name:cn ~dns_names ~ips ~valid_for_days path + Gencertlib.Selfcert.host ~name ~dns_names ~ips ~valid_for_days path !Xapi_globs.server_cert_group_id let 
reset_server_certificate ~__context ~host = @@ -1740,7 +1788,6 @@ let enable_external_auth ~__context ~host ~config ~service_name ~auth_type = raise (Api_errors.Server_error (Api_errors.auth_unknown_type, [msg])) ) else (* if no auth_type is currently defined (it is an empty string), then we can set up a new one *) - (* we try to use the configuration to set up the new external authentication service *) (* we persist as much set up configuration now as we can *) @@ -2050,8 +2097,8 @@ let apply_edition_internal ~__context ~host ~edition ~additional = raise Api_errors.(Server_error (license_processing_error, [])) | V6_interface.(V6_error Missing_connection_details) -> raise Api_errors.(Server_error (missing_connection_details, [])) - | V6_interface.(V6_error (License_checkout_error s)) -> - raise Api_errors.(Server_error (license_checkout_error, [s])) + | V6_interface.(V6_error (License_checkout_error (code, msg))) -> + raise Api_errors.(Server_error (license_checkout_error, [code; msg])) | V6_interface.(V6_error (Internal_error e)) -> Helpers.internal_error "%s" e in @@ -2156,19 +2203,19 @@ let reset_networking ~__context ~host = (Db.PIF.get_all ~__context) in let bond_is_local bond = - List.fold_left - (fun a pif -> Db.Bond.get_master ~__context ~self:bond = pif || a) - false local_pifs + List.exists + (fun pif -> Db.Bond.get_master ~__context ~self:bond = pif) + local_pifs in let vlan_is_local vlan = - List.fold_left - (fun a pif -> Db.VLAN.get_untagged_PIF ~__context ~self:vlan = pif || a) - false local_pifs + List.exists + (fun pif -> Db.VLAN.get_untagged_PIF ~__context ~self:vlan = pif) + local_pifs in let tunnel_is_local tunnel = - List.fold_left - (fun a pif -> Db.Tunnel.get_access_PIF ~__context ~self:tunnel = pif || a) - false local_pifs + List.exists + (fun pif -> Db.Tunnel.get_access_PIF ~__context ~self:tunnel = pif) + local_pifs in let bonds = List.filter bond_is_local (Db.Bond.get_all ~__context) in List.iter @@ -2740,7 +2787,7 @@ let 
write_uefi_certificates_to_disk ~__context ~host = ["KEK.auth"; "db.auth"] |> List.iter (fun cert -> let log_of found = - (if found then info else error) + (if found then info else warn) "check_valid_uefi_certs: %s %s in %s" (if found then "found" else "missing") cert path @@ -2792,6 +2839,7 @@ let set_uefi_certificates ~__context ~host:_ ~value:_ = let set_iscsi_iqn ~__context ~host ~value = if value = "" then raise Api_errors.(Server_error (invalid_value, ["value"; value])) ; + D.debug "%s: iqn=%S" __FUNCTION__ value ; (* Note, the following sequence is carefully written - see the other-config watcher thread in xapi_host_helpers.ml *) Db.Host.remove_from_other_config ~__context ~self:host ~key:"iscsi_iqn" ; @@ -2804,7 +2852,7 @@ let set_iscsi_iqn ~__context ~host ~value = * when you update the `iscsi_iqn` field we want to update `other_config`, * but when updating `other_config` we want to update `iscsi_iqn` too. * we have to be careful not to introduce an infinite loop of updates. - * *) + *) Db.Host.set_iscsi_iqn ~__context ~self:host ~value ; Db.Host.add_to_other_config ~__context ~self:host ~key:"iscsi_iqn" ~value ; Xapi_host_helpers.Configuration.set_initiator_name value @@ -3057,7 +3105,7 @@ let apply_updates ~__context ~self ~hash = if Db.Pool.get_ha_enabled ~__context ~self:pool then raise Api_errors.(Server_error (ha_is_enabled, [])) ; if Db.Host.get_enabled ~__context ~self then ( - disable ~__context ~host:self ; + disable ~__context ~host:self ~auto_enable:true ; Xapi_host_helpers.update_allowed_operations ~__context ~self ) ; Xapi_host_helpers.with_host_operation ~__context ~self @@ -3111,3 +3159,181 @@ let emergency_clear_mandatory_guidance ~__context = info "%s: %s is cleared" __FUNCTION__ s ) ; Db.Host.set_pending_guidances ~__context ~self ~value:[] + +let set_ssh_auto_mode ~__context ~self ~value = + debug "Setting SSH auto mode for host %s to %B" + (Helpers.get_localhost_uuid ()) + value ; + + Db.Host.set_ssh_auto_mode ~__context ~self ~value ; 
+ + try + (* When enabled, the ssh_monitor_service regularly checks XAPI status to manage SSH availability. + During normal operation when XAPI is running properly, SSH is automatically disabled. + SSH is only enabled during emergency scenarios + (e.g., when XAPI is down) to allow administrative access for troubleshooting. *) + if value then ( + (* Ensure SSH is always enabled when SSH auto mode is on*) + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_service ; + Xapi_systemctl.enable ~wait_until_success:false + !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.start ~wait_until_success:false + !Xapi_globs.ssh_monitor_service + ) else ( + Xapi_systemctl.stop ~wait_until_success:false + !Xapi_globs.ssh_monitor_service ; + Xapi_systemctl.disable ~wait_until_success:false + !Xapi_globs.ssh_monitor_service + ) + with e -> + error "Failed to configure SSH auto mode: %s" (Printexc.to_string e) ; + Helpers.internal_error "Failed to configure SSH auto mode: %s" + (Printexc.to_string e) + +let disable_ssh_internal ~__context ~self = + try + debug "Disabling SSH for host %s" (Helpers.get_localhost_uuid ()) ; + if not (Db.Host.get_ssh_auto_mode ~__context ~self) then + Xapi_systemctl.disable ~wait_until_success:false !Xapi_globs.ssh_service ; + Xapi_systemctl.stop ~wait_until_success:false !Xapi_globs.ssh_service ; + Db.Host.set_ssh_enabled ~__context ~self ~value:false + with e -> + error "Failed to disable SSH for host %s: %s" (Ref.string_of self) + (Printexc.to_string e) ; + Helpers.internal_error "Failed to disable SSH access, host: %s" + (Ref.string_of self) + +let set_expiry ~__context ~self ~timeout = + let expiry_time = + match + Ptime.add_span (Ptime_clock.now ()) + (Ptime.Span.of_int_s (Int64.to_int timeout)) + with + | None -> + error "Invalid SSH timeout: %Ld" timeout ; + raise + (Api_errors.Server_error + ( Api_errors.invalid_value + , ["ssh_enabled_timeout"; Int64.to_string timeout] + ) + ) + | Some t -> + Ptime.to_float_s t |> 
Date.of_unix_time + in + Db.Host.set_ssh_expiry ~__context ~self ~value:expiry_time + +let schedule_disable_ssh_job ~__context ~self ~timeout ~auto_mode = + let host_uuid = Helpers.get_localhost_uuid () in + + debug "Scheduling SSH disable job for host %s with timeout %Ld seconds" + host_uuid timeout ; + + (* Remove any existing job first *) + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + + Xapi_stdext_threads_scheduler.Scheduler.add_to_queue + !Xapi_globs.job_for_disable_ssh + Xapi_stdext_threads_scheduler.Scheduler.OneShot (Int64.to_float timeout) + (fun () -> + disable_ssh_internal ~__context ~self ; + (* re-enable SSH auto mode if it was enabled before calling host.enable_ssh *) + if auto_mode then + set_ssh_auto_mode ~__context ~self ~value:true + ) + +let enable_ssh ~__context ~self = + try + debug "Enabling SSH for host %s" (Helpers.get_localhost_uuid ()) ; + + let cached_ssh_auto_mode = Db.Host.get_ssh_auto_mode ~__context ~self in + (* Disable SSH auto mode when SSH is enabled manually *) + set_ssh_auto_mode ~__context ~self ~value:false ; + + Xapi_systemctl.enable ~wait_until_success:false !Xapi_globs.ssh_service ; + Xapi_systemctl.start ~wait_until_success:false !Xapi_globs.ssh_service ; + + let timeout = Db.Host.get_ssh_enabled_timeout ~__context ~self in + ( match timeout with + | 0L -> + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + Db.Host.set_ssh_expiry ~__context ~self ~value:Date.epoch + | t -> + set_expiry ~__context ~self ~timeout:t ; + schedule_disable_ssh_job ~__context ~self ~timeout:t + ~auto_mode:cached_ssh_auto_mode + ) ; + + Db.Host.set_ssh_enabled ~__context ~self ~value:true + with e -> + error "Failed to enable SSH on host %s: %s" (Ref.string_of self) + (Printexc.to_string e) ; + Helpers.internal_error "Failed to enable SSH access, host: %s" + (Ref.string_of self) + +let disable_ssh ~__context ~self = + 
Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + disable_ssh_internal ~__context ~self ; + Db.Host.set_ssh_expiry ~__context ~self ~value:(Date.now ()) + +let set_ssh_enabled_timeout ~__context ~self ~value = + let validate_timeout value = + (* the max timeout is two days: 172800L = 2*24*60*60 *) + if value < 0L || value > 172800L then + raise + (Api_errors.Server_error + ( Api_errors.invalid_value + , ["ssh_enabled_timeout"; Int64.to_string value] + ) + ) + in + validate_timeout value ; + debug "Setting SSH timeout for host %s to %Ld seconds" + (Db.Host.get_uuid ~__context ~self) + value ; + Db.Host.set_ssh_enabled_timeout ~__context ~self ~value ; + if Db.Host.get_ssh_enabled ~__context ~self then + match value with + | 0L -> + Xapi_stdext_threads_scheduler.Scheduler.remove_from_queue + !Xapi_globs.job_for_disable_ssh ; + Db.Host.set_ssh_expiry ~__context ~self ~value:Date.epoch + | t -> + set_expiry ~__context ~self ~timeout:t ; + schedule_disable_ssh_job ~__context ~self ~timeout:t ~auto_mode:false + +let set_console_idle_timeout ~__context ~self ~value = + let assert_timeout_valid timeout = + if timeout < 0L then + raise + (Api_errors.Server_error + ( Api_errors.invalid_value + , ["console_timeout"; Int64.to_string timeout] + ) + ) + in + + assert_timeout_valid value ; + try + let content = + match value with + | 0L -> + "# Console timeout is disabled\n" + | timeout -> + Printf.sprintf "# Console timeout configuration\nexport TMOUT=%Ld\n" + timeout + in + + Unixext.atomic_write_to_file !Xapi_globs.console_timeout_profile_path 0o0644 + (fun fd -> + Unix.write fd (Bytes.of_string content) 0 (String.length content) + |> ignore + ) ; + + Db.Host.set_console_idle_timeout ~__context ~self ~value + with e -> + error "Failed to configure console timeout: %s" (Printexc.to_string e) ; + Helpers.internal_error "Failed to set console timeout: %Ld: %s" value + (Printexc.to_string e) diff --git a/ocaml/xapi/xapi_host.mli 
b/ocaml/xapi/xapi_host.mli index 1799b651085..19d25361528 100644 --- a/ocaml/xapi/xapi_host.mli +++ b/ocaml/xapi/xapi_host.mli @@ -79,7 +79,8 @@ val restart_agent : __context:'a -> host:'b -> unit val shutdown_agent : __context:Context.t -> unit -val disable : __context:Context.t -> host:[`host] Ref.t -> unit +val disable : + __context:Context.t -> host:[`host] Ref.t -> auto_enable:bool -> unit val enable : __context:Context.t -> host:[`host] Ref.t -> unit @@ -130,6 +131,11 @@ val create : -> ssl_legacy:bool -> last_software_update:API.datetime -> last_update_hash:string + -> ssh_enabled:bool + -> ssh_enabled_timeout:int64 + -> ssh_expiry:API.datetime + -> console_idle_timeout:int64 + -> ssh_auto_mode:bool -> [`host] Ref.t val destroy : __context:Context.t -> self:API.ref_host -> unit @@ -563,3 +569,23 @@ val set_https_only : __context:Context.t -> self:API.ref_host -> value:bool -> unit val emergency_clear_mandatory_guidance : __context:Context.t -> unit + +val enable_ssh : __context:Context.t -> self:API.ref_host -> unit + +val disable_ssh : __context:Context.t -> self:API.ref_host -> unit + +val set_ssh_enabled_timeout : + __context:Context.t -> self:API.ref_host -> value:int64 -> unit + +val set_console_idle_timeout : + __context:Context.t -> self:API.ref_host -> value:int64 -> unit + +val schedule_disable_ssh_job : + __context:Context.t + -> self:API.ref_host + -> timeout:int64 + -> auto_mode:bool + -> unit + +val set_ssh_auto_mode : + __context:Context.t -> self:API.ref_host -> value:bool -> unit diff --git a/ocaml/xapi/xapi_host_crashdump.ml b/ocaml/xapi/xapi_host_crashdump.ml index 645e1e6fc33..02e8303d777 100644 --- a/ocaml/xapi/xapi_host_crashdump.ml +++ b/ocaml/xapi/xapi_host_crashdump.ml @@ -149,5 +149,6 @@ let destroy ~__context ~self = let upload ~__context ~self ~url ~options = let filename = Db.Host_crashdump.get_filename ~__context ~self in - let url = if url = "" then upload_url filename else url in + if url = "" then + raise 
Api_errors.(Server_error (invalid_value, ["url"; ""])) ; do_upload "host-crash-upload" (crash_dir ^ "/" ^ filename) url options diff --git a/ocaml/xapi/xapi_host_driver.ml b/ocaml/xapi/xapi_host_driver.ml index 0416b1de33f..b44ee615c64 100644 --- a/ocaml/xapi/xapi_host_driver.ml +++ b/ocaml/xapi/xapi_host_driver.ml @@ -142,7 +142,8 @@ let select ~__context ~self ~variant = if v.API.driver_variant_hardware_present = false then no_hardware (Ref.string_of variant) ; let stdout = - Tool.call ["select"; d.API.host_driver_name; v.API.driver_variant_name] + Tool.call + ["-s"; "-n"; d.API.host_driver_name; "-v"; v.API.driver_variant_name] in info "%s: %s" __FUNCTION__ stdout ; Db.Host_driver.set_selected_variant ~__context ~self ~value:variant @@ -154,7 +155,7 @@ let select ~__context ~self ~variant = let deselect ~__context ~self = D.debug "%s driver %s" __FUNCTION__ (Ref.string_of self) ; let d = Db.Host_driver.get_record ~__context ~self in - let stdout = Tool.call ["deselect"; d.API.host_driver_name] in + let stdout = Tool.call ["-d"; d.API.host_driver_name] in info "%s: %s" __FUNCTION__ stdout ; Db.Host_driver.set_active_variant ~__context ~self ~value:Ref.null ; Db.Host_driver.set_selected_variant ~__context ~self ~value:Ref.null diff --git a/ocaml/xapi/xapi_host_driver_tool.ml b/ocaml/xapi/xapi_host_driver_tool.ml index 80fe5d208b1..0dd837dda4d 100644 --- a/ocaml/xapi/xapi_host_driver_tool.ml +++ b/ocaml/xapi/xapi_host_driver_tool.ml @@ -243,6 +243,15 @@ module Mock = struct set -o errexit set -o pipefail +function deselect { + cat <&2 #>&2 redirects error message to stderr exit 1 @@ -656,6 +670,11 @@ if $s_flag; then selection "$n_value" "$v_value" exit 0 fi + +if [ -n "$d_value" ]; then + deselect "$d_value" + exit 0 +fi |} let install () = diff --git a/ocaml/xapi/xapi_host_helpers.ml b/ocaml/xapi/xapi_host_helpers.ml index 834b34beb4b..da837f9329a 100644 --- a/ocaml/xapi/xapi_host_helpers.ml +++ b/ocaml/xapi/xapi_host_helpers.ml @@ -31,7 +31,7 @@ let 
all_operations = API.host_allowed_operations__all (** Returns a table of operations -> API error options (None if the operation would be ok) *) let valid_operations ~__context record _ref' = let _ref = Ref.string_of _ref' in - let current_ops = List.map snd record.Db_actions.host_current_operations in + let current_ops = record.Db_actions.host_current_operations in let table = Hashtbl.create 10 in List.iter (fun x -> Hashtbl.replace table x None) all_operations ; let set_errors (code : string) (params : string list) @@ -49,40 +49,53 @@ let valid_operations ~__context record _ref' = let is_creating_new x = List.mem x [`provision; `vm_resume; `vm_migrate] in let is_removing x = List.mem x [`evacuate; `reboot; `shutdown] in let creating_new = - List.fold_left (fun acc op -> acc || is_creating_new op) false current_ops - in - let removing = - List.fold_left (fun acc op -> acc || is_removing op) false current_ops + List.find_opt (fun (_, op) -> is_creating_new op) current_ops in + let removing = List.find_opt (fun (_, op) -> is_removing op) current_ops in List.iter (fun op -> - if (is_creating_new op && removing) || (is_removing op && creating_new) - then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string (List.hd current_ops)] - [op] + match (is_creating_new op, removing, is_removing op, creating_new) with + | true, Some (op_ref, op_type), _, _ | _, _, true, Some (op_ref, op_type) + -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string op_type; op_ref] + [op] + | _ -> + () ) (List.filter (fun x -> x <> `power_on) all_operations) ; (* reboot, shutdown and apply_updates cannot run concurrently *) - if List.mem `reboot current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string `reboot] - [`shutdown; `apply_updates] ; - if List.mem `shutdown current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; 
host_operation_to_string `shutdown] - [`reboot; `apply_updates] ; - if List.mem `apply_updates current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string `apply_updates] - [`reboot; `shutdown; `enable] ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `reboot; op_ref] + [`shutdown; `apply_updates] + ) + (List.find_opt (fun (_, op) -> op = `reboot) current_ops) ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `shutdown; op_ref] + [`reboot; `apply_updates] + ) + (List.find_opt (fun (_, op) -> op = `shutdown) current_ops) ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `apply_updates; op_ref] + [`reboot; `shutdown; `enable] + ) + (List.find_opt (fun (_, op) -> op = `apply_updates) current_ops) ; (* Prevent more than one provision happening at a time to prevent extreme dom0 load (in the case of the debian template). Once the template becomes a 'real' template we can relax this. 
*) - if List.mem `provision current_ops then - set_errors Api_errors.other_operation_in_progress - ["host"; _ref; host_operation_to_string `provision] - [`provision] ; + Option.iter + (fun (op_ref, _op_type) -> + set_errors Api_errors.other_operation_in_progress + ["host"; _ref; host_operation_to_string `provision; op_ref] + [`provision] + ) + (List.find_opt (fun (_, op) -> op = `provision) current_ops) ; (* The host must be disabled before reboots or shutdowns are permitted *) if record.Db_actions.host_enabled then set_errors Api_errors.host_not_disabled [] @@ -409,17 +422,33 @@ let consider_enabling_host_nolock ~__context = else f () in + let host_auto_enable = + try bool_of_string (Localdb.get Constants.host_auto_enable) + with _ -> true + in if !Xapi_globs.on_system_boot then ( debug "Host.enabled: system has just restarted" ; if_no_pending_guidances (fun () -> debug "Host.enabled: system has just restarted and no pending mandatory \ - guidances: setting localhost to enabled" ; - Db.Host.set_enabled ~__context ~self:localhost ~value:true ; - update_allowed_operations ~__context ~self:localhost ; + guidances: clearing host_disabled_until_reboot" ; Localdb.put Constants.host_disabled_until_reboot "false" ; - (* Start processing pending VM powercycle events *) - Local_work_queue.start_vm_lifecycle_queue () + + (* If the host was persistently disabled, honour it *) + if host_auto_enable then ( + debug + "Host.enabled: system has just restarted, no pending mandatory \ + guidances and host_auto_enable=true: setting localhost to \ + enabled" ; + Db.Host.set_enabled ~__context ~self:localhost ~value:true ; + update_allowed_operations ~__context ~self:localhost ; + (* Start processing pending VM powercycle events *) + Local_work_queue.start_vm_lifecycle_queue () + ) else + debug + "Host.enabled: system has just restarted, no pending mandatory \ + guidances, but host_auto_enable=false: Leaving host disabled \ + until manually re-enabled by the user" ) ) else if try 
bool_of_string (Localdb.get Constants.host_disabled_until_reboot) @@ -433,14 +462,22 @@ let consider_enabling_host_nolock ~__context = "Host.enabled: system not just rebooted && host_disabled_until_reboot \ not set" ; if_no_pending_guidances (fun () -> - debug - "Host.enabled: system not just rebooted && \ - host_disabled_until_reboot not set and no pending mandatory \ - guidances: setting localhost to enabled" ; - Db.Host.set_enabled ~__context ~self:localhost ~value:true ; - update_allowed_operations ~__context ~self:localhost ; - (* Start processing pending VM powercycle events *) - Local_work_queue.start_vm_lifecycle_queue () + if host_auto_enable then ( + debug + "Host.enabled: system not just rebooted && \ + host_disabled_until_reboot not set and no pending mandatory \ + guidances and host_auto_enable=true: setting localhost to \ + enabled" ; + Db.Host.set_enabled ~__context ~self:localhost ~value:true ; + update_allowed_operations ~__context ~self:localhost ; + (* Start processing pending VM powercycle events *) + Local_work_queue.start_vm_lifecycle_queue () + ) else + debug + "Host.enabled: system not just rebooted && \ + host_disabled_until_reboot not set and no pending mandatory \ + guidances but host_auto_enable=false: Leaving host disabled \ + until manually re-enabled by the user" ) ) ; (* If Host has been enabled and HA is also enabled then tell the master to recompute its plan *) @@ -497,10 +534,13 @@ module Configuration = struct [iqn; hostname_chopped] let set_initiator_name iqn = + if iqn = "" then + raise Api_errors.(Server_error (invalid_value, ["iqn"; iqn])) ; let hostname = Unix.gethostname () in (* CA-377454 - robustness, create dir if necessary *) Unixext.mkdir_rec "/var/lock/sm/iscsiadm" 0o700 ; let args = make_set_initiator_args iqn hostname in + D.debug "%s: iqn=%S" __FUNCTION__ iqn ; ignore (Helpers.call_script !Xapi_globs.set_iSCSI_initiator_script args) let set_multipathing enabled = @@ -541,6 +581,7 @@ module Configuration = struct | 
Some "" -> () | Some iqn when iqn <> host_rec.API.host_iscsi_iqn -> + D.debug "%s: iqn=%S" __FUNCTION__ iqn ; Client.Client.Host.set_iscsi_iqn ~rpc ~session_id ~host:host_ref ~value:iqn | _ -> diff --git a/ocaml/xapi/xapi_host_helpers.mli b/ocaml/xapi/xapi_host_helpers.mli index 519aa34a560..84cc271c652 100644 --- a/ocaml/xapi/xapi_host_helpers.mli +++ b/ocaml/xapi/xapi_host_helpers.mli @@ -79,7 +79,8 @@ val consider_enabling_host : __context:Context.t -> unit {ul {- the user asked the host to be disabled and there was a problem} {- HA is enabled and one-or-more PBDs failed to plug} - {- `disabled_until_next_reboot` is set in the local DB}} + {- `host_disabled_until_reboot` is set in the local DB and the system + hasn't just booted up}} *) val consider_enabling_host_request : __context:Context.t -> unit diff --git a/ocaml/xapi/xapi_local_session.ml b/ocaml/xapi/xapi_local_session.ml index e356ae87256..709275077b0 100644 --- a/ocaml/xapi/xapi_local_session.ml +++ b/ocaml/xapi/xapi_local_session.ml @@ -31,12 +31,9 @@ let create ~__context ~pool = with_lock m (fun () -> Hashtbl.replace table r session) ; r -let get_record ~__context ~self = with_lock m (fun () -> Hashtbl.find table self) +let has_record ~__context ~self = with_lock m (fun () -> Hashtbl.mem table self) let destroy ~__context ~self = with_lock m (fun () -> Hashtbl.remove table self) let local_session_hook ~__context ~session_id = - try - ignore (get_record ~__context ~self:session_id) ; - true - with _ -> false + has_record ~__context ~self:session_id diff --git a/ocaml/xapi/xapi_local_session.mli b/ocaml/xapi/xapi_local_session.mli index ca8c1810018..8e7c4d31bc9 100644 --- a/ocaml/xapi/xapi_local_session.mli +++ b/ocaml/xapi/xapi_local_session.mli @@ -19,8 +19,6 @@ val get_all : __context:Context.t -> API.ref_session list val create : __context:Context.t -> pool:bool -> API.ref_session -val get_record : __context:Context.t -> self:API.ref_session -> t - val destroy : __context:Context.t -> 
self:API.ref_session -> unit val local_session_hook : diff --git a/ocaml/xapi/xapi_message.ml b/ocaml/xapi/xapi_message.ml index 90f75943f6a..4c08648dc66 100644 --- a/ocaml/xapi/xapi_message.ml +++ b/ocaml/xapi/xapi_message.ml @@ -28,7 +28,7 @@ *) module Date = Clock.Date -module Encodings = Xapi_stdext_encodings.Encodings +module Encodings = Xapi_stdext_encodings module Listext = Xapi_stdext_std.Listext module Pervasiveext = Xapi_stdext_pervasives.Pervasiveext module Unixext = Xapi_stdext_unix.Unixext @@ -414,7 +414,7 @@ let create ~__context ~name ~priority ~cls ~obj_uuid ~body = debug "Message.create %s %Ld %s %s" name priority (Record_util.cls_to_string cls) obj_uuid ; - if not (Encodings.UTF8_XML.is_valid body) then + if not (Encodings.Utf8.is_valid body) then raise (Api_errors.Server_error (Api_errors.invalid_value, ["UTF8 expected"])) ; if not (check_uuid ~__context ~cls ~uuid:obj_uuid) then raise @@ -730,8 +730,54 @@ let get_record ~__context ~self = let get_all_records ~__context = get_real message_dir (fun _ -> true) 0.0 -let get_all_records_where ~__context ~expr:_ = - get_real message_dir (fun _ -> true) 0.0 +let get_all_records_where ~__context ~expr = + let open Xapi_database in + let expr = Db_filter.expr_of_string expr in + let eval_val msg expr = + match expr with + | Db_filter_types.Literal x -> + x + | Db_filter_types.Field x -> ( + match x with + | "name" -> + msg.API.message_name + | "uuid" -> + msg.API.message_uuid + | "priority" -> + Int64.to_string msg.API.message_priority + | "cls" -> + Record_util.cls_to_string msg.API.message_cls + | "obj_uuid" -> + msg.API.message_obj_uuid + | "timestamp" -> + Date.to_rfc3339 msg.API.message_timestamp + | "body" -> + msg.API.message_body + | any_other_key -> + raise (Db_exn.DBCache_NotFound ("missing field", any_other_key, "")) + ) + in + let eval_expr (lookup_val : API.message_t -> Db_filter_types._val -> string) + (msg : API.message_t) = + let lookup_val = lookup_val msg in + let compare _a _b = 
lookup_val _a = lookup_val _b in + let rec f = function + | Db_filter_types.True -> + true + | Db_filter_types.False -> + false + | Db_filter_types.Not x -> + not (f x) + | Db_filter_types.And (a, b) -> + f a && f b + | Db_filter_types.Eq (_a, _b) -> + compare _a _b + | Db_filter_types.Or (a, b) -> + f a || f b + in + f expr + in + get_real message_dir (eval_expr eval_val) 0.0 let repopulate_cache () = with_lock in_memory_cache_mutex (fun () -> diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index 2c1fcd81312..073e920cba2 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -48,10 +48,14 @@ module type ObserverInterface = sig val set_export_interval : __context:Context.t -> interval:float -> unit + val set_export_chunk_size : __context:Context.t -> size:int -> unit + val set_max_spans : __context:Context.t -> spans:int -> unit val set_max_traces : __context:Context.t -> traces:int -> unit + val set_max_depth : __context:Context.t -> depth:int -> unit + val set_max_file_size : __context:Context.t -> file_size:int -> unit val set_host_id : __context:Context.t -> host_id:string -> unit @@ -61,56 +65,64 @@ end module Observer : ObserverInterface = struct let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = - debug "Observer.create %s" uuid ; + debug "xapi Observer.create %s" uuid ; Tracing.TracerProvider.create ~uuid ~name_label ~attributes ~endpoints ~enabled let destroy ~__context ~uuid = - debug "Observer.destroy %s" uuid ; + debug "xapi Observer.destroy %s" uuid ; Tracing.TracerProvider.destroy ~uuid let set_enabled ~__context ~uuid ~enabled = - debug "Observer.set_enabled %s" uuid ; + debug "xapi Observer.set_enabled %s" uuid ; Tracing.TracerProvider.set ~uuid ~enabled () let set_attributes ~__context ~uuid ~attributes = - debug "Observer.set_attributes %s" uuid ; + debug "xapi Observer.set_attributes %s" uuid ; Tracing.TracerProvider.set ~uuid ~attributes () let set_endpoints ~__context ~uuid 
~endpoints = - debug "Observer.set_endpoints %s" uuid ; + debug "xapi Observer.set_endpoints %s" uuid ; Tracing.TracerProvider.set ~uuid ~endpoints () let init ~__context = - debug "Observer.init" ; + debug "xapi Observer.init" ; ignore @@ Tracing_export.main () let set_trace_log_dir ~__context ~dir = - debug "Observer.set_trace_log_dir" ; + debug "xapi Observer.set_trace_log_dir" ; Tracing_export.Destination.File.set_trace_log_dir dir let set_export_interval ~__context ~interval = - debug "Observer.set_export_interval" ; + debug "xapi Observer.set_export_interval" ; Tracing_export.set_export_interval interval + let set_export_chunk_size ~__context ~size = + debug "xapi Observer.set_export_chunk_size" ; + Tracing_export.set_export_chunk_size size + let set_max_spans ~__context ~spans = - debug "Observer.set_max_spans" ; + debug "xapi Observer.set_max_spans" ; Tracing.Spans.set_max_spans spans let set_max_traces ~__context ~traces = - debug "Observer.set_max_traces" ; + debug "xapi Observer.set_max_traces" ; Tracing.Spans.set_max_traces traces + let set_max_depth ~__context ~depth = + debug "xapi Observer.set_max_depth" ; + Tracing.Spans.set_max_depth depth + let set_max_file_size ~__context ~file_size = - debug "Observer.set_max_file_size" ; + debug "xapi Observer.set_max_file_size" ; Tracing_export.Destination.File.set_max_file_size file_size let set_host_id ~__context ~host_id = - debug "Observer.set_host_id" ; + debug "xapi Observer.set_host_id" ; Tracing_export.set_host_id host_id let set_compress_tracing_files ~__context ~enabled = - debug "Observer.set_compress_tracing_files" ; + debug "xapi Observer.set_compress_tracing_files" ; Tracing_export.Destination.File.set_compress_tracing_files enabled end @@ -142,79 +154,91 @@ module Xapi_cluster = struct module Observer = struct let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = - debug "Observer.create %s" uuid ; + debug "xapi_cluster Observer.create %s" uuid ; let module S = (val 
local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.create dbg uuid name_label attributes endpoints enabled let destroy ~__context ~uuid = - debug "Observer.destroy %s" uuid ; + debug "xapi_cluster Observer.destroy %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.destroy dbg uuid let set_enabled ~__context ~uuid ~enabled = - debug "Observer.set_enabled %s" uuid ; + debug "xapi_cluster Observer.set_enabled %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_enabled dbg uuid enabled let set_attributes ~__context ~uuid ~attributes = - debug "Observer.set_attributes %s" uuid ; + debug "xapi_cluster Observer.set_attributes %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_attributes dbg uuid attributes let set_endpoints ~__context ~uuid ~endpoints = - debug "Observer.set_endpoints %s" uuid ; + debug "xapi_cluster Observer.set_endpoints %s" uuid ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_endpoints dbg uuid endpoints let init ~__context = - debug "Observer.init" ; + debug "xapi_cluster Observer.init" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.init dbg let set_trace_log_dir ~__context ~dir = - debug "Observer.set_trace_log_dir" ; + debug "xapi_cluster Observer.set_trace_log_dir" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_trace_log_dir dbg dir let set_export_interval ~__context ~interval = - debug "Observer.set_export_interval" ; + debug "xapi_cluster Observer.set_export_interval" ; let module S = (val local_client ~__context : XAPI_CLUSTER) 
in let dbg = Context.string_of_task __context in S.Observer.set_export_interval dbg interval + let set_export_chunk_size ~__context ~size = + debug "xapi_cluster Observer.set_export_chunk_size" ; + let module S = (val local_client ~__context : XAPI_CLUSTER) in + let dbg = Context.string_of_task __context in + S.Observer.set_export_chunk_size dbg size + let set_max_spans ~__context ~spans = - debug "Observer.set_max_spans" ; + debug "xapi_cluster Observer.set_max_spans" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_max_spans dbg spans let set_max_traces ~__context ~traces = - debug "Observer.set_max_traces" ; + debug "xapi_cluster Observer.set_max_traces" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_max_traces dbg traces + let set_max_depth ~__context ~depth = + debug "xapi_cluster Observer.set_max_depth" ; + let module S = (val local_client ~__context : XAPI_CLUSTER) in + let dbg = Context.string_of_task __context in + S.Observer.set_max_depth dbg depth + let set_max_file_size ~__context ~file_size = - debug "Observer.set_max_file_size" ; + debug "xapi_cluster Observer.set_max_file_size" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_max_file_size dbg file_size let set_host_id ~__context ~host_id = - debug "Observer.set_host_id" ; + debug "xapi_cluster Observer.set_host_id" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_host_id dbg host_id let set_compress_tracing_files ~__context ~enabled = - debug "Observer.set_compress_tracing_files" ; + debug "xapi_cluster Observer.set_compress_tracing_files" ; let module S = (val local_client ~__context : XAPI_CLUSTER) in let dbg = Context.string_of_task __context in S.Observer.set_compress_tracing_files dbg 
enabled @@ -331,37 +355,53 @@ module Dom0ObserverConfig (ObserverComponent : OBSERVER_COMPONENT) : let create ~__context ~uuid ~name_label:_ ~attributes:_ ~endpoints:_ ~enabled:_ = + debug "%s config Observer.create" (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid - let destroy ~__context ~uuid = remove_config ~uuid + let destroy ~__context ~uuid = + debug "%s config Observer.destroy" (to_string ObserverComponent.component) ; + remove_config ~uuid let set_enabled ~__context ~uuid ~enabled:_ = + debug "%s config Observer.set_enabled" + (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid let set_attributes ~__context ~uuid ~attributes:_ = + debug "%s config Observer.set_attributes" + (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid let set_endpoints ~__context ~uuid ~endpoints:_ = + debug "%s config Observer.set_endpoints" + (to_string ObserverComponent.component) ; let observer = Db.Observer.get_by_uuid ~__context ~uuid in update_config ~__context ~observer ~uuid let init ~__context = + debug "%s config Observer.init" (to_string ObserverComponent.component) ; let observer_all = Db.Observer.get_all ~__context in update_all_configs ~__context ~observer_all let set_trace_log_dir ~__context ~dir:_ = + debug "%s config Observer.set_trace_log_dir" + (to_string ObserverComponent.component) ; let observer_all = Db.Observer.get_all ~__context in update_all_configs ~__context ~observer_all let set_export_interval ~__context:_ ~interval:_ = () + let set_export_chunk_size ~__context:_ ~size:_ = () + let set_max_spans ~__context:_ ~spans:_ = () let set_max_traces ~__context:_ ~traces:_ = () + let set_max_depth ~__context:_ ~depth:_ = () + let set_max_file_size ~__context:_ ~file_size:_ = () let set_host_id 
~__context:_ ~host_id:_ = () @@ -371,6 +411,30 @@ end module SMObserverConfig = Dom0ObserverConfig (struct let component = SMApi end) +module SMObserver = struct + include SMObserverConfig + open Observer_helpers + + let create ~__context ~uuid ~name_label ~attributes ~endpoints ~enabled = + debug "SMObserver Observer.create %s" uuid ; + SMObserverConfig.create ~__context ~uuid ~name_label ~attributes ~endpoints + ~enabled ; + let dbg = Context.string_of_task __context in + Client.create dbg uuid name_label attributes endpoints enabled + + let destroy ~__context ~uuid = + debug "SMObserver Observer.destroy %s" uuid ; + SMObserverConfig.destroy ~__context ~uuid ; + let dbg = Context.string_of_task __context in + Client.destroy dbg uuid + + let set_enabled ~__context ~uuid ~enabled = + debug "SMObserver Observer.set_enabled %s" uuid ; + SMObserverConfig.set_enabled ~__context ~uuid ~enabled ; + let dbg = Context.string_of_task __context in + Client.set_enabled dbg uuid enabled +end + let get_forwarder c = let module Forwarder = ( val match c with @@ -381,7 +445,7 @@ let get_forwarder c = | Xapi_clusterd -> (module Xapi_cluster.Observer) | SMApi -> - (module SMObserverConfig) + (module SMObserver) : ObserverInterface ) in @@ -506,6 +570,10 @@ let set_export_interval ~__context interval component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_export_interval ~__context ~interval +let set_export_chunk_size ~__context size component = + let module Forwarder = (val get_forwarder component : ObserverInterface) in + Forwarder.set_export_chunk_size ~__context ~size + let set_max_spans ~__context spans component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_max_spans ~__context ~spans @@ -514,6 +582,10 @@ let set_max_traces ~__context traces component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_max_traces ~__context ~traces +let 
set_max_depth ~__context depth component = + let module Forwarder = (val get_forwarder component : ObserverInterface) in + Forwarder.set_max_depth ~__context ~depth + let set_max_file_size ~__context file_size component = let module Forwarder = (val get_forwarder component : ObserverInterface) in Forwarder.set_max_file_size ~__context ~file_size @@ -549,8 +621,10 @@ let initialise_observer_component ~__context component = let initialise_observer_meta ~__context component = set_trace_log_dir ~__context !Xapi_globs.trace_log_dir component ; set_export_interval ~__context !Xapi_globs.export_interval component ; + set_export_chunk_size ~__context !Xapi_globs.export_chunk_size component ; set_max_spans ~__context !Xapi_globs.max_spans component ; set_max_traces ~__context !Xapi_globs.max_traces component ; + set_max_depth ~__context !Xapi_globs.max_span_depth component ; set_max_file_size ~__context !Xapi_globs.max_observer_file_size component ; set_host_id ~__context (Helpers.get_localhost_uuid ()) component ; set_compress_tracing_files ~__context @@ -571,6 +645,14 @@ let initialise ~__context = |> observed_components_of |> List.iter (initialise_observer_component ~__context) ) ; + (* If SMApi is now experimental, manually remove the config as there is no observer to do it *) + if + Xapi_globs.( + StringSet.mem (to_string SMApi) !observer_experimental_components + ) + then + Xapi_stdext_unix.Unixext.rm_rec (dir_name_of_component SMApi) ; + Tracing_export.set_service_name "xapi" let set_hosts ~__context ~self ~value = diff --git a/ocaml/xapi/xapi_observer_components.ml b/ocaml/xapi/xapi_observer_components.ml index e7803189151..18dd8bdd25d 100644 --- a/ocaml/xapi/xapi_observer_components.ml +++ b/ocaml/xapi/xapi_observer_components.ml @@ -91,13 +91,13 @@ let is_component_enabled ~component = ) observers with e -> - D.log_backtrace () ; + D.log_backtrace e ; D.warn "is_component_enabled(%s) inner got exception: %s" (to_string component) (Printexc.to_string e) ; false ) 
with e -> - D.log_backtrace () ; + D.log_backtrace e ; D.warn "is_component_enabled(%s) got exception: %s" (to_string component) (Printexc.to_string e) ; false diff --git a/ocaml/xapi/xapi_pbd.ml b/ocaml/xapi/xapi_pbd.ml index a9625dc3c62..86d9b7fabc0 100644 --- a/ocaml/xapi/xapi_pbd.ml +++ b/ocaml/xapi/xapi_pbd.ml @@ -114,13 +114,18 @@ let abort_if_storage_attached_to_protected_vms ~__context ~self = (fun vbd -> let vdi = Db.VBD.get_VDI ~__context ~self:vbd in if List.mem vdi vdis then ( - warn - "PBD.unplug will make protected VM %s not agile since it has a \ - VBD attached to VDI %s" - (Ref.string_of vm_ref) (Ref.string_of vdi) ; + let vm = Ref.string_of vm_ref in + let pbd = Ref.string_of self in + let sr = Ref.string_of sr in + info + "The protected VM %s must remain agile and blocked the \ + operation. The PBD %s of must be plugged to ensure this. This \ + happened because the SR %s is used by both the VM and the \ + PBD." + vm pbd sr ; raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) + Api_errors.( + Server_error (ha_constraint_violation_sr_not_shared, [sr]) ) ) ) diff --git a/ocaml/xapi/xapi_periodic_scheduler_init.ml b/ocaml/xapi/xapi_periodic_scheduler_init.ml index 1bd13d5f6d6..e238e9daa62 100644 --- a/ocaml/xapi/xapi_periodic_scheduler_init.ml +++ b/ocaml/xapi/xapi_periodic_scheduler_init.ml @@ -13,6 +13,8 @@ *) (** Periodic scheduler for background tasks. 
*) +module Date = Clock.Date + module D = Debug.Make (struct let name = "backgroundscheduler" end) open D @@ -73,6 +75,31 @@ let register ~__context = (fun __context -> Xapi_subject.update_all_subjects ~__context ) in + let sync_ssh_status ~__context = + let self = Helpers.get_localhost ~__context in + let timeout = Db.Host.get_ssh_enabled_timeout ~__context ~self in + + if timeout > 0L then + let expiry_time = + Db.Host.get_ssh_expiry ~__context ~self + |> Date.to_unix_time + |> Int64.of_float + in + let current_time = Unix.time () |> Int64.of_float in + + if Int64.compare expiry_time current_time > 0 then + let remaining = Int64.sub expiry_time current_time in + Xapi_host.schedule_disable_ssh_job ~__context ~self ~timeout:remaining + ~auto_mode:true + (* Handle the case where XAPI is not active when the SSH timeout expires. + This typically occurs when XAPI has been down for an extended period that + exceeds the timeout duration. In this scenario, we need to enable SSH auto + mode to ensure the SSH service remains continuously available. 
*) + else if Fe_systemctl.is_active ~service:!Xapi_globs.ssh_service then ( + Xapi_host.disable_ssh ~__context ~self ; + Xapi_host.set_ssh_auto_mode ~__context ~self ~value:true + ) + in let update_all_subjects_delay = 10.0 in (* initial delay = 10 seconds *) if master then @@ -133,6 +160,7 @@ let register ~__context = "Check stunnel cache expiry" (Xapi_stdext_threads_scheduler.Scheduler.Periodic stunnel_period) stunnel_period Stunnel_cache.gc ; + sync_ssh_status ~__context ; if master && Db.Pool.get_update_sync_enabled ~__context diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index 163e1f31d57..f5584fa8634 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -271,13 +271,17 @@ let abort_if_network_attached_to_protected_vms ~__context ~self = List.iter (fun vm -> if Helpers.is_xha_protected ~__context ~self:vm then ( - warn - "PIF.unplug will make protected VM %s not agile since it has a VIF \ - attached to network %s" - (Ref.string_of vm) (Ref.string_of net) ; + let vm = Ref.string_of vm in + let pif = Ref.string_of self in + let net = Ref.string_of net in + info + "The protected VM %s must remain agile and blocked the operation. \ + PIF %s must be plugged this. 
This happened because network %s is \ + used by both the VM and the PIF" + vm pif net ; raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) + Api_errors.( + Server_error (ha_constraint_violation_network_not_shared, [net]) ) ) ) @@ -926,17 +930,25 @@ let assert_cluster_host_operation_not_in_progress ~__context = match Db.Cluster.get_all ~__context with | [] -> () - | cluster :: _ -> - let ops = - Db.Cluster.get_current_operations ~__context ~self:cluster - |> List.map snd - in - if List.mem `enable ops || List.mem `add ops then - raise - Api_errors.( - Server_error - (other_operation_in_progress, ["Cluster"; Ref.string_of cluster]) - ) + | cluster :: _ -> ( + let ops = Db.Cluster.get_current_operations ~__context ~self:cluster in + match List.find_opt (fun (_, op) -> op = `enable || op = `add) ops with + | Some (op_ref, op_type) -> + raise + Api_errors.( + Server_error + ( other_operation_in_progress + , [ + "Cluster" + ; Ref.string_of cluster + ; API.cluster_operation_to_string op_type + ; op_ref + ] + ) + ) + | None -> + () + ) (* Block allowing unplug if - a cluster host is enabled on this PIF @@ -1126,7 +1138,7 @@ let start_of_day_best_effort_bring_up ~__context () = debug "Configured network backend: %s" (Network_interface.string_of_kind (Net.Bridge.get_kind dbg ())) ; (* Clear the state of the network daemon, before refreshing it by plugging - * the most important PIFs (see above). *) + * the most important PIFs (see above). 
*) Net.clear_state () ; List.iter (fun (pif, pifr) -> diff --git a/ocaml/xapi/xapi_pool.ml b/ocaml/xapi/xapi_pool.ml index 8859bee1c45..771f3a68243 100644 --- a/ocaml/xapi/xapi_pool.ml +++ b/ocaml/xapi/xapi_pool.ml @@ -112,6 +112,95 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = ) ) in + let one_ip_configured_on_joining_cluster_network () = + let one_ip_configured_on_joining_cluster_network' cluster_host = + match Client.Cluster_host.get_PIF ~rpc ~session_id ~self:cluster_host with + | pif when pif = Ref.null -> + () + | pif -> ( + match + ( Client.PIF.get_VLAN ~rpc ~session_id ~self:pif + , Client.PIF.get_management ~rpc ~session_id ~self:pif + ) + with + | vlan, false when vlan > 0L -> + error + "Cannot join pool whose clustering is enabled on a \ + non-management VLAN network" ; + raise + (Api_errors.Server_error + ( Api_errors + .pool_joining_pool_cannot_enable_clustering_on_vlan_network + , [Int64.to_string vlan] + ) + ) + | _ -> ( + let clustering_bridges_in_pool = + ( match + Client.PIF.get_bond_master_of ~rpc ~session_id ~self:pif + with + | [] -> + [pif] + | bonds -> + List.concat_map + (fun bond -> + Client.Bond.get_slaves ~rpc ~session_id ~self:bond + ) + bonds + ) + |> List.map (fun self -> + Client.PIF.get_network ~rpc ~session_id ~self + ) + |> List.map (fun self -> + Client.Network.get_bridge ~rpc ~session_id ~self + ) + in + match + Db.Host.get_PIFs ~__context + ~self:(Helpers.get_localhost ~__context) + |> List.filter (fun p -> + List.exists + (fun b -> + let network = Db.PIF.get_network ~__context ~self:p in + Db.Network.get_bridge ~__context ~self:network = b + ) + clustering_bridges_in_pool + && Db.PIF.get_IP ~__context ~self:p <> "" + ) + with + | [_] -> + () + | _ -> + error + "Cannot join pool as the joining host needs to have one (and \ + only one) IP address on the network that will be used for \ + clustering." 
; + raise + (Api_errors.Server_error + ( Api_errors + .pool_joining_host_must_have_only_one_IP_on_clustering_network + , [] + ) + ) + ) + ) + in + match Client.Cluster_host.get_all ~rpc ~session_id with + | [] -> + () + | ch :: _ -> ( + let cluster = + Client.Cluster_host.get_cluster ~rpc ~session_id ~self:ch + in + match + Client.Cluster.get_pool_auto_join ~rpc ~session_id ~self:cluster + with + | false -> + () + | true -> + one_ip_configured_on_joining_cluster_network' ch + ) + in (* CA-26975: Pool edition MUST match *) let assert_restrictions_match () = let my_edition = @@ -888,6 +977,7 @@ let pre_join_checks ~__context ~rpc ~session_id ~force = assert_management_interface_exists () ; ha_is_not_enable_on_me () ; clustering_is_not_enabled_on_me () ; + one_ip_configured_on_joining_cluster_network () ; ha_is_not_enable_on_the_distant_pool () ; assert_not_joining_myself () ; assert_i_know_of_no_other_hosts () ; @@ -963,6 +1053,11 @@ let rec create_or_get_host_on_master __context rpc session_id (host_ref, host) : ~ssl_legacy:false ~last_software_update:host.API.host_last_software_update ~last_update_hash:host.API.host_last_update_hash + ~ssh_enabled:host.API.host_ssh_enabled + ~ssh_enabled_timeout:host.API.host_ssh_enabled_timeout + ~ssh_expiry:host.API.host_ssh_expiry + ~console_idle_timeout:host.API.host_console_idle_timeout + ~ssh_auto_mode:host.API.host_ssh_auto_mode in (* Copy other-config into newly created host record: *) no_exn @@ -1358,6 +1453,7 @@ let create_or_get_secret_on_master __context rpc session_id (_secret_ref, secret let protect_exn f x = try Some (f x) with e -> + Backtrace.is_important e ; debug "Ignoring exception: %s" (Printexc.to_string e) ; Debug.log_backtrace e (Backtrace.get e) ; None @@ -1555,6 +1651,7 @@ let join_common ~__context ~master_address ~master_username ~master_password ) in + let remote_coordinator = get_master ~rpc ~session_id in (* If management is on a VLAN, then get the Pool master management network bridge before we 
logout the session *) let pool_master_bridge, mgmt_pif = @@ -1565,7 +1662,7 @@ let join_common ~__context ~master_address ~master_username ~master_password if Db.PIF.get_VLAN_master_of ~__context ~self:my_pif <> Ref.null then let pif = Client.Host.get_management_interface ~rpc ~session_id - ~host:(get_master ~rpc ~session_id) + ~host:remote_coordinator in let network = Client.PIF.get_network ~rpc ~session_id ~self:pif in (Some (Client.Network.get_bridge ~rpc ~session_id ~self:network), my_pif) @@ -1655,8 +1752,44 @@ let join_common ~__context ~master_address ~master_username ~master_password "Unable to set the write the new pool certificates to the disk : %s" (ExnHelper.string_of_exn e) ) ; - Db.Host.set_latest_synced_updates_applied ~__context ~self:me - ~value:`unknown ; + ( try + let ssh_enabled_timeout = + Client.Host.get_ssh_enabled_timeout ~rpc ~session_id + ~self:remote_coordinator + in + let console_idle_timeout = + Client.Host.get_console_idle_timeout ~rpc ~session_id + ~self:remote_coordinator + in + let ssh_auto_mode = + Client.Host.get_ssh_auto_mode ~rpc ~session_id + ~self:remote_coordinator + in + Xapi_host.set_console_idle_timeout ~__context ~self:me + ~value:console_idle_timeout ; + Xapi_host.set_ssh_enabled_timeout ~__context ~self:me + ~value:ssh_enabled_timeout ; + Xapi_host.set_ssh_auto_mode ~__context ~self:me ~value:ssh_auto_mode ; + let ssh_enabled = + Client.Host.get_ssh_enabled ~rpc ~session_id + ~self:remote_coordinator + in + (* As ssh_expiry will be updated by host.enable_ssh and host.disable_ssh, + there is a corner case when the joiner's SSH state will not match SSH + service state in its new coordinator exactly: if the joiner joins when + SSH service has been enabled in the new coordinator, while not timed + out yet, the joiner will start SSH service with timeout + host.ssh_enabled_timeout, which means SSH service in the joiner will + be disabled later than in the new coordinator. 
*) + match ssh_enabled with + | true -> + Xapi_host.enable_ssh ~__context ~self:me + | false -> + Xapi_host.disable_ssh ~__context ~self:me + with e -> + error "Unable to configure SSH service on local host: %s" + (ExnHelper.string_of_exn e) + ) ; (* this is where we try and sync up as much state as we can with the master. This is "best effort" rather than critical; if we fail part way through this then we carry @@ -2012,6 +2145,25 @@ let eject_self ~__context ~host = control_domains_to_destroy with _ -> () ) ; + ( try + (* Restore console idle timeout *) + Xapi_host.set_console_idle_timeout ~__context ~self:host + ~value:Constants.default_console_idle_timeout ; + (* Restore SSH service to default state *) + Xapi_host.set_ssh_enabled_timeout ~__context ~self:host + ~value:Constants.default_ssh_enabled_timeout ; + Xapi_host.set_ssh_auto_mode ~__context ~self:host + ~value:!Xapi_globs.ssh_auto_mode_default ; + match Constants.default_ssh_enabled with + | true -> + Xapi_host.enable_ssh ~__context ~self:host + | false -> + Xapi_host.disable_ssh ~__context ~self:host + with e -> + warn "Caught %s while restoring ssh service. 
Ignoring" + (Printexc.to_string e) + ) ; + debug "Pool.eject: setting our role to be master" ; Xapi_pool_transition.set_role Pool_role.Master ; debug "Pool.eject: forgetting pool secret" ; @@ -2914,8 +3066,10 @@ let disable_external_auth ~__context ~pool:_ ~config = debug "Failed to disable the external authentication of at least one \ host in the pool" ; - if String.starts_with ~prefix:Api_errors.auth_disable_failed err - then (* tagged exception *) + if + String.starts_with ~prefix:Api_errors.auth_disable_failed err + (* tagged exception *) + then raise (Api_errors.Server_error (Api_errors.pool_auth_prefix ^ err, [Ref.string_of host; msg]) @@ -3974,3 +4128,64 @@ let put_bundle_handler (req : Request.t) s _ = | None -> () ) + +module Ssh = struct + let operate ~__context ~action ~error = + let hosts = Db.Host.get_all ~__context in + Helpers.call_api_functions ~__context (fun rpc session_id -> + let failed_hosts = + List.fold_left + (fun failed_hosts host -> + try + action ~rpc ~session_id ~self:host ; + failed_hosts + with _ -> Ref.string_of host :: failed_hosts + ) + [] hosts + in + match failed_hosts with + | [] -> + () + | _ -> + raise (Api_errors.Server_error (error, failed_hosts)) + ) + + let enable ~__context ~self:_ = + operate ~__context ~action:Client.Host.enable_ssh + ~error:Api_errors.enable_ssh_partially_failed + + let disable ~__context ~self:_ = + operate ~__context ~action:Client.Host.disable_ssh + ~error:Api_errors.disable_ssh_partially_failed + + let set_enabled_timeout ~__context ~self:_ ~value = + operate ~__context + ~action:(fun ~rpc ~session_id ~self -> + Client.Host.set_ssh_enabled_timeout ~rpc ~session_id ~self ~value + ) + ~error:Api_errors.set_ssh_timeout_partially_failed + + let set_console_timeout ~__context ~self:_ ~value = + operate ~__context + ~action:(fun ~rpc ~session_id ~self -> + Client.Host.set_console_idle_timeout ~rpc ~session_id ~self ~value + ) + ~error:Api_errors.set_console_timeout_partially_failed + + let 
set_ssh_auto_mode ~__context ~self:_ ~value = + operate ~__context + ~action:(fun ~rpc ~session_id ~self -> + Client.Host.set_ssh_auto_mode ~rpc ~session_id ~self ~value + ) + ~error:Api_errors.set_ssh_auto_mode_partially_failed +end + +let enable_ssh = Ssh.enable + +let disable_ssh = Ssh.disable + +let set_ssh_enabled_timeout = Ssh.set_enabled_timeout + +let set_console_idle_timeout = Ssh.set_console_timeout + +let set_ssh_auto_mode = Ssh.set_ssh_auto_mode diff --git a/ocaml/xapi/xapi_pool.mli b/ocaml/xapi/xapi_pool.mli index 9669b717762..dc87e90a18e 100644 --- a/ocaml/xapi/xapi_pool.mli +++ b/ocaml/xapi/xapi_pool.mli @@ -433,3 +433,16 @@ val get_guest_secureboot_readiness : -> API.pool_guest_secureboot_readiness val put_bundle_handler : Http.Request.t -> Unix.file_descr -> 'a -> unit + +val enable_ssh : __context:Context.t -> self:API.ref_pool -> unit + +val disable_ssh : __context:Context.t -> self:API.ref_pool -> unit + +val set_ssh_enabled_timeout : + __context:Context.t -> self:API.ref_pool -> value:int64 -> unit + +val set_console_idle_timeout : + __context:Context.t -> self:API.ref_pool -> value:int64 -> unit + +val set_ssh_auto_mode : + __context:Context.t -> self:API.ref_pool -> value:bool -> unit diff --git a/ocaml/xapi/xapi_pool_helpers.ml b/ocaml/xapi/xapi_pool_helpers.ml index 14f4c37d030..bdd4e0454b1 100644 --- a/ocaml/xapi/xapi_pool_helpers.ml +++ b/ocaml/xapi/xapi_pool_helpers.ml @@ -99,7 +99,7 @@ type validity = Unknown | Allowed | Disallowed of string * string list let compute_valid_operations ~__context record pool : API.pool_allowed_operations -> validity = let ref = Ref.string_of pool in - let current_ops = List.map snd record.Db_actions.pool_current_operations in + let current_ops = record.Db_actions.pool_current_operations in let table = (Hashtbl.create 32 : (all_operations, validity) Hashtbl.t) in let set_validity = Hashtbl.replace table in (* Start by assuming all operations are allowed. 
*) @@ -118,30 +118,45 @@ let compute_valid_operations ~__context record pool : in List.iter populate ops in - let other_operation_in_progress = - (Api_errors.other_operation_in_progress, [Datamodel_common._pool; ref]) + let other_operation_in_progress waiting_op = + let additional_info = + match waiting_op with + | Some (op_ref, op_type) -> + [API.pool_allowed_operations_to_string op_type; op_ref] + | _ -> + [] + in + ( Api_errors.other_operation_in_progress + , [Datamodel_common._pool; ref] @ additional_info + ) + in + let is_current_op op = + List.exists (fun (_, current_op) -> op = current_op) current_ops in - let is_current_op = Fun.flip List.mem current_ops in let blocking = List.find_opt (fun (op, _) -> is_current_op op) blocking_ops_table in - let waiting = List.find_opt is_current_op waiting_ops in + let waiting = + List.find_opt + (fun (_, current_op) -> List.mem current_op waiting_ops) + current_ops + in ( match (blocking, waiting) with - | Some (_, reason), _ -> + | Some (_, reason), waiting_current_op -> (* Mark all potentially blocking operations as invalid due to the specific blocking operation's "in progress" error. *) set_errors blocking_ops (reason, []) ; (* Mark all waiting operations as invalid for the generic "OTHER_OPERATION_IN_PROGRESS" reason. *) - set_errors waiting_ops other_operation_in_progress + set_errors waiting_ops (other_operation_in_progress waiting_current_op) (* Note that all_operations ⊆ blocking_ops ∪ waiting_ops, so this invalidates all operations (with the reason partitioned between whether the operation is blocking or waiting). *) - | None, Some _ -> + | None, (Some _ as waiting_current_op) -> (* If there's no blocking operation in current operations, but there is a waiting operation, invalidate all operations for the generic reason. Again, this covers every operation. 
*) - set_errors all_operations other_operation_in_progress + set_errors all_operations (other_operation_in_progress waiting_current_op) | None, None -> ( (* If there's no blocking or waiting operation in current operations (i.e. current operations is empty), we can report diff --git a/ocaml/xapi/xapi_services.ml b/ocaml/xapi/xapi_services.ml index a413e4c3630..ca9e3d729ca 100644 --- a/ocaml/xapi/xapi_services.ml +++ b/ocaml/xapi/xapi_services.ml @@ -196,7 +196,7 @@ let put_handler (req : Http.Request.t) s _ = http_proxy_to_plugin req s name | [""; services; "SM"; "data"; sr; vdi] when services = _services -> let vdi, _ = - Storage_smapiv1.find_vdi ~__context + Storage_utils.find_vdi ~__context (Storage_interface.Sr.of_string sr) (Storage_interface.Vdi.of_string vdi) in @@ -207,8 +207,9 @@ let put_handler (req : Http.Request.t) s _ = -> Storage_migrate.nbd_handler req s ~vm sr vdi dp | [""; services; "SM"; "nbdproxy"; vm; sr; vdi; dp] + | [""; services; "SM"; "nbdproxy"; "import"; vm; sr; vdi; dp] when services = _services -> - Storage_migrate.nbd_proxy req s vm sr vdi dp + Storage_migrate.import_nbd_proxy req s vm sr vdi dp | _ -> Http_svr.headers s (Http.http_404_missing ~version:"1.0" ()) ; req.Http.Request.close <- true @@ -254,6 +255,7 @@ let get_handler (req : Http.Request.t) s _ = ; features= List.map (fun x -> path [_services; x]) [_SM] ; configuration= [] ; required_cluster_stack= [] + ; smapi_version= SMAPIv2 } in respond req (Storage_interface.(rpc_of query_result) q) s diff --git a/ocaml/xapi/xapi_session.ml b/ocaml/xapi/xapi_session.ml index f7fcfdac7e9..e4b5a495a5f 100644 --- a/ocaml/xapi/xapi_session.ml +++ b/ocaml/xapi/xapi_session.ml @@ -32,6 +32,7 @@ module Listext = Xapi_stdext_std.Listext open Client open Auth_signature open Extauth +module SessionValidateMap = Map.Make (String) module AuthFail : sig (* stats are reset each time you query, so if there hasn't @@ -420,18 +421,18 @@ let destroy_db_session ~__context ~self = (* CP-703: ensure that 
activate sessions are invalidated in a bounded time *) (* in response to external authentication/directory services updates, such as *) (* e.g. group membership changes, or even account disabled *) -let revalidate_external_session ~__context ~session = +let revalidate_external_session ~__context acc session = Context.with_tracing ~__context __FUNCTION__ @@ fun __context -> try (* guard: we only want to revalidate external sessions, where is_local_superuser is false *) (* Neither do we want to revalidate the special read-only external database sessions, since they can exist independent of external authentication. *) + (* 1. is the external authentication disabled in the pool? *) if not (Db.Session.get_is_local_superuser ~__context ~self:session || Xapi_database.Db_backend.is_session_registered (Ref.string_of session) ) - then ( - (* 1. is the external authentication disabled in the pool? *) + then let master = Helpers.get_master ~__context in let auth_type = Db.Host.get_external_auth_type ~__context ~self:master in if auth_type = "" then ( @@ -442,45 +443,54 @@ let revalidate_external_session ~__context ~session = (trackid session) in debug "%s" msg ; - destroy_db_session ~__context ~self:session + destroy_db_session ~__context ~self:session ; + acc ) else (* otherwise, we try to revalidate it against the external authentication service *) let session_lifespan = 60.0 *. 30.0 in (* allowed session lifespan = 30 minutes *) let random_lifespan = Random.float 60.0 *. 10.0 in - (* extra random (up to 10min) lifespan to spread access to external directory *) - (* 2. has the external session expired/does it need revalidation? *) let session_last_validation_time = Date.to_unix_time (Db.Session.get_validation_time ~__context ~self:session) in let now = Date.now () in - let session_needs_revalidation = + let session_timed_out = Date.to_unix_time now > session_last_validation_time +. session_lifespan +. 
random_lifespan in - if session_needs_revalidation then ( + + (* extra random (up to 10min) lifespan to spread access to external directory *) + let authenticated_user_sid = + Db.Session.get_auth_user_sid ~__context ~self:session + in + let validate_with_memo acc f = + match SessionValidateMap.find_opt authenticated_user_sid acc with + | None -> + f acc + | Some false -> + debug "Destory session %s as previous check for user %s not pass" + (trackid session) authenticated_user_sid ; + destroy_db_session ~__context ~self:session ; + acc + | Some true -> + debug "Skip check session %s as previous check for user %s pass" + (trackid session) authenticated_user_sid ; + acc + in + + if session_timed_out then ( (* if so, then:*) + validate_with_memo acc @@ fun acc -> debug "session %s needs revalidation" (trackid session) ; - let authenticated_user_sid = - Db.Session.get_auth_user_sid ~__context ~self:session - in (* 2a. revalidate external authentication *) (* CP-827: if the user was suspended (disabled,expired,locked-out), then we must destroy the session *) let suspended, _ = - is_subject_suspended ~__context ~cache:true authenticated_user_sid - in - let suspended = - if suspended then - is_subject_suspended ~__context ~cache:false - authenticated_user_sid - |> fst - else - suspended + is_subject_suspended ~__context ~cache:false authenticated_user_sid in if suspended then ( debug @@ -488,7 +498,8 @@ let revalidate_external_session ~__context ~session = %s" authenticated_user_sid (trackid session) ; (* we must destroy the session in this case *) - destroy_db_session ~__context ~self:session + destroy_db_session ~__context ~self:session ; + SessionValidateMap.add authenticated_user_sid false acc ) else try (* if the user is not in the external directory service anymore, this call raises Not_found *) @@ -525,7 +536,8 @@ let revalidate_external_session ~__context ~session = in debug "%s" msg ; (* we must destroy the session in this case *) - destroy_db_session ~__context 
~self:session + destroy_db_session ~__context ~self:session ; + SessionValidateMap.add authenticated_user_sid false acc ) else ( (* non-empty intersection: externally-authenticated subject still has login rights in the pool *) @@ -552,7 +564,9 @@ let revalidate_external_session ~__context ~session = ~value:subject_in_intersection ; debug "updated subject for session %s, sid %s " (trackid session) authenticated_user_sid - ) + ) ; + debug "end revalidation of session %s " (trackid session) ; + SessionValidateMap.add authenticated_user_sid true acc with Not_found -> (* subject ref for intersection's sid does not exist in our metadata!!! *) (* this should never happen, it's an internal metadata inconsistency between steps 2b and 2c *) @@ -564,7 +578,8 @@ let revalidate_external_session ~__context ~session = in debug "%s" msg ; (* we must destroy the session in this case *) - destroy_db_session ~__context ~self:session + destroy_db_session ~__context ~self:session ; + SessionValidateMap.add authenticated_user_sid false acc ) with Auth_signature.Subject_cannot_be_resolved | Not_found -> (* user was not found in external directory in order to obtain group membership *) @@ -577,15 +592,18 @@ let revalidate_external_session ~__context ~session = in debug "%s" msg ; (* user is not in the external directory anymore: we must destroy the session in this case *) - destroy_db_session ~__context ~self:session - ) ; - debug "end revalidation of session %s " (trackid session) - ) + destroy_db_session ~__context ~self:session ; + SessionValidateMap.add authenticated_user_sid false acc + ) else + acc + else + acc with e -> (*unexpected exception: we absorb it and print out a debug line *) debug "Unexpected exception while revalidating session %s: %s" (trackid session) - (ExnHelper.string_of_exn e) + (ExnHelper.string_of_exn e) ; + acc (* CP-703: ensure that activate sessions are invalidated in a bounded time *) (* in response to external authentication/directory services updates, such 
as *) @@ -595,21 +613,18 @@ let revalidate_all_sessions ~__context = try debug "revalidating all external sessions in the local host" ; (* obtain all sessions in the pool *) - let sessions = Db.Session.get_all ~__context in + Db.Session.get_all ~__context (* filter out those sessions where is_local_superuser or client_certificate is true *) (* we only want to revalidate the sessions created using the external authentication service *) - let external_sessions = - List.filter - (fun session -> - (not (Db.Session.get_is_local_superuser ~__context ~self:session)) - && not (Db.Session.get_client_certificate ~__context ~self:session) - ) - sessions - in - (* revalidate each external session *) - List.iter - (fun session -> revalidate_external_session ~__context ~session) - external_sessions + |> List.filter (fun session -> + (not (Db.Session.get_is_local_superuser ~__context ~self:session)) + && not (Db.Session.get_client_certificate ~__context ~self:session) + ) + |> (* revalidate each external session *) + List.fold_left + (revalidate_external_session ~__context) + SessionValidateMap.empty + |> ignore with e -> (*unexpected exception: we absorb it and print out a debug line *) debug "Unexpected exception while revalidating external sessions: %s" @@ -801,12 +816,12 @@ module Caching = struct and type password = string and type session = external_auth_result - let () = Mirage_crypto_rng_unix.initialize (module Mirage_crypto_rng.Fortuna) + let () = Mirage_crypto_rng_unix.use_default () let create_salt () = (* Creates a Cstruct of length 8. *) let data = Mirage_crypto_rng.generate 8 in - let bytes = Cstruct.to_bytes data in + let bytes = Bytes.of_string data in (* Encode the salt as a hex string. Each byte becomes 2 hexadecimal digits, so the length is 16 (the maximum for crypt_r). 
*) @@ -1569,5 +1584,5 @@ let create_from_db_file ~__context ~filename = Xapi_database.Db_xml.From.file (Datamodel_schema.of_datamodel ()) filename |> Xapi_database.Db_upgrade.generic_database_upgrade in - let db_ref = Some (Xapi_database.Db_ref.in_memory (ref (ref db))) in + let db_ref = Some (Xapi_database.Db_ref.in_memory (Atomic.make db)) in create_readonly_session ~__context ~uname:"db-from-file" ~db_ref diff --git a/ocaml/xapi/xapi_sr.ml b/ocaml/xapi/xapi_sr.ml index d09490b9521..8261757bb5e 100644 --- a/ocaml/xapi/xapi_sr.ml +++ b/ocaml/xapi/xapi_sr.ml @@ -757,6 +757,11 @@ let update_vdis ~__context ~sr db_vdis vdi_infos = (* Perform a scan of this locally-attached SR *) let scan ~__context ~sr = + let module RefSet = Set.Make (struct + type t = [`VDI] Ref.t + + let compare = Ref.compare + end) in let open Storage_access in let task = Context.get_task_id __context in let module C = Storage_interface.StorageAPI (Idl.Exn.GenClient (struct @@ -781,9 +786,21 @@ let scan ~__context ~sr = (* It is sufficient to just compare the refs in two db_vdis, as this is what update_vdis uses to determine what to delete *) let vdis_ref_equal db_vdi1 db_vdi2 = - Listext.List.set_difference (List.map fst db_vdi1) - (List.map fst db_vdi2) - = [] + let refs1 = RefSet.of_list (List.map fst db_vdi1) in + let refs2 = RefSet.of_list (List.map fst db_vdi2) in + if RefSet.equal refs1 refs2 then + true + else + let log_diff label a b = + RefSet.diff a b + |> RefSet.elements + |> List.map Ref.string_of + |> String.concat " " + |> debug "%s: VDIs %s during scan: %s" __FUNCTION__ label + in + log_diff "removed" refs1 refs2 ; + log_diff "added" refs2 refs1 ; + false in let db_vdis_before = find_vdis () in let vs, sr_info = @@ -793,21 +810,13 @@ let scan ~__context ~sr = let db_vdis_after = find_vdis () in if limit > 0 && not (vdis_ref_equal db_vdis_before db_vdis_after) then ( - debug - "%s detected db change while scanning, before scan vdis %s, \ - after scan vdis %s, retry limit left %d" - 
__FUNCTION__ - (List.map (fun (_, v) -> v.vDI_uuid) db_vdis_before - |> String.concat "," - ) - (List.map (fun (_, v) -> v.vDI_uuid) db_vdis_after - |> String.concat "," - ) - limit ; + debug "%s detected db change while scanning, retry limit left %d" + __FUNCTION__ limit ; (scan_rec [@tailcall]) (limit - 1) ) else if limit = 0 then Helpers.internal_error "SR.scan retry limit exceeded" else ( + debug "%s no change detected, updating VDIs" __FUNCTION__ ; update_vdis ~__context ~sr db_vdis_after vs ; let virtual_allocation = List.fold_left @@ -1071,3 +1080,15 @@ let get_live_hosts ~__context ~sr = Xapi_vm_helpers.assert_can_see_specified_SRs ~__context ~reqd_srs:[sr] ~host in Xapi_vm_helpers.possible_hosts ~__context ~choose_fn () + +let required_api_version_of_sr ~__context ~sr = + let sr_type = Db.SR.get_type ~__context ~self:sr in + let expr = + Xapi_database.Db_filter_types.(Eq (Field "type", Literal sr_type)) + in + match Db.SM.get_records_where ~__context ~expr with + | (_, sm) :: _ -> + Some sm.API.sM_required_api_version + | [] -> + warn "Couldn't find SM with type %s" sr_type ; + None diff --git a/ocaml/xapi/xapi_sr_operations.ml b/ocaml/xapi/xapi_sr_operations.ml index 75a3c695af4..b08a82c20f2 100644 --- a/ocaml/xapi/xapi_sr_operations.ml +++ b/ocaml/xapi/xapi_sr_operations.ml @@ -200,24 +200,35 @@ let valid_operations ~__context ?op record _ref' : table = let check_parallel_ops ~__context _record = let safe_to_parallelise = [`plug] in let current_ops = - Xapi_stdext_std.Listext.List.setify (List.map snd current_ops) + List.sort_uniq + (fun (_ref1, op1) (_ref2, op2) -> compare op1 op2) + current_ops in (* If there are any current operations, all the non_parallelisable operations must definitely be stopped *) - if current_ops <> [] then - set_errors Api_errors.other_operation_in_progress - ["SR"; _ref; sr_operation_to_string (List.hd current_ops)] - (Xapi_stdext_std.Listext.List.set_difference all_ops safe_to_parallelise) ; - let all_are_parallelisable = - 
List.fold_left ( && ) true - (List.map (fun op -> List.mem op safe_to_parallelise) current_ops) - in - (* If not all are parallelisable (eg a vdi_resize), ban the otherwise - parallelisable operations too *) - if not all_are_parallelisable then - set_errors Api_errors.other_operation_in_progress - ["SR"; _ref; sr_operation_to_string (List.hd current_ops)] - safe_to_parallelise + match current_ops with + | (current_op_ref, current_op_type) :: _ -> + set_errors Api_errors.other_operation_in_progress + ["SR"; _ref; sr_operation_to_string current_op_type; current_op_ref] + (Xapi_stdext_std.Listext.List.set_difference all_ops + safe_to_parallelise + ) ; + let non_parallelisable_op = + List.find_opt + (fun (_, op) -> not (List.mem op safe_to_parallelise)) + current_ops + in + (* If not all are parallelisable (eg a vdi_resize), ban the otherwise + parallelisable operations too *) + Option.iter + (fun (op_ref, op_type) -> + set_errors Api_errors.other_operation_in_progress + ["SR"; _ref; sr_operation_to_string op_type; op_ref] + safe_to_parallelise + ) + non_parallelisable_op + | [] -> + () in let check_cluster_stack_compatible ~__context _record = (* Check whether there are any conflicts with HA that prevent us from diff --git a/ocaml/xapi/xapi_stats.ml b/ocaml/xapi/xapi_stats.ml index 2c94ca64974..f39b5ae88fa 100644 --- a/ocaml/xapi/xapi_stats.ml +++ b/ocaml/xapi/xapi_stats.ml @@ -16,6 +16,10 @@ module D = Debug.Make (struct let name = "xapi_stats" end) let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute +let pool_vgpu_migration_count : int Atomic.t = Atomic.make 0 + +let incr_pool_vgpu_migration_count () = Atomic.incr pool_vgpu_migration_count + let generate_master_stats ~__context = let session_count = Db.Session.get_all ~__context |> List.length |> Int64.of_int @@ -44,7 +48,23 @@ let generate_master_stats ~__context = ~min:0.0 ~units:"sessions/s" () ) in - [session_count_ds; task_count_ds; session_count_change_ds] + let vgpu_migration_count = + Atomic.exchange 
pool_vgpu_migration_count 0 |> Int64.of_int + in + let vgpu_migration_count_ds = + ( Rrd.Host + , Ds.ds_make ~name:"pool_vgpu_migration_rate" + ~description:"Number of vGPU migrations occurred per second" + ~value:(Rrd.VT_Int64 vgpu_migration_count) ~ty:Rrd.Absolute + ~default:true ~min:0. ~units:"migrations/s" () + ) + in + [ + session_count_ds + ; task_count_ds + ; session_count_change_ds + ; vgpu_migration_count_ds + ] let gc_debug = ref true diff --git a/ocaml/xapi/xapi_stats.mli b/ocaml/xapi/xapi_stats.mli index 5282dca6db7..4e1b20750d9 100644 --- a/ocaml/xapi/xapi_stats.mli +++ b/ocaml/xapi/xapi_stats.mli @@ -18,3 +18,6 @@ val start : unit -> unit val stop : unit -> unit (** Stop the stats reporting thread. *) + +val incr_pool_vgpu_migration_count : unit -> unit +(** Increments the pool_vgpu_migration_count by 1 . *) diff --git a/ocaml/xapi/xapi_support.ml b/ocaml/xapi/xapi_support.ml index 5e65d586776..13666dc09e0 100644 --- a/ocaml/xapi/xapi_support.ml +++ b/ocaml/xapi/xapi_support.ml @@ -14,14 +14,6 @@ module D = Debug.Make (struct let name = "xapi_support" end) open D - -let support_url = "ftp://support.xensource.com/uploads/" - -(* URL to which the crashdump/whatever will be uploaded *) -let upload_url name = - let uuid = Xapi_inventory.lookup Xapi_inventory._installation_uuid in - Printf.sprintf "%s%s-%s" support_url uuid name - open Forkhelpers let do_upload label file url options = diff --git a/ocaml/xapi/xapi_systemctl.ml b/ocaml/xapi/xapi_systemctl.ml index 1dbca594a0c..448d565a266 100644 --- a/ocaml/xapi/xapi_systemctl.ml +++ b/ocaml/xapi/xapi_systemctl.ml @@ -19,7 +19,7 @@ module D = Debug.Make (struct let name = "xapi_systemctl" end) open D -type t = Start | Stop | Restart +type t = Start | Stop | Restart | Enable | Disable exception Systemctl_fail of string @@ -30,6 +30,10 @@ let to_string = function "stop" | Restart -> "restart" + | Enable -> + "enable" + | Disable -> + "disable" let perform ~wait_until_success ~service ~timeout op = let op_str 
= op |> to_string in @@ -42,8 +46,17 @@ let perform ~wait_until_success ~service ~timeout op = if wait_until_success then ( if op = Restart then Thread.delay 0.1 ; let is_active = Fe_systemctl.is_active ~service in + let is_enabled = Fe_systemctl.is_enabled ~service in let success_cond () = - match op with Start | Restart -> is_active | Stop -> is_active |> not + match op with + | Start | Restart -> + is_active + | Stop -> + is_active |> not + | Enable -> + is_enabled + | Disable -> + is_enabled |> not in try Helpers.retry_until_timeout ~timeout @@ -62,7 +75,17 @@ let restart ?(timeout = 5.) ~wait_until_success service = perform ~wait_until_success ~service ~timeout Restart let stop ?(timeout = 5.) ~wait_until_success service = - perform ~wait_until_success ~service ~timeout Stop + if Fe_systemctl.is_active ~service then + perform ~wait_until_success ~service ~timeout Stop let start ?(timeout = 5.) ~wait_until_success service = - perform ~wait_until_success ~service ~timeout Start + if not (Fe_systemctl.is_active ~service) then + perform ~wait_until_success ~service ~timeout Start + +let disable ?(timeout = 5.) ~wait_until_success service = + if Fe_systemctl.is_enabled ~service then + perform ~wait_until_success ~service ~timeout Disable + +let enable ?(timeout = 5.) 
~wait_until_success service = + if not (Fe_systemctl.is_enabled ~service) then + perform ~wait_until_success ~service ~timeout Enable diff --git a/ocaml/xapi/xapi_systemctl.mli b/ocaml/xapi/xapi_systemctl.mli index 2660839f9b7..b552afebbbc 100644 --- a/ocaml/xapi/xapi_systemctl.mli +++ b/ocaml/xapi/xapi_systemctl.mli @@ -15,6 +15,10 @@ (* Exception about systemctl operation like start/stop failed *) exception Systemctl_fail of string +type t = Start | Stop | Restart | Enable | Disable + +val to_string : t -> string + (* start a service with systemctl *) val start : ?timeout:float -> wait_until_success:bool -> string -> unit @@ -23,3 +27,9 @@ val stop : ?timeout:float -> wait_until_success:bool -> string -> unit (* restart a service with systemctl *) val restart : ?timeout:float -> wait_until_success:bool -> string -> unit + +(* enable a service with systemctl *) +val enable : ?timeout:float -> wait_until_success:bool -> string -> unit + +(* disable a service with systemctl *) +val disable : ?timeout:float -> wait_until_success:bool -> string -> unit diff --git a/ocaml/xapi/xapi_vbd.ml b/ocaml/xapi/xapi_vbd.ml index cf7ab173882..331284eb344 100644 --- a/ocaml/xapi/xapi_vbd.ml +++ b/ocaml/xapi/xapi_vbd.ml @@ -184,19 +184,26 @@ let create ~__context ~vM ~vDI ~device ~userdevice ~bootable ~mode ~_type (* CA-75697: Disallow VBD.create on a VM that's in the middle of a migration *) debug "Checking whether there's a migrate in progress..." 
; let vm_current_ops = - Xapi_stdext_std.Listext.List.setify - (List.map snd (Db.VM.get_current_operations ~__context ~self:vM)) + List.sort_uniq + (fun (_ref1, op1) (_ref2, op2) -> compare op1 op2) + (Db.VM.get_current_operations ~__context ~self:vM) in + let migrate_ops = [`migrate_send; `pool_migrate] in let migrate_ops_in_progress = - List.filter (fun op -> List.mem op vm_current_ops) migrate_ops + List.filter (fun (_, op) -> List.mem op migrate_ops) vm_current_ops in match migrate_ops_in_progress with - | op :: _ -> + | (op_ref, op_type) :: _ -> raise (Api_errors.Server_error ( Api_errors.other_operation_in_progress - , ["VM"; Ref.string_of vM; Record_util.vm_operation_to_string op] + , [ + "VM" + ; Ref.string_of vM + ; Record_util.vm_operation_to_string op_type + ; op_ref + ] ) ) | _ -> diff --git a/ocaml/xapi/xapi_vbd_helpers.ml b/ocaml/xapi/xapi_vbd_helpers.ml index 3e74dfe1f88..07d6b012da2 100644 --- a/ocaml/xapi/xapi_vbd_helpers.ml +++ b/ocaml/xapi/xapi_vbd_helpers.ml @@ -42,7 +42,9 @@ type table = (API.vbd_operations, (string * string list) option) Hashtbl.t let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = let _ref = Ref.string_of _ref' in let current_ops = - Listext.List.setify (List.map snd record.Db_actions.vBD_current_operations) + List.sort_uniq + (fun (_ref1, op1) (_ref2, op2) -> compare op1 op2) + record.Db_actions.vBD_current_operations in (* Policy: * current_ops must be empty [ will make exceptions later for eg eject/unplug of attached vbd ] @@ -74,30 +76,48 @@ let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = let safe_to_parallelise = [`pause; `unpause] in (* Any current_operations preclude everything that isn't safe to parallelise *) ( if current_ops <> [] then - let concurrent_op = List.hd current_ops in + let concurrent_op_ref, concurrent_op_type = List.hd current_ops in set_errors Api_errors.other_operation_in_progress - ["VBD"; _ref; vbd_operations_to_string concurrent_op] + [ 
+ "VBD" + ; _ref + ; vbd_operations_to_string concurrent_op_type + ; concurrent_op_ref + ] (Listext.List.set_difference all_ops safe_to_parallelise) ) ; (* If not all operations are parallisable then preclude pause *) - let all_are_parallelisable = - List.fold_left ( && ) true - (List.map (fun op -> List.mem op safe_to_parallelise) current_ops) + let non_parallelisable_op = + List.find_opt + (fun (_, op) -> not (List.mem op safe_to_parallelise)) + current_ops in (* If not all are parallelisable, ban the otherwise parallelisable operations too *) - if not all_are_parallelisable then - set_errors Api_errors.other_operation_in_progress - ["VBD"; _ref; vbd_operations_to_string (List.hd current_ops)] - [`pause] ; + ( match non_parallelisable_op with + | Some (concurrent_op_ref, concurrent_op_type) -> + set_errors Api_errors.other_operation_in_progress + [ + "VBD" + ; _ref + ; vbd_operations_to_string concurrent_op_type + ; concurrent_op_ref + ] + [`pause] + | None -> + () + ) ; + (* If something other than `pause `unpause *and* `attach (for VM.reboot, see CA-24282) then disallow unpause *) - if - Listext.List.set_difference current_ops (`attach :: safe_to_parallelise) - <> [] - then - set_errors Api_errors.other_operation_in_progress - ["VBD"; _ref; vbd_operations_to_string (List.hd current_ops)] - [`unpause] ; + let set_difference a b = List.filter (fun (_, x) -> not (List.mem x b)) a in + ( match set_difference current_ops (`attach :: safe_to_parallelise) with + | (op_ref, op_type) :: _ -> + set_errors Api_errors.other_operation_in_progress + ["VBD"; _ref; vbd_operations_to_string op_type; op_ref] + [`unpause] + | [] -> + () + ) ; (* Drives marked as not unpluggable cannot be unplugged *) if not record.Db_actions.vBD_unpluggable then set_errors Api_errors.vbd_not_unpluggable [_ref] [`unplug; `unplug_force] ; @@ -128,7 +148,10 @@ let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = let bad_ops = [`plug; `unplug; `unplug_force] in (* However 
allow VBD pause and unpause if the VM is paused: *) let bad_ops' = - if power_state = `Paused then bad_ops else `pause :: `unpause :: bad_ops + if power_state = `Paused then + bad_ops + else + `pause :: `unpause :: bad_ops in set_errors Api_errors.vm_bad_power_state [Ref.string_of vm; expected; actual] @@ -226,17 +249,23 @@ let valid_operations ~expensive_sharing_checks ~__context record _ref' : table = | _ -> true in - List.exists + List.find_opt (fun (_, operation) -> is_illegal_operation operation) vdi_record.Db_actions.vDI_current_operations in - ( if vdi_operations_besides_copy then - let concurrent_op = - snd (List.hd vdi_record.Db_actions.vDI_current_operations) - in + + ( match vdi_operations_besides_copy with + | Some (concurrent_op_ref, concurrent_op_type) -> set_errors Api_errors.other_operation_in_progress - ["VDI"; Ref.string_of vdi; vdi_operations_to_string concurrent_op] + [ + "VDI" + ; Ref.string_of vdi + ; vdi_operations_to_string concurrent_op_type + ; concurrent_op_ref + ] [`attach; `plug; `insert] + | None -> + () ) ; if (not record.Db_actions.vBD_currently_attached) && expensive_sharing_checks @@ -329,24 +358,17 @@ let assert_attachable ~__context ~self = let assert_doesnt_make_vm_non_agile ~__context ~vm ~vdi = let pool = Helpers.get_pool ~__context in - let properly_shared = - Agility.is_sr_properly_shared ~__context - ~self:(Db.VDI.get_SR ~__context ~self:vdi) - in + let sr = Db.VDI.get_SR ~__context ~self:vdi in + let properly_shared = Agility.is_sr_properly_shared ~__context ~self:sr in if true && Db.Pool.get_ha_enabled ~__context ~self:pool && (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool)) && Helpers.is_xha_protected ~__context ~self:vm && not properly_shared - then ( - warn "Attaching VDI %s makes VM %s not agile" (Ref.string_of vdi) - (Ref.string_of vm) ; - raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) - ) - ) + then + let sr = Ref.string_of sr in + raise 
Api_errors.(Server_error (ha_constraint_violation_sr_not_shared, [sr])) let update_allowed_operations ~__context ~self : unit = let all = Db.VBD.get_record_internal ~__context ~self in diff --git a/ocaml/xapi/xapi_vdi.ml b/ocaml/xapi/xapi_vdi.ml index 3713f189040..624875c21e5 100644 --- a/ocaml/xapi/xapi_vdi.ml +++ b/ocaml/xapi/xapi_vdi.ml @@ -63,48 +63,13 @@ let check_sm_feature_error (op : API.vdi_operations) sm_features sr = specified, it should contain at least all the VBD records from the database that are linked to this VDI. *) let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) - ?vbd_records ha_enabled record _ref' op = + ?vbd_records ha_enabled record _ref' = let ( let* ) = Result.bind in let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vDI_current_operations in let reset_on_boot = record.Db_actions.vDI_on_boot = `reset in - (* Policy: - 1. any current_operation besides copy implies exclusivity; fail everything - else; except vdi mirroring is in current operations and destroy is performed - as part of vdi_pool_migrate. - 2. if a copy is ongoing, don't fail with other_operation_in_progress, as - blocked operations could then get stuck behind a long-running copy. - Instead, rely on the blocked_by_attach check further down to decide - whether an operation should be allowed. - 3. if doing a VM start then assume the sharing check is done elsewhere - (so VMs may share disks but our operations cannot) - 4. for other operations, fail if any VBD has currently-attached=true or any VBD - has a current_operation itself - 5. HA prevents you from deleting statefiles or metadata volumes - 6. 
During rolling pool upgrade, only operations known by older releases are allowed - *) - let* () = - if - Helpers.rolling_upgrade_in_progress ~__context - && not (List.mem op Xapi_globs.rpu_allowed_vdi_operations) - then - Error (Api_errors.not_supported_during_upgrade, []) - else - Ok () - in - let* () = - (* Don't fail with other_operation_in_progress if VDI mirroring is in - progress and destroy is called as part of VDI mirroring *) - let is_vdi_mirroring_in_progress = - List.exists (fun (_, op) -> op = `mirror) current_ops && op = `destroy - in - if - List.exists (fun (_, op) -> op <> `copy) current_ops - && not is_vdi_mirroring_in_progress - then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - Ok () + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context in (* check to see whether it's a local cd drive *) let sr = record.Db_actions.vDI_SR in @@ -129,14 +94,6 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) ) pbd_records in - let* () = - if pbds_attached = [] && List.mem op [`resize] then - Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) - else - Ok () - in - - (* check to see whether VBDs exist which are using this VDI *) (* Only a 'live' operation can be performed if there are active (even RO) devices *) let my_active_vbd_records = @@ -155,16 +112,14 @@ let check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) ) ) | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && (vbd_record.Db_actions.vBD_currently_attached - || vbd_record.Db_actions.vBD_reserved - ) - ) - records + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && (vbd_record.Db_actions.vBD_currently_attached + || vbd_record.Db_actions.vBD_reserved + ) ) + records in let my_active_rw_vbd_records = List.filter (fun vbd -> vbd.Db_actions.vBD_mode = `RW) my_active_vbd_records @@ -183,261 +138,322 @@ let 
check_operation_error ~__context ?sr_records:_ ?(pbd_records = []) ) ) | Some records -> - List.map snd - (List.filter - (fun (_, vbd_record) -> - vbd_record.Db_actions.vBD_VDI = _ref' - && vbd_record.Db_actions.vBD_current_operations <> [] - ) - records + List.filter + (fun vbd_record -> + vbd_record.Db_actions.vBD_VDI = _ref' + && vbd_record.Db_actions.vBD_current_operations <> [] ) + records in - (* If the VBD is currently_attached then some operations can still be - performed ie: VDI.clone (if the VM is suspended we have to have the - 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; - 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked - to a VM, but the implementation first waits for the VDI's VBDs to be - unplugged and destroyed, and the checks are performed there. + + (* Policy: + 1. any current_operation besides copy implies exclusivity; fail everything + else; except vdi mirroring is in current operations and destroy is performed + as part of vdi_pool_migrate. + 2. if a copy is ongoing, don't fail with other_operation_in_progress, as + blocked operations could then get stuck behind a long-running copy. + Instead, rely on the blocked_by_attach check further down to decide + whether an operation should be allowed. + 3. if doing a VM start then assume the sharing check is done elsewhere + (so VMs may share disks but our operations cannot) + 4. for other operations, fail if any VBD has currently-attached=true or any VBD + has a current_operation itself + 5. HA prevents you from deleting statefiles or metadata volumes + 6. 
During rolling pool upgrade, only operations known by older releases are allowed *) - let operation_can_be_performed_live = - match op with - | `snapshot - | `resize_online - | `blocked - | `clone - | `mirror - | `enable_cbt - | `disable_cbt - | `data_destroy -> - true - | _ -> - false + let sm_features = + Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type in - let operation_can_be_performed_with_ro_attach = - operation_can_be_performed_live - || match op with `copy -> true | _ -> false + let vdi_is_ha_state_or_redolog = + List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] in - (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) - let blocked_by_attach = - let blocked_by_attach = - if operation_can_be_performed_live then - false - else if operation_can_be_performed_with_ro_attach then - my_active_rw_vbd_records <> [] + + fun op -> + let* () = + if + rolling_upgrade_in_progress + && not + (Xapi_globs.Vdi_operations_set.mem op + Xapi_globs.rpu_allowed_vdi_operations + ) + then + Error (Api_errors.not_supported_during_upgrade, []) else - my_active_vbd_records <> [] + Ok () in - let allow_attached_vbds = - (* We use Valid_ref_list.list to ignore exceptions due to invalid - references that could propagate to the message forwarding layer, which - calls this function to check for errors - these exceptions would - prevent the actual XenAPI function from being run. Checks called from - the message forwarding layer should not fail with an exception. 
*) - let true_for_all_active_vbds f = - Valid_ref_list.for_all f my_active_vbd_records + let* () = + (* Don't fail with other_operation_in_progress if VDI mirroring is in + progress and destroy is called as part of VDI mirroring *) + let is_vdi_mirroring_in_progress = + op = `destroy && List.exists (fun (_, op) -> op = `mirror) current_ops in - match op with - | `list_changed_blocks -> - let vbd_connected_to_vm_snapshot vbd = - let vm = vbd.Db_actions.vBD_VM in - Db.is_valid_ref __context vm - && Db.VM.get_is_a_snapshot ~__context ~self:vm - in - (* We allow list_changed_blocks on VDIs attached to snapshot VMs, - because VM.checkpoint may set the currently_attached fields of the - snapshot's VBDs to true, and this would block list_changed_blocks. *) - true_for_all_active_vbds vbd_connected_to_vm_snapshot + match + ( is_vdi_mirroring_in_progress + , List.find_opt (fun (_, op) -> op <> `copy) current_ops + ) + with + | false, Some (op_ref, op_type) -> + Error + ( Api_errors.other_operation_in_progress + , ["VDI"; _ref; API.vdi_operations_to_string op_type; op_ref] + ) | _ -> - false + Ok () in - blocked_by_attach && not allow_attached_vbds - in - let* () = - if blocked_by_attach then - Error - (Api_errors.vdi_in_use, [_ref; Record_util.vdi_operations_to_string op]) - else if - (* data_destroy first waits for all the VBDs to disappear in its - implementation, so it is harmless to allow it when any of the VDI's - VBDs have operations in progress. This ensures that we avoid the retry - mechanism of message forwarding and only use the event loop. 
*) - my_has_current_operation_vbd_records <> [] && op <> `data_destroy - then - Error (Api_errors.other_operation_in_progress, ["VDI"; _ref]) - else - Ok () - in - let sm_features = - Xapi_sr_operations.features_of_sr_internal ~__context ~_type:sr_type - in - let* () = check_sm_feature_error op sm_features sr in - let allowed_for_cbt_metadata_vdi = - match op with - | `clone - | `copy - | `disable_cbt - | `enable_cbt - | `mirror - | `resize - | `resize_online - | `snapshot - | `set_on_boot -> - false - | `blocked - | `data_destroy - | `destroy - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `update -> - true - in - let* () = - if - (not allowed_for_cbt_metadata_vdi) - && record.Db_actions.vDI_type = `cbt_metadata - then - Error - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string `cbt_metadata] - ) - else - Ok () - in - let allowed_when_cbt_enabled = - match op with - | `mirror | `set_on_boot -> - false - | `blocked - | `clone - | `copy - | `data_destroy - | `destroy - | `disable_cbt - | `enable_cbt - | `list_changed_blocks - | `force_unlock - | `forget - | `generate_config - | `resize - | `resize_online - | `snapshot - | `update -> - true - in - let* () = - if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.vdi_cbt_enabled, [_ref]) - else - Ok () - in - let check_destroy () = - if sr_type = "udev" then - Error (Api_errors.vdi_is_a_physical_device, [_ref]) - else if is_tools_sr then - Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Error (Api_errors.vdi_has_rrds, [_ref]) - else if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then - Error (Api_errors.ha_is_enabled, []) - else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] - && Xapi_pool_helpers.ha_enable_in_progress ~__context - then - Error (Api_errors.ha_enable_in_progress, []) 
- else if - List.mem record.Db_actions.vDI_type [`ha_statefile; `metadata] - && Xapi_pool_helpers.ha_disable_in_progress ~__context - then - Error (Api_errors.ha_disable_in_progress, []) - else - Ok () - in - match op with - | `forget -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] - then - Error (Api_errors.ha_is_enabled, []) - else if List.mem record.Db_actions.vDI_type [`rrd] then - Error (Api_errors.vdi_has_rrds, [_ref]) + let* () = + if pbds_attached = [] && op = `resize then + Error (Api_errors.sr_no_pbds, [Ref.string_of sr]) else Ok () - | `destroy -> - check_destroy () - | `data_destroy -> - if not record.Db_actions.vDI_is_a_snapshot then + in + + (* check to see whether VBDs exist which are using this VDI *) + + (* If the VBD is currently_attached then some operations can still be + performed ie: VDI.clone (if the VM is suspended we have to have the + 'allow_clone_suspended_vm' flag); VDI.snapshot; VDI.resize_online; + 'blocked' (CP-831); VDI.data_destroy: it is not allowed on VDIs linked + to a VM, but the implementation first waits for the VDI's VBDs to be + unplugged and destroyed, and the checks are performed there. 
+ *) + let operation_can_be_performed_live = + match op with + | `snapshot + | `resize_online + | `blocked + | `clone + | `mirror + | `enable_cbt + | `disable_cbt + | `data_destroy -> + true + | _ -> + false + in + let operation_can_be_performed_with_ro_attach = + operation_can_be_performed_live + || match op with `copy -> true | _ -> false + in + (* NB RO vs RW sharing checks are done in xapi_vbd.ml *) + let blocked_by_attach = + let blocked_by_attach = + if operation_can_be_performed_live then + false + else if operation_can_be_performed_with_ro_attach then + my_active_rw_vbd_records <> [] + else + my_active_vbd_records <> [] + in + let allow_attached_vbds = + (* We use Valid_ref_list.list to ignore exceptions due to invalid + references that could propagate to the message forwarding layer, which + calls this function to check for errors - these exceptions would + prevent the actual XenAPI function from being run. Checks called from + the message forwarding layer should not fail with an exception. *) + let true_for_all_active_vbds f = + Valid_ref_list.for_all f my_active_vbd_records + in + match op with + | `list_changed_blocks -> + let vbd_connected_to_vm_snapshot vbd = + let vm = vbd.Db_actions.vBD_VM in + Db.is_valid_ref __context vm + && Db.VM.get_is_a_snapshot ~__context ~self:vm + in + (* We allow list_changed_blocks on VDIs attached to snapshot VMs, + because VM.checkpoint may set the currently_attached fields of the + snapshot's VBDs to true, and this would block list_changed_blocks. 
*) + true_for_all_active_vbds vbd_connected_to_vm_snapshot + | _ -> + false + in + blocked_by_attach && not allow_attached_vbds + in + let* () = + if blocked_by_attach then Error - (Api_errors.operation_not_allowed, ["VDI is not a snapshot: " ^ _ref]) - else if not record.Db_actions.vDI_cbt_enabled then - Error (Api_errors.vdi_no_cbt_metadata, [_ref]) - else - check_destroy () - | `resize -> - if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + ( Api_errors.vdi_in_use + , [_ref; Record_util.vdi_operations_to_string op] + ) + else if + (* data_destroy first waits for all the VBDs to disappear in its + implementation, so it is harmless to allow it when any of the VDI's + VBDs have operations in progress. This ensures that we avoid the retry + mechanism of message forwarding and only use the event loop. *) + my_has_current_operation_vbd_records <> [] && op <> `data_destroy then - Error (Api_errors.ha_is_enabled, []) + let op_ref, op_type = + List.hd + (List.hd my_has_current_operation_vbd_records) + .Db_actions.vBD_current_operations + in + Error + ( Api_errors.other_operation_in_progress + , ["VDI"; _ref; API.vbd_operations_to_string op_type; op_ref] + ) else Ok () - | `resize_online -> + in + let* () = check_sm_feature_error op sm_features sr in + let allowed_for_cbt_metadata_vdi = + match op with + | `clone + | `copy + | `disable_cbt + | `enable_cbt + | `mirror + | `resize + | `resize_online + | `snapshot + | `set_on_boot -> + false + | `blocked + | `data_destroy + | `destroy + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `update -> + true + in + let* () = if - ha_enabled - && List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] + (not allowed_for_cbt_metadata_vdi) + && record.Db_actions.vDI_type = `cbt_metadata then - Error (Api_errors.ha_is_enabled, []) - else - Ok () - | `snapshot when record.Db_actions.vDI_sharable -> - Error (Api_errors.vdi_is_sharable, [_ref]) - | (`snapshot | `copy) 
when reset_on_boot -> - Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) - | `snapshot -> - if List.exists (fun (_, op) -> op = `copy) current_ops then Error - ( Api_errors.operation_not_allowed - , ["Snapshot operation not allowed during copy."] + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string `cbt_metadata] ) else Ok () - | `copy -> - if List.mem record.Db_actions.vDI_type [`ha_statefile; `redo_log] then - Error - ( Api_errors.operation_not_allowed - , [ - "VDI containing HA statefile or redo log cannot be copied (check \ - the VDI's allowed operations)." - ] - ) + in + let allowed_when_cbt_enabled = + match op with + | `mirror | `set_on_boot -> + false + | `blocked + | `clone + | `copy + | `data_destroy + | `destroy + | `disable_cbt + | `enable_cbt + | `list_changed_blocks + | `force_unlock + | `forget + | `generate_config + | `resize + | `resize_online + | `snapshot + | `update -> + true + in + let* () = + if (not allowed_when_cbt_enabled) && record.Db_actions.vDI_cbt_enabled + then + Error (Api_errors.vdi_cbt_enabled, [_ref]) else Ok () - | `enable_cbt | `disable_cbt -> - if record.Db_actions.vDI_is_a_snapshot then - Error (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) - else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then - Error - ( Api_errors.vdi_incompatible_type - , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] - ) - else if reset_on_boot then - Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + in + let check_destroy () = + if sr_type = "udev" then + Error (Api_errors.vdi_is_a_physical_device, [_ref]) + else if is_tools_sr then + Error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else if ha_enabled && vdi_is_ha_state_or_redolog then + Error (Api_errors.ha_is_enabled, []) + else if + vdi_is_ha_state_or_redolog + && 
Xapi_pool_helpers.ha_enable_in_progress ~__context + then + Error (Api_errors.ha_enable_in_progress, []) + else if + vdi_is_ha_state_or_redolog + && Xapi_pool_helpers.ha_disable_in_progress ~__context + then + Error (Api_errors.ha_disable_in_progress, []) else Ok () - | `mirror - | `clone - | `generate_config - | `force_unlock - | `set_on_boot - | `list_changed_blocks - | `blocked - | `update -> - Ok () + in + match op with + | `forget -> + if ha_enabled && vdi_is_ha_state_or_redolog then + Error (Api_errors.ha_is_enabled, []) + else if List.mem record.Db_actions.vDI_type [`rrd] then + Error (Api_errors.vdi_has_rrds, [_ref]) + else + Ok () + | `destroy -> + check_destroy () + | `data_destroy -> + if not record.Db_actions.vDI_is_a_snapshot then + Error + ( Api_errors.operation_not_allowed + , ["VDI is not a snapshot: " ^ _ref] + ) + else if not record.Db_actions.vDI_cbt_enabled then + Error (Api_errors.vdi_no_cbt_metadata, [_ref]) + else + check_destroy () + | `resize -> + if ha_enabled && vdi_is_ha_state_or_redolog then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `resize_online -> + if ha_enabled && vdi_is_ha_state_or_redolog then + Error (Api_errors.ha_is_enabled, []) + else + Ok () + | `snapshot when record.Db_actions.vDI_sharable -> + Error (Api_errors.vdi_is_sharable, [_ref]) + | (`snapshot | `copy) when reset_on_boot -> + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + | `snapshot -> + if List.exists (fun (_, op) -> op = `copy) current_ops then + Error + ( Api_errors.operation_not_allowed + , ["Snapshot operation not allowed during copy."] + ) + else + Ok () + | `copy -> + if vdi_is_ha_state_or_redolog then + Error + ( Api_errors.operation_not_allowed + , [ + "VDI containing HA statefile or redo log cannot be copied \ + (check the VDI's allowed operations)." 
+ ] + ) + else + Ok () + | `enable_cbt | `disable_cbt -> + if record.Db_actions.vDI_is_a_snapshot then + Error + (Api_errors.operation_not_allowed, ["VDI is a snapshot: " ^ _ref]) + else if not (List.mem record.Db_actions.vDI_type [`user; `system]) then + Error + ( Api_errors.vdi_incompatible_type + , [_ref; Record_util.vdi_type_to_string record.Db_actions.vDI_type] + ) + else if reset_on_boot then + Error (Api_errors.vdi_on_boot_mode_incompatible_with_operation, []) + else + Ok () + | `mirror + | `clone + | `generate_config + | `force_unlock + | `set_on_boot + | `list_changed_blocks + | `blocked + | `update -> + Ok () let assert_operation_valid ~__context ~self ~(op : API.vdi_operations) = let pool = Helpers.get_pool ~__context in @@ -467,7 +483,7 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records *) let all_ops = Xapi_globs.pre_ely_vdi_operations - |> List.filter (function + |> Xapi_globs.Vdi_operations_set.filter (function | `blocked -> false (* CA-260245 *) | `force_unlock -> @@ -477,25 +493,31 @@ let update_allowed_operations_internal ~__context ~self ~sr_records ~pbd_records ) in let all = Db.VDI.get_record_internal ~__context ~self in + let vbd_records = + match vbd_records with + | None when Pool_role.is_master () -> + all.Db_actions.vDI_VBDs + |> List.rev_map (fun self -> Db.VBD.get_record_internal ~__context ~self) + |> Option.some + | v -> + v + in let allowed = - let check x = - match - check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records - ha_enabled all self x - with - | Ok () -> - [x] - | _ -> - [] + let check' = + check_operation_error ~__context ~sr_records ~pbd_records ?vbd_records + ha_enabled all self in - List.fold_left (fun accu op -> check op @ accu) [] all_ops + let check x = match check' x with Ok () -> true | _ -> false in + all_ops |> Xapi_globs.Vdi_operations_set.filter check in let allowed = - if Helpers.rolling_upgrade_in_progress ~__context then - 
Xapi_stdext_std.Listext.List.intersect allowed - Xapi_globs.rpu_allowed_vdi_operations - else - allowed + ( if Helpers.rolling_upgrade_in_progress ~__context then + Xapi_globs.Vdi_operations_set.inter allowed + Xapi_globs.rpu_allowed_vdi_operations + else + allowed + ) + |> Xapi_globs.Vdi_operations_set.elements in Db.VDI.set_allowed_operations ~__context ~self ~value:allowed diff --git a/ocaml/xapi/xapi_vdi.mli b/ocaml/xapi/xapi_vdi.mli index 45569a12fde..3d60ad31ff1 100644 --- a/ocaml/xapi/xapi_vdi.mli +++ b/ocaml/xapi/xapi_vdi.mli @@ -23,7 +23,7 @@ val check_operation_error : __context:Context.t -> ?sr_records:'a list -> ?pbd_records:(API.ref_PBD * API.pBD_t) list - -> ?vbd_records:(API.ref_VBD * Db_actions.vBD_t) list + -> ?vbd_records:Db_actions.vBD_t list -> bool -> Db_actions.vDI_t -> API.ref_VDI @@ -40,7 +40,7 @@ val update_allowed_operations_internal : -> self:[`VDI] API.Ref.t -> sr_records:'a list -> pbd_records:(API.ref_PBD * API.pBD_t) list - -> ?vbd_records:(API.ref_VBD * Db_actions.vBD_t) list + -> ?vbd_records:Db_actions.vBD_t list -> unit -> unit diff --git a/ocaml/xapi/xapi_vdi_helpers.ml b/ocaml/xapi/xapi_vdi_helpers.ml index 3cc2d4a7f5f..84db627c719 100644 --- a/ocaml/xapi/xapi_vdi_helpers.ml +++ b/ocaml/xapi/xapi_vdi_helpers.ml @@ -184,7 +184,7 @@ let database_ref_of_vdi ~__context ~vdi = debug "Enabling redo_log with device reason [%s]" device ; Redo_log.enable_block_existing log device ; let db = Database.make (Datamodel_schema.of_datamodel ()) in - let db_ref = Xapi_database.Db_ref.in_memory (ref (ref db)) in + let db_ref = Xapi_database.Db_ref.in_memory (Atomic.make db) in Redo_log_usage.read_from_redo_log log Xapi_globs.foreign_metadata_db db_ref ; Redo_log.delete log ; (* Upgrade database to the local schema. 
*) diff --git a/ocaml/xapi/xapi_vgpu_type.ml b/ocaml/xapi/xapi_vgpu_type.ml index f7d5e1eb408..aae64cef195 100644 --- a/ocaml/xapi/xapi_vgpu_type.ml +++ b/ocaml/xapi/xapi_vgpu_type.ml @@ -1033,7 +1033,9 @@ module Nvidia_compat = struct read_configs ac tl ) in - let conf_files = Array.to_list (Sys.readdir conf_dir) in + let conf_files = + try Array.to_list (Sys.readdir conf_dir) with Sys_error _ -> [] + in debug "Reading NVIDIA vGPU config files %s/{%s}" conf_dir (String.concat ", " conf_files) ; read_configs [] diff --git a/ocaml/xapi/xapi_vif_helpers.ml b/ocaml/xapi/xapi_vif_helpers.ml index 2fab562dbe4..37de1b77770 100644 --- a/ocaml/xapi/xapi_vif_helpers.ml +++ b/ocaml/xapi/xapi_vif_helpers.ml @@ -50,18 +50,20 @@ let valid_operations ~__context record _ref' : table = in let vm = Db.VIF.get_VM ~__context ~self:_ref' in (* Any current_operations preclude everything else *) - if current_ops <> [] then ( - debug "No operations are valid because current-operations = [ %s ]" - (String.concat "; " - (List.map - (fun (task, op) -> task ^ " -> " ^ vif_operations_to_string op) - current_ops - ) - ) ; - let concurrent_op = snd (List.hd current_ops) in - set_errors Api_errors.other_operation_in_progress - ["VIF"; _ref; vif_operations_to_string concurrent_op] - all_ops + ( if current_ops <> [] then + let concurrent_op_refs, concurrent_op_types = + List.fold_left + (fun (refs, types) (ref, op) -> + (ref :: refs, vif_operations_to_string op :: types) + ) + ([], []) current_ops + in + let format x = Printf.sprintf "{%s}" (String.concat "; " x) in + let concurrent_op_refs = format concurrent_op_refs in + let concurrent_op_types = format concurrent_op_types in + set_errors Api_errors.other_operation_in_progress + ["VIF"; _ref; concurrent_op_types; concurrent_op_refs] + all_ops ) ; (* No hotplug on dom0 *) if Helpers.is_domain_zero ~__context vm then @@ -192,10 +194,11 @@ let clear_current_operations ~__context ~self = 
(**************************************************************************************) -(** Check if the device string has the right form *) +(** Check if the device string has the right form - it should only be an + unsigned decimal integer *) let valid_device dev = try - ignore (int_of_string dev) ; + Scanf.sscanf dev "%u%!" ignore ; true with _ -> false @@ -264,19 +267,18 @@ let create ~__context ~device ~network ~vM ~mAC ~mTU ~other_config raise (Api_errors.Server_error (Api_errors.mac_invalid, [mAC])) ; (* Make people aware that non-shared networks being added to VMs makes them not agile *) let pool = Helpers.get_pool ~__context in - if - true - && Db.Pool.get_ha_enabled ~__context ~self:pool - && (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool)) - && Helpers.is_xha_protected ~__context ~self:vM - && not (Agility.is_network_properly_shared ~__context ~self:network) - then ( - warn "Creating VIF %s makes VM %s not agile" (Ref.string_of ref) - (Ref.string_of vM) ; - raise - (Api_errors.Server_error - (Api_errors.ha_operation_would_break_failover_plan, []) - ) + ( if + true + && Db.Pool.get_ha_enabled ~__context ~self:pool + && (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool)) + && Helpers.is_xha_protected ~__context ~self:vM + && not (Agility.is_network_properly_shared ~__context ~self:network) + then + let net = Ref.string_of network in + raise + Api_errors.( + Server_error (ha_constraint_violation_network_not_shared, [net]) + ) ) ; (* Check to make sure the device is unique *) Xapi_stdext_threads.Threadext.Mutex.execute m (fun () -> @@ -288,8 +290,7 @@ let create ~__context ~device ~network ~vM ~mAC ~mTU ~other_config in let new_device = int_of_string device in if List.exists (fun (_, d) -> d = new_device) all_vifs_with_devices then - raise - (Api_errors.Server_error (Api_errors.device_already_exists, [device])) ; + raise Api_errors.(Server_error (device_already_exists, [device])) ; (* If the VM uses a PVS_proxy, then the proxy _must_ 
be associated with the VIF that has the lowest device number. Check that the new VIF diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 78967197a8f..036c6f07f9a 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -65,7 +65,7 @@ let update_allowed_operations ~__context ~self = let assert_can_boot_here ~__context ~self ~host = let snapshot = Db.VM.get_record ~__context ~self in if Helpers.rolling_upgrade_in_progress ~__context then - Helpers.assert_platform_version_is_same_on_master ~__context ~host ~self ; + Helpers.assert_host_versions_are_same_on_master ~__context ~host ~self ; assert_can_boot_here ~__context ~self ~host ~snapshot ~do_cpuid_check:true () let retrieve_wlb_recommendations ~__context ~vm = @@ -89,9 +89,6 @@ let retrieve_wlb_recommendations ~__context ~vm = let assert_agile ~__context ~self = Agility.vm_assert_agile ~__context ~self -(* helpers *) -let immediate_complete ~__context = Helpers.progress ~__context (0.0 -. 1.0) - (* API *) let set_actions_after_crash ~__context ~self ~value = set_actions_after_crash ~__context ~self ~value @@ -1171,6 +1168,11 @@ let call_plugin ~__context ~vm ~plugin ~fn ~args = (Api_errors.xenapi_plugin_failure, ["failed to execute fn"; msg; msg]) ) +let call_host_plugin ~__context ~vm ~plugin ~fn ~args = + (* vm is unused; was used to find the host *) + let _ = vm in + Xapi_plugins.call_plugin (Context.get_session_id __context) plugin fn args + let send_sysrq ~__context ~vm:_ ~key:_ = raise (Api_errors.Server_error (Api_errors.not_implemented, ["send_sysrq"])) @@ -1349,11 +1351,11 @@ let set_suspend_VDI ~__context ~self ~value = let dst_vdi = value in if src_vdi <> dst_vdi then ( (* - * We don't care if the future host can see current suspend VDI or not, but - * we want to make sure there's at least a host can see all the VDIs of the - * VM + the new suspend VDI. We raise an exception if there's no suitable - * host. 
- *) + * We don't care if the future host can see current suspend VDI or not, but + * we want to make sure there's at least a host can see all the VDIs of the + * VM + the new suspend VDI. We raise an exception if there's no suitable + * host. + *) let vbds = Db.VM.get_VBDs ~__context ~self in let vbds = List.filter (fun self -> not (Db.VBD.get_empty ~__context ~self)) vbds @@ -1613,7 +1615,7 @@ let nvram = Mutex.create () let set_NVRAM_EFI_variables ~__context ~self ~value = with_lock nvram (fun () -> (* do not use remove_from_NVRAM: we do not want to - * temporarily end up with an empty NVRAM in HA *) + * temporarily end up with an empty NVRAM in HA *) let key = "EFI-variables" in let nvram = Db.VM.get_NVRAM ~__context ~self in let value = (key, value) :: List.remove_assoc key nvram in @@ -1699,3 +1701,46 @@ let get_secureboot_readiness ~__context ~self = ) ) ) + +let sysprep ~__context ~self ~unattend ~timeout = + let uuid = Db.VM.get_uuid ~__context ~self in + debug "%s %S (timeout %f)" __FUNCTION__ uuid timeout ; + if timeout < 0.0 then + raise + Api_errors.( + Server_error (invalid_value, ["timeout"; string_of_float timeout]) + ) ; + match Vm_sysprep.sysprep ~__context ~vm:self ~unattend ~timeout with + | () -> + debug "%s %S success" __FUNCTION__ uuid ; + () + | exception Vm_sysprep.Sysprep API_not_enabled -> + raise Api_errors.(Server_error (sysprep, [uuid; "API call is disabled"])) + | exception Vm_sysprep.Sysprep VM_CDR_not_found -> + raise Api_errors.(Server_error (sysprep, [uuid; "CD-ROM drive not found"])) + | exception Vm_sysprep.Sysprep VM_misses_feature -> + raise + Api_errors.( + Server_error (sysprep, [uuid; "VM driver does not support sysprep"]) + ) + | exception Vm_sysprep.Sysprep VM_not_running -> + raise Api_errors.(Server_error (sysprep, [uuid; "VM is not running"])) + | exception Vm_sysprep.Sysprep VM_CDR_eject -> + raise Api_errors.(Server_error (sysprep, [uuid; "VM failed to eject CD"])) + | exception Vm_sysprep.Sysprep VM_CDR_insert -> + 
raise Api_errors.(Server_error (sysprep, [uuid; "VM failed to insert CD"])) + | exception Vm_sysprep.Sysprep VM_sysprep_timeout -> + raise + Api_errors.( + Server_error + (sysprep, [uuid; "No response from sysprep within allocated time"]) + ) + | exception Vm_sysprep.Sysprep XML_too_large -> + raise + Api_errors.( + Server_error (sysprep, [uuid; "unattend.xml file too large"]) + ) + | exception Vm_sysprep.Sysprep (Other msg) -> + raise Api_errors.(Server_error (sysprep, [uuid; msg])) + | exception e -> + raise e diff --git a/ocaml/xapi/xapi_vm.mli b/ocaml/xapi/xapi_vm.mli index d0771c49cfa..b3f07d38a9d 100644 --- a/ocaml/xapi/xapi_vm.mli +++ b/ocaml/xapi/xapi_vm.mli @@ -32,8 +32,6 @@ val retrieve_wlb_recommendations : val assert_agile : __context:Context.t -> self:[`VM] Ref.t -> unit -val immediate_complete : __context:Context.t -> unit - val set_actions_after_crash : __context:Context.t -> self:[`VM] Ref.t @@ -401,6 +399,14 @@ val call_plugin : -> args:(string * string) list -> string +val call_host_plugin : + __context:Context.t + -> vm:API.ref_VM + -> plugin:string + -> fn:string + -> args:(string * string) list + -> string + val set_has_vendor_device : __context:Context.t -> self:API.ref_VM -> value:bool -> unit @@ -444,3 +450,10 @@ val add_to_blocked_operations : val remove_from_blocked_operations : __context:Context.t -> self:API.ref_VM -> key:API.vm_operations -> unit + +val sysprep : + __context:Context.t + -> self:API.ref_VM + -> unattend:SecretString.t + -> timeout:float + -> unit diff --git a/ocaml/xapi/xapi_vm_appliance_lifecycle.ml b/ocaml/xapi/xapi_vm_appliance_lifecycle.ml index 330d028cf1c..765fd9c3568 100644 --- a/ocaml/xapi/xapi_vm_appliance_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_appliance_lifecycle.ml @@ -18,38 +18,48 @@ let check_operation_error ~__context record self op = let _ref = Ref.string_of self in let current_ops = record.Db_actions.vM_appliance_current_operations in (* Only allow one operation of [`start | `clean_shutdown | 
`hard_shutdown | `shutdown ] at a time. *) - if current_ops <> [] then - Some (Api_errors.other_operation_in_progress, ["VM_appliance"; _ref]) - else - let vms = Db.VM_appliance.get_VMs ~__context ~self in - if vms = [] then - Some (Api_errors.operation_not_allowed, ["Appliance has no VMs."]) - else (* Allow the op if any VMs are in a state where the op makes sense. *) - let power_states = - List.map (fun vm -> Db.VM.get_power_state ~__context ~self:vm) vms - in - let predicate, error = - match op with - (* Can start if any are halted. *) - | `start -> - ( (fun power_state -> power_state = `Halted) - , "There are no halted VMs in this appliance." - ) - (* Can clean_shutdown if any are running. *) - | `clean_shutdown -> - ( (fun power_state -> power_state = `Running) - , "There are no running VMs in this appliance." - ) - (* Can hard_shutdown/shutdown if any are not halted. *) - | `hard_shutdown | `shutdown -> - ( (fun power_state -> power_state <> `Halted) - , "All VMs in this appliance are halted." - ) - in - if List.exists predicate power_states then - None + match current_ops with + | (op_ref, op_type) :: _ -> + Some + ( Api_errors.other_operation_in_progress + , [ + "VM_appliance" + ; _ref + ; API.vm_appliance_operation_to_string op_type + ; op_ref + ] + ) + | [] -> + let vms = Db.VM_appliance.get_VMs ~__context ~self in + if vms = [] then + Some (Api_errors.operation_not_allowed, ["Appliance has no VMs."]) else - Some (Api_errors.operation_not_allowed, [error]) + (* Allow the op if any VMs are in a state where the op makes sense. *) + let power_states = + List.map (fun vm -> Db.VM.get_power_state ~__context ~self:vm) vms + in + let predicate, error = + match op with + (* Can start if any are halted. *) + | `start -> + ( (fun power_state -> power_state = `Halted) + , "There are no halted VMs in this appliance." + ) + (* Can clean_shutdown if any are running. 
*) + | `clean_shutdown -> + ( (fun power_state -> power_state = `Running) + , "There are no running VMs in this appliance." + ) + (* Can hard_shutdown/shutdown if any are not halted. *) + | `hard_shutdown | `shutdown -> + ( (fun power_state -> power_state <> `Halted) + , "All VMs in this appliance are halted." + ) + in + if List.exists predicate power_states then + None + else + Some (Api_errors.operation_not_allowed, [error]) let assert_operation_valid ~__context ~self ~(op : API.vm_appliance_operation) = let all = Db.VM_appliance.get_record_internal ~__context ~self in diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index 859591c74a8..551a292f396 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -497,7 +497,7 @@ let has_non_allocated_vgpus ~__context ~self = * 4. Remove the list head from the remainding vGPU list of the VM * 5. Repeat step 2-4 until fail or the remainding list is empty * 6. Return success - * *) + *) let assert_gpus_available ~__context ~self ~host = let vgpus = Db.VM.get_VGPUs ~__context ~self in let vGPU_structs = List.map (Vgpuops.vgpu_of_ref ~__context) vgpus in @@ -1304,9 +1304,9 @@ let allowed_VBD_devices_HVM_floppy = (fun x -> Device_number.(make Floppy ~disk:x ~partition:0)) (inclusive_range 0 1) -let allowed_VIF_devices_HVM = vif_inclusive_range 0 6 +let allowed_VIF_devices_HVM = vif_inclusive_range 0 15 -let allowed_VIF_devices_PV = vif_inclusive_range 0 6 +let allowed_VIF_devices_PV = vif_inclusive_range 0 15 (** [possible_VBD_devices_of_string s] returns a list of Device_number.t which represent possible interpretations of [s]. 
*) @@ -1465,6 +1465,7 @@ let copy_guest_metrics ~__context ~vm = ~memory:all.API.vM_guest_metrics_memory ~disks:all.API.vM_guest_metrics_disks ~networks:all.API.vM_guest_metrics_networks + ~services:all.API.vM_guest_metrics_services ~pV_drivers_detected:all.API.vM_guest_metrics_PV_drivers_detected ~other:all.API.vM_guest_metrics_other ~last_updated:all.API.vM_guest_metrics_last_updated @@ -1671,8 +1672,10 @@ let ensure_device_model_profile_present ~__context ~domain_type ~is_a_template let trad = Vm_platform.(device_model, fallback_device_model_stage_1) in if is_a_template then platform - else if (not needs_qemu) || List.mem_assoc Vm_platform.device_model platform - then (* upgrade existing Device Model entry *) + else if + (not needs_qemu) || List.mem_assoc Vm_platform.device_model platform + (* upgrade existing Device Model entry *) + then platform |> List.map (fun entry -> if entry = trad then default else entry) else (* only add device-model to an HVM VM platform if it is not already there *) diff --git a/ocaml/xapi/xapi_vm_lifecycle.ml b/ocaml/xapi/xapi_vm_lifecycle.ml index 9ab13f79b54..b9cc6b884b4 100644 --- a/ocaml/xapi/xapi_vm_lifecycle.ml +++ b/ocaml/xapi/xapi_vm_lifecycle.ml @@ -61,6 +61,7 @@ let allowed_power_states ~__context ~vmr ~(op : API.vm_operations) = | `send_sysrq | `send_trigger | `snapshot_with_quiesce + | `sysprep | `suspend -> [`Running] | `changing_dynamic_range -> @@ -151,6 +152,12 @@ let has_feature ~vmgmr ~feature = try List.assoc feature other = "1" with Not_found -> false ) +let get_feature ~vmgmr ~feature = + Option.bind vmgmr (fun gmr -> + let other = gmr.Db_actions.vM_guest_metrics_other in + List.assoc_opt feature other + ) + (* Returns `true` only if we are certain that the VM has booted PV (if there * is no metrics record, then we can't tell) *) let has_definitely_booted_pv ~vmmr = @@ -166,45 +173,58 @@ let has_definitely_booted_pv ~vmmr = ) (** Return an error iff vmr is an HVM guest and lacks a needed feature. 
- * Note: it turned out that the Windows guest agent does not write "feature-suspend" - * on resume (only on startup), so we cannot rely just on that flag. We therefore - * add a clause that enables all features when PV drivers are present using the - * old-style check. + + * Note: The FreeBSD driver used by NetScaler supports all power actions. + * However, older versions of the FreeBSD driver do not explicitly advertise + * these support. As a result, xapi does not attempt to signal these power + * actions. To address this as a workaround, all power actions should be + * permitted for FreeBSD guests. + + * Additionally, VMs with an explicit `data/cant_suspend_reason` set aren't + * allowed to suspend, which would crash Windows and other UEFI VMs. + * The "strict" param should be true for determining the allowed_operations list * (which is advisory only) and false (more permissive) when we are potentially about * to perform an operation. This makes a difference for ops that require the guest to * react helpfully. 
*) let check_op_for_feature ~__context ~vmr:_ ~vmmr ~vmgmr ~power_state ~op ~ref ~strict = - if + let implicit_support = power_state <> `Running (* PV guests offer support implicitly *) || has_definitely_booted_pv ~vmmr || Xapi_pv_driver_version.(has_pv_drivers (of_guest_metrics vmgmr)) (* Full PV drivers imply all features *) - then - None - else - let some_err e = Some (e, [Ref.string_of ref]) in - let lack_feature feature = not (has_feature ~vmgmr ~feature) in - match op with - | `clean_shutdown - when strict - && lack_feature "feature-shutdown" - && lack_feature "feature-poweroff" -> - some_err Api_errors.vm_lacks_feature - | `clean_reboot - when strict - && lack_feature "feature-shutdown" - && lack_feature "feature-reboot" -> - some_err Api_errors.vm_lacks_feature - | `changing_VCPUs_live when lack_feature "feature-vcpu-hotplug" -> - some_err Api_errors.vm_lacks_feature - | (`suspend | `checkpoint | `pool_migrate | `migrate_send) - when strict && lack_feature "feature-suspend" -> + in + let some_err e = Some (e, [Ref.string_of ref]) in + let lack_feature feature = not (has_feature ~vmgmr ~feature) in + match op with + | `suspend | `checkpoint | `pool_migrate | `migrate_send -> ( + match get_feature ~vmgmr ~feature:"data-cant-suspend-reason" with + | Some reason -> + Some (Api_errors.vm_non_suspendable, [Ref.string_of ref; reason]) + | None + when (not implicit_support) && strict && lack_feature "feature-suspend" -> some_err Api_errors.vm_lacks_feature - | _ -> + | None -> None + ) + | _ when implicit_support -> + None + | `clean_shutdown + when strict + && lack_feature "feature-shutdown" + && lack_feature "feature-poweroff" -> + some_err Api_errors.vm_lacks_feature + | `clean_reboot + when strict + && lack_feature "feature-shutdown" + && lack_feature "feature-reboot" -> + some_err Api_errors.vm_lacks_feature + | `changing_VCPUs_live when lack_feature "feature-vcpu-hotplug" -> + some_err Api_errors.vm_lacks_feature + | _ -> + None (* N.B. 
In the pattern matching above, "pat1 | pat2 | pat3" counts as * one pattern, and the whole thing can be guarded by a "when" clause. *) @@ -269,42 +289,26 @@ let report_power_state_error ~__context ~vmr ~power_state ~op ~ref_str = Some (Api_errors.vm_bad_power_state, [ref_str; expected; actual]) let report_concurrent_operations_error ~current_ops ~ref_str = - let current_ops_str = + let current_ops_ref_str, current_ops_str = + let op_to_str = Record_util.vm_operation_to_string in + let ( >> ) f g x = g (f x) in match current_ops with | [] -> failwith "No concurrent operation to report" - | [(_, cop)] -> - Record_util.vm_operation_to_string cop + | [(op_ref, cop)] -> + (op_ref, op_to_str cop) | l -> - "{" - ^ String.concat "," - (List.map Record_util.vm_operation_to_string (List.map snd l)) - ^ "}" + ( Printf.sprintf "{%s}" (String.concat "," (List.map fst l)) + , Printf.sprintf "{%s}" + (String.concat "," (List.map (snd >> op_to_str) l)) + ) in Some - (Api_errors.other_operation_in_progress, ["VM." 
^ current_ops_str; ref_str]) + ( Api_errors.other_operation_in_progress + , ["VM"; ref_str; current_ops_str; current_ops_ref_str] + ) let check_vgpu ~__context ~op ~ref_str ~vgpus ~power_state = - let is_migratable vgpu = - try - (* Prevent VMs with VGPU from being migrated from pre-Jura to Jura and later hosts during RPU *) - let host_from = - Db.VGPU.get_VM ~__context ~self:vgpu |> fun vm -> - Db.VM.get_resident_on ~__context ~self:vm |> fun host -> - Helpers.LocalObject host - in - (* true if platform version of host_from more than inverness' 2.4.0 *) - Helpers.( - compare_int_lists - (version_of ~__context host_from) - platform_version_inverness - ) - > 0 - with e -> - debug "is_migratable: %s" (ExnHelper.string_of_exn e) ; - (* best effort: yes if not possible to decide *) - true - in let is_suspendable vgpu = Db.VGPU.get_type ~__context ~self:vgpu |> fun self -> Db.VGPU_type.get_implementation ~__context ~self |> function @@ -319,9 +323,7 @@ let check_vgpu ~__context ~op ~ref_str ~vgpus ~power_state = match op with | `migrate_send when power_state = `Halted -> None - | (`pool_migrate | `migrate_send) - when List.for_all is_migratable vgpus && List.for_all is_suspendable vgpus - -> + | (`pool_migrate | `migrate_send) when List.for_all is_suspendable vgpus -> None | `checkpoint when power_state = `Suspended -> None @@ -393,8 +395,7 @@ let nested_virt ~__context vm metrics = let key = "nested-virt" in Vm_platform.is_true ~key ~platformdata ~default:false -let is_mobile ~__context vm strict = - let metrics = Db.VM.get_metrics ~__context ~self:vm in +let is_mobile ~__context vm strict metrics = (not @@ nomigrate ~__context vm metrics) && (not @@ nested_virt ~__context vm metrics) || not strict @@ -447,6 +448,42 @@ let check_operation_error ~__context ~ref = vmr.Db_actions.vM_VBDs |> List.filter (Db.is_valid_ref __context) in + let current_ops = vmr.Db_actions.vM_current_operations in + let metrics = Db.VM.get_metrics ~__context ~self:ref in + let is_nested_virt = 
nested_virt ~__context ref metrics in + let is_domain_zero = + Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid + |> Helpers.is_domain_zero ~__context + in + let vdis_reset_and_caching = + List.filter_map + (fun vdi -> + try + let sm_config = Db.VDI.get_sm_config ~__context ~self:vdi in + Some + ( List.assoc_opt "on_boot" sm_config = Some "reset" + , bool_of_assoc "caching" sm_config + ) + with _ -> None + ) + vdis + in + let sriov_pcis = nvidia_sriov_pcis ~__context vmr.Db_actions.vM_VGPUs in + let is_not_sriov pci = not @@ List.mem pci sriov_pcis in + let pcis = vmr.Db_actions.vM_attached_PCIs in + let is_appliance_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_appliance + in + let is_protection_policy_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy + in + let rolling_upgrade_in_progress = + Helpers.rolling_upgrade_in_progress ~__context + in + let is_snapshort_schedule_valid = + Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule + in + fun ~op ~strict -> let current_error = None in let check c f = match c with Some e -> Some e | None -> f () in @@ -470,7 +507,6 @@ let check_operation_error ~__context ~ref = (* if other operations are in progress, check that the new operation is allowed concurrently with them. 
*) let current_error = check current_error (fun () -> - let current_ops = vmr.Db_actions.vM_current_operations in if List.length current_ops <> 0 && not (is_allowed_concurrently ~op ~current_ops) @@ -520,18 +556,16 @@ let check_operation_error ~__context ~ref = check current_error (fun () -> match op with | (`suspend | `checkpoint | `pool_migrate | `migrate_send) - when not (is_mobile ~__context ref strict) -> + when not (is_mobile ~__context ref strict metrics) -> Some (Api_errors.vm_is_immobile, [ref_str]) | _ -> None ) in let current_error = - let metrics = Db.VM.get_metrics ~__context ~self:ref in check current_error (fun () -> match op with - | `changing_dynamic_range - when nested_virt ~__context ref metrics && strict -> + | `changing_dynamic_range when is_nested_virt && strict -> Some (Api_errors.vm_is_using_nested_virt, [ref_str]) | _ -> None @@ -542,13 +576,7 @@ let check_operation_error ~__context ~ref = (* make use of the Helpers.ballooning_enabled_for_vm function. *) let current_error = check current_error (fun () -> - let vm_ref () = - Db.VM.get_by_uuid ~__context ~uuid:vmr.Db_actions.vM_uuid - in - if - (op = `changing_VCPUs || op = `destroy) - && Helpers.is_domain_zero ~__context (vm_ref ()) - then + if (op = `changing_VCPUs || op = `destroy) && is_domain_zero then Some ( Api_errors.operation_not_allowed , ["This operation is not allowed on dom0"] @@ -594,19 +622,6 @@ let check_operation_error ~__context ~ref = (* Check for an error due to VDI caching/reset behaviour *) let current_error = check current_error (fun () -> - let vdis_reset_and_caching = - List.filter_map - (fun vdi -> - try - let sm_config = Db.VDI.get_sm_config ~__context ~self:vdi in - Some - ( List.assoc_opt "on_boot" sm_config = Some "reset" - , bool_of_assoc "caching" sm_config - ) - with _ -> None - ) - vdis - in if op = `checkpoint || op = `snapshot @@ -635,9 +650,6 @@ let check_operation_error ~__context ~ref = (* If a PCI device is passed-through, check if the operation is 
allowed *) let current_error = check current_error @@ fun () -> - let sriov_pcis = nvidia_sriov_pcis ~__context vmr.Db_actions.vM_VGPUs in - let is_not_sriov pci = not @@ List.mem pci sriov_pcis in - let pcis = vmr.Db_actions.vM_attached_PCIs in match op with | (`suspend | `checkpoint | `pool_migrate | `migrate_send) when List.exists is_not_sriov pcis -> @@ -669,7 +681,7 @@ let check_operation_error ~__context ~ref = (* Check for errors caused by VM being in an appliance. *) let current_error = check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_appliance then + if is_appliance_valid then check_appliance ~vmr ~op ~ref_str else None @@ -678,7 +690,7 @@ let check_operation_error ~__context ~ref = (* Check for errors caused by VM being assigned to a protection policy. *) let current_error = check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_protection_policy then + if is_protection_policy_valid then check_protection_policy ~vmr ~op ~ref_str else None @@ -687,7 +699,7 @@ let check_operation_error ~__context ~ref = (* Check for errors caused by VM being assigned to a snapshot schedule. 
*) let current_error = check current_error (fun () -> - if Db.is_valid_ref __context vmr.Db_actions.vM_snapshot_schedule then + if is_snapshort_schedule_valid then check_snapshot_schedule ~vmr ~ref_str op else None @@ -711,7 +723,7 @@ let check_operation_error ~__context ~ref = let current_error = check current_error (fun () -> if - Helpers.rolling_upgrade_in_progress ~__context + rolling_upgrade_in_progress && not (List.mem op Xapi_globs.rpu_allowed_vm_operations) then Some (Api_errors.not_supported_during_upgrade, []) @@ -777,12 +789,9 @@ let allowable_ops = List.filter (fun op -> not (List.mem op ignored_ops)) API.vm_operations__all let update_allowed_operations ~__context ~self = + let check' = check_operation_error ~__context ~ref:self in let check accu op = - match check_operation_error ~__context ~ref:self ~op ~strict:true with - | None -> - op :: accu - | Some _err -> - accu + match check' ~op ~strict:true with None -> op :: accu | Some _err -> accu in let allowed = List.fold_left check [] allowable_ops in (* FIXME: need to be able to deal with rolling-upgrade for orlando as well *) @@ -856,8 +865,6 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state = if state = `Suspended then remove_pending_guidance ~__context ~self ~value:`restart_device_model ; if state = `Halted then ( - remove_pending_guidance ~__context ~self ~value:`restart_device_model ; - remove_pending_guidance ~__context ~self ~value:`restart_vm ; (* mark all devices as disconnected *) List.iter (fun vbd -> @@ -899,7 +906,9 @@ let force_state_reset_keep_current_operations ~__context ~self ~value:state = ) (Db.VM.get_VUSBs ~__context ~self) ; (* Blank the requires_reboot flag *) - Db.VM.set_requires_reboot ~__context ~self ~value:false + Db.VM.set_requires_reboot ~__context ~self ~value:false ; + remove_pending_guidance ~__context ~self ~value:`restart_device_model ; + remove_pending_guidance ~__context ~self ~value:`restart_vm ) ; (* Do not clear resident_on for VM 
and VGPU in a checkpoint operation *) if diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index b0a7d17774d..07406014afb 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -244,7 +244,7 @@ let assert_licensed_storage_motion ~__context = let rec migrate_with_retries ~__context ~queue_name ~max ~try_no ~dbg:_ ~vm_uuid ~xenops_vdi_map ~xenops_vif_map ~xenops_vgpu_map ~xenops_url ~compress - ~verify_cert = + ~verify_cert ~localhost_migration = let open Xapi_xenops_queue in let module Client = (val make_client queue_name : XENOPS) in let dbg = Context.string_of_task_and_tracing __context in @@ -254,7 +254,7 @@ let rec migrate_with_retries ~__context ~queue_name ~max ~try_no ~dbg:_ ~vm_uuid progress := "Client.VM.migrate" ; let t1 = Client.VM.migrate dbg vm_uuid xenops_vdi_map xenops_vif_map - xenops_vgpu_map xenops_url compress verify_dest + xenops_vgpu_map xenops_url compress verify_dest localhost_migration in progress := "sync_with_task" ; ignore (Xapi_xenops.sync_with_task __context queue_name t1) @@ -281,7 +281,7 @@ let rec migrate_with_retries ~__context ~queue_name ~max ~try_no ~dbg:_ ~vm_uuid (Printexc.to_string e) !progress try_no max ; migrate_with_retries ~__context ~queue_name ~max ~try_no:(try_no + 1) ~dbg ~vm_uuid ~xenops_vdi_map ~xenops_vif_map ~xenops_vgpu_map - ~xenops_url ~compress ~verify_cert + ~xenops_url ~compress ~verify_cert ~localhost_migration (* Something else went wrong *) | e -> debug @@ -374,7 +374,8 @@ let pool_migrate ~__context ~vm ~host ~options = Pool_features.assert_enabled ~__context ~f:Features.Xen_motion ; let dbg = Context.string_of_task __context in let localhost = Helpers.get_localhost ~__context in - if host = localhost then + let localhost_migration = host = localhost in + if localhost_migration then info "This is a localhost migration" ; let open Xapi_xenops_queue in let queue_name = queue_of_vm ~__context ~self:vm in @@ -431,7 +432,7 @@ let pool_migrate ~__context ~vm 
~host ~options = let verify_cert = Stunnel_client.pool () in migrate_with_retry ~__context ~queue_name ~dbg ~vm_uuid ~xenops_vdi_map:[] ~xenops_vif_map:[] ~xenops_vgpu_map - ~xenops_url ~compress ~verify_cert ; + ~xenops_url ~compress ~verify_cert ~localhost_migration ; (* Delete all record of this VM locally (including caches) *) Xapi_xenops.Xenopsd_metadata.delete ~__context vm_uuid ) @@ -488,6 +489,11 @@ let pool_migrate_complete ~__context ~vm ~host:_ = ~value:`restart_device_model ; let dbg = Context.string_of_task __context in let queue_name = Xapi_xenops_queue.queue_of_vm ~__context ~self:vm in + (* Reset the state, which will update allowed operations, clear reservations + for halted VMs, disconnect devices *) + let power_state = Db.VM.get_power_state ~__context ~self:vm in + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self:vm ~value:power_state ; if Xapi_xenops.vm_exists_in_xenopsd queue_name dbg id then ( remove_stale_pcis ~__context ~vm ; Xapi_xenops.set_resident_on ~__context ~self:vm ; @@ -1019,24 +1025,32 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far (* Though we have no intention of "write", here we use the same mode as the associated VBD on a mirrored VDIs (i.e. always RW). This avoids problem when we need to start/stop the VM along the migration. *) - let read_write = true in - (* DP set up is only essential for MIRROR.start/stop due to their open ended pattern. - It's not necessary for copy which will take care of that itself. 
*) - ignore - (SMAPI.VDI.attach3 dbg new_dp vconf.sr vconf.location vconf.mirror_vm - read_write - ) ; - SMAPI.VDI.activate3 dbg new_dp vconf.sr vconf.location vconf.mirror_vm ; let id = - Storage_migrate.State.mirror_id_of (vconf.sr, vconf.location) + Storage_migrate_helper.State.mirror_id_of (vconf.sr, vconf.location) + in + let live_vm = + match Db.VDI.get_VBDs ~__context ~self:vconf.vdi with + | [] -> + Storage_migrate_helper.failwith_fmt + "VDI %s does not have a corresponding VBD" + (Ref.string_of vconf.vdi) + | vbd_ref :: _ -> + (* XX Is it possible that this VDI might be used as multiple VBDs attached to different VMs? *) + let vm_ref = Db.VBD.get_VM ~__context ~self:vbd_ref in + let domid = + Db.VM.get_domid ~__context ~self:vm_ref |> Int64.to_string + in + Vm.of_string domid in - debug "%s mirror_vm is %s copy_vm is %s" __FUNCTION__ + debug "%s mirror_vm is %s copy_vm is %s live_vm is %s" __FUNCTION__ (Vm.string_of vconf.mirror_vm) - (Vm.string_of vconf.copy_vm) ; + (Vm.string_of vconf.copy_vm) + (Vm.string_of live_vm) ; (* Layering violation!! 
*) ignore (Storage_access.register_mirror __context id) ; - SMAPI.DATA.MIRROR.start dbg vconf.sr vconf.location new_dp - vconf.mirror_vm vconf.copy_vm remote.sm_url dest_sr is_intra_pool + Storage_migrate.start ~dbg ~sr:vconf.sr ~vdi:vconf.location ~dp:new_dp + ~mirror_vm:vconf.mirror_vm ~copy_vm:vconf.copy_vm ~live_vm + ~url:remote.sm_url ~dest:dest_sr ~verify_dest:is_intra_pool in let mapfn x = let total = Int64.to_float total_size in @@ -1091,7 +1105,7 @@ let vdi_copy_fun __context dbg vdi_map remote is_intra_pool remote_vdis so_far | Some mid -> ignore (Storage_access.unregister_mirror mid) ; let m = SMAPI.DATA.MIRROR.stat dbg mid in - (try SMAPI.DATA.MIRROR.stop dbg mid with _ -> ()) ; + (try Storage_migrate.stop ~dbg ~id:mid with _ -> ()) ; m.Mirror.failed | None -> false @@ -1585,7 +1599,8 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map let dbg = Context.string_of_task __context in migrate_with_retry ~__context ~queue_name ~dbg ~vm_uuid ~xenops_vdi_map ~xenops_vif_map ~xenops_vgpu_map - ~xenops_url:remote.xenops_url ~compress ~verify_cert ; + ~xenops_url:remote.xenops_url ~compress ~verify_cert + ~localhost_migration:is_same_host ; Xapi_xenops.Xenopsd_metadata.delete ~__context vm_uuid ) with @@ -1778,14 +1793,6 @@ let assert_can_migrate ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~options let vbds = Db.VM.get_VBDs ~__context ~self:vm in let vms_vdis = List.filter_map (vdi_filter __context true) vbds in check_vdi_map ~__context vms_vdis vdi_map ; - (* Prevent SXM when the VM has a VDI on which changed block tracking is enabled *) - List.iter - (fun vconf -> - let vdi = vconf.vdi in - if Db.VDI.get_cbt_enabled ~__context ~self:vdi then - raise Api_errors.(Server_error (vdi_cbt_enabled, [Ref.string_of vdi])) - ) - vms_vdis ; (* operations required for migration *) let required_src_sr_operations = Smint.Feature.[Vdi_snapshot; Vdi_mirror] in let required_dst_sr_operations = @@ -1919,6 +1926,9 @@ let assert_can_migrate ~__context 
~vm ~dest ~live:_ ~vdi_map ~vif_map ~options ) ) ; (* check_vdi_map above has already verified that all VDIs are in the vdi_map *) + (* Previously there was also a check that none of the VDIs have CBT enabled. + This is unnecessary, we only need to check that none of the VDIs that + *will be moved* have CBT enabled. *) assert_can_migrate_vdis ~__context ~vdi_map let assert_can_migrate_sender ~__context ~vm ~dest ~live:_ ~vdi_map:_ ~vif_map:_ diff --git a/ocaml/xapi/xapi_vm_snapshot.ml b/ocaml/xapi/xapi_vm_snapshot.ml index a7fc76a8417..fe7c7bed9dd 100644 --- a/ocaml/xapi/xapi_vm_snapshot.ml +++ b/ocaml/xapi/xapi_vm_snapshot.ml @@ -167,8 +167,10 @@ let copy_vm_fields ~__context ~metadata ~dst ~do_not_copy ~overrides = debug "copying metadata into %s" (Ref.string_of dst) ; let db = Context.database_of __context in let module DB = - (val Xapi_database.Db_cache.get db : Xapi_database.Db_interface.DB_ACCESS) - in + Xapi_database.Db_interface_compat.OfCached + (( val Xapi_database.Db_cache.get db + : Xapi_database.Db_interface.DB_ACCESS2 + )) in List.iter (fun (key, value) -> let value = Option.value ~default:value (List.assoc_opt key overrides) in diff --git a/ocaml/xapi/xapi_vmss.ml b/ocaml/xapi/xapi_vmss.ml index 611b729f6c4..ff32580365d 100644 --- a/ocaml/xapi/xapi_vmss.ml +++ b/ocaml/xapi/xapi_vmss.ml @@ -174,7 +174,7 @@ let set_type ~__context ~self ~value = (* Workaround for `param-set` calling `remove_from_schedule` first then `add_to_schedule` * In case `value` supplied is invalid for `add_to_schedule` it must not remove the key * We need the cache the original value before removing the key - * *) + *) let schedule_backup = ref [] let remove_from_schedule ~__context ~self ~key = diff --git a/ocaml/xapi/xapi_vusb_helpers.ml b/ocaml/xapi/xapi_vusb_helpers.ml index 9b1870cf141..19298735a06 100644 --- a/ocaml/xapi/xapi_vusb_helpers.ml +++ b/ocaml/xapi/xapi_vusb_helpers.ml @@ -16,8 +16,6 @@ open Xapi_stdext_std.Xstringext module D = Debug.Make (struct let name = 
"xapi_vusb_helpers" end) -open D - (**************************************************************************************) (* current/allowed operations checking *) @@ -31,11 +29,11 @@ let valid_operations ~__context record _ref' : table = let _ref = Ref.string_of _ref' in let current_ops = record.Db_actions.vUSB_current_operations in (* Policy: - * one operation at a time - * a running VM can do plug depending on whether the VUSB is already attached to VM. - * a running VM can do unplug depending on whether the VUSB is already attached to VM. - * - *) + * one operation at a time + * a running VM can do plug depending on whether the VUSB is already attached to VM. + * a running VM can do unplug depending on whether the VUSB is already attached to VM. + * + *) let table : table = Hashtbl.create 10 in List.iter (fun x -> Hashtbl.replace table x None) all_ops ; let set_errors (code : string) (params : string list) @@ -48,18 +46,20 @@ let valid_operations ~__context record _ref' : table = ops in (* Any current_operations preclude everything else *) - if current_ops <> [] then ( - debug "No operations are valid because current-operations = [ %s ]" - (String.concat "; " - (List.map - (fun (task, op) -> task ^ " -> " ^ vusb_operations_to_string op) - current_ops - ) - ) ; - let concurrent_op = snd (List.hd current_ops) in - set_errors Api_errors.other_operation_in_progress - ["VUSB"; _ref; vusb_operations_to_string concurrent_op] - all_ops + ( if current_ops <> [] then + let concurrent_op_refs, concurrent_op_types = + List.fold_left + (fun (refs, types) (ref, op) -> + (ref :: refs, vusb_operations_to_string op :: types) + ) + ([], []) current_ops + in + let format x = Printf.sprintf "{%s}" (String.concat "; " x) in + let concurrent_op_refs = format concurrent_op_refs in + let concurrent_op_types = format concurrent_op_types in + set_errors Api_errors.other_operation_in_progress + ["VUSB"; _ref; concurrent_op_types; concurrent_op_refs] + all_ops ) ; let vm = 
Db.VUSB.get_VM ~__context ~self:_ref' in let power_state = Db.VM.get_power_state ~__context ~self:vm in diff --git a/ocaml/xapi/xapi_xenops.ml b/ocaml/xapi/xapi_xenops.ml index 11e7f7c941f..9b12bcec5a6 100644 --- a/ocaml/xapi/xapi_xenops.ml +++ b/ocaml/xapi/xapi_xenops.ml @@ -33,7 +33,10 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let rpc_of t x = Rpcmarshal.marshal t.Rpc.Types.ty x +let ( let@ ) f x = f x + let check_power_state_is ~__context ~self ~expected = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if expected <> `Running then Xapi_vm_lifecycle.assert_final_power_state_is ~__context ~self ~expected else @@ -48,6 +51,8 @@ let check_power_state_is ~__context ~self ~expected = (Record_util.vm_power_state_to_lowercase_string expected) let event_wait queue_name dbg ?from p = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let finished = ref false in let event_id = ref from in let module Client = (val make_client queue_name : XENOPS) in @@ -58,6 +63,8 @@ let event_wait queue_name dbg ?from p = done let task_ended queue_name dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in match (Client.TASK.stat dbg id).Task.state with | Task.Completed _ | Task.Failed _ -> @@ -66,6 +73,8 @@ let task_ended queue_name dbg id = false let wait_for_task queue_name dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in let finished = function | Dynamic.Task id' -> @@ -106,6 +115,7 @@ let xenops_vdi_locator_of sr vdi = (Storage_interface.Vdi.string_of vdi) let xenops_vdi_locator ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let sr = Db.VDI.get_SR ~__context ~self in let sr_uuid = Db.SR.get_uuid ~__context ~self:sr in let vdi_location 
= Db.VDI.get_location ~__context ~self in @@ -114,9 +124,11 @@ let xenops_vdi_locator ~__context ~self = (Storage_interface.Vdi.of_string vdi_location) let disk_of_vdi ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try Some (VDI (xenops_vdi_locator ~__context ~self)) with _ -> None let vdi_of_disk ~__context x = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in match String.split ~limit:2 '/' x with | [sr_uuid; location] -> ( let open Xapi_database.Db_filter_types in @@ -151,6 +163,7 @@ let backend_of_network net = (* PR-1255 *) let backend_of_vif ~__context ~vif = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vif_record = Db.VIF.get_record_internal ~__context ~self:vif in let net = Db.Network.get_record ~__context ~self:vif_record.Db_actions.vIF_network @@ -255,6 +268,7 @@ let firmware_of_vm vm = default_firmware let varstore_rm_with_sandbox ~__context ~vm_uuid f = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let domid = 0 in let chroot, socket_path = @@ -265,6 +279,7 @@ let varstore_rm_with_sandbox ~__context ~vm_uuid f = (fun () -> Xenops_sandbox.Varstore_guard.stop dbg ~domid ~vm_uuid) let nvram_post_clone ~__context ~self ~uuid = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in match Db.VM.get_NVRAM ~__context ~self with | [] -> () @@ -292,6 +307,7 @@ let nvram_post_clone ~__context ~self ~uuid = debug "VM %s: NVRAM changed due to clone" uuid let rtc_timeoffset_of_vm ~__context (vm, vm_t) vbds = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let timeoffset = string vm_t.API.vM_platform "0" Vm_platform.timeoffset in (* If any VDI has on_boot = reset AND has a VDI.other_config:timeoffset then we override the platform/timeoffset. 
This is needed because windows @@ -365,6 +381,7 @@ let kernel_path filename = Ok real_path let builder_of_vm ~__context (vmref, vm) timeoffset pci_passthrough vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vm in let video_mode = if vgpu then @@ -525,6 +542,7 @@ let builder_of_vm ~__context (vmref, vm) timeoffset pci_passthrough vgpu = Helpers.internal_error "invalid boot configuration" let list_net_sriov_vf_pcis ~__context ~vm = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in vm.API.vM_VIFs |> List.filter (fun self -> Db.VIF.get_currently_attached ~__context ~self) |> List.filter_map (fun vif -> @@ -535,10 +553,15 @@ let list_net_sriov_vf_pcis ~__context ~vm = None ) +module StringMap = Map.Make (String) + +let sr_version_cache = ref StringMap.empty + module MD = struct (** Convert between xapi DB records and xenopsd records *) let of_vbd ~__context ~vm ~vbd = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let hvm = match vm.API.vM_domain_type with | `hvm -> @@ -665,6 +688,31 @@ module MD = struct ) else disk_of_vdi ~__context ~self:vbd.API.vBD_VDI in + let can_attach_early = + let sr_opt = + try Some (Db.VDI.get_SR ~__context ~self:vbd.API.vBD_VDI) + with _ -> None + in + match sr_opt with + | Some sr -> ( + let sr_key = Ref.string_of sr in + match StringMap.find_opt sr_key !sr_version_cache with + | Some cached_api_version -> + Version.String.ge cached_api_version "3.0" + | None -> ( + match Xapi_sr.required_api_version_of_sr ~__context ~sr with + | Some api_version -> + sr_version_cache := + StringMap.add sr_key api_version !sr_version_cache ; + Version.String.ge api_version "3.0" + | None -> + false + ) + ) + | None -> + (* If we can't get the SR, we have to default to false *) + false + in { id= (vm.API.vM_uuid, Device_number.to_linux_device device_number) ; position= Some device_number @@ -688,9 +736,11 @@ module MD = struct ( try Db.VDI.get_on_boot ~__context ~self:vbd.API.vBD_VDI = 
`persist with _ -> true ) + ; can_attach_early } let of_pvs_proxy ~__context vif proxy = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let site = Db.PVS_proxy.get_site ~__context ~self:proxy in let site_uuid = Db.PVS_site.get_uuid ~__context ~self:site in let servers = Db.PVS_site.get_servers ~__context ~self:site in @@ -710,6 +760,7 @@ module MD = struct (site_uuid, servers, interface) let of_vif ~__context ~vm ~vif:(vif_ref, vif) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let net = Db.Network.get_record ~__context ~self:vif.API.vIF_network in let net_mtu = Int64.to_int net.API.network_MTU in let mtu = @@ -853,6 +904,7 @@ module MD = struct } let pcis_of_vm ~__context (vmref, vm) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vgpu_pcidevs = Vgpuops.list_pcis_for_passthrough ~__context ~vm:vmref in let devs = List.concat_map (fun (_, dev) -> dev) (Pciops.sort_pcidevs vgpu_pcidevs) @@ -883,6 +935,7 @@ module MD = struct devs let get_target_pci_address ~__context vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let pgpu = if Db.is_valid_ref __context @@ -911,6 +964,7 @@ module MD = struct * is passed trough completely. *) let sriov_vf ~__context vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let is_sriov () = let ty = vgpu.Db_actions.vGPU_type in match Db.VGPU_type.get_implementation ~__context ~self:ty with @@ -931,6 +985,7 @@ module MD = struct Xenops_interface.Pci.address_of_string str |> fun addr -> Some addr let of_nvidia_vgpu ~__context vm vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vgpu in (* Get the PCI address. *) let physical_pci_address = get_target_pci_address ~__context vgpu in @@ -967,6 +1022,7 @@ module MD = struct } let of_gvt_g_vgpu ~__context vm vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vgpu in (* Get the PCI address. 
*) let physical_pci_address = get_target_pci_address ~__context vgpu in @@ -1007,6 +1063,7 @@ module MD = struct failwith "Intel GVT-g settings invalid" let of_mxgpu_vgpu ~__context vm vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vgpu in (* Get the PCI address. *) let physical_pci_address = get_target_pci_address ~__context vgpu in @@ -1043,6 +1100,7 @@ module MD = struct failwith "AMD MxGPU settings invalid" let vgpus_of_vm ~__context (_, vm) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in List.fold_left (fun acc vgpu -> let vgpu_record = Db.VGPU.get_record_internal ~__context ~self:vgpu in @@ -1064,6 +1122,7 @@ module MD = struct [] vm.API.vM_VGPUs let of_vusb ~__context ~vm ~pusb = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Vusb in try let path = pusb.API.pUSB_path in @@ -1087,6 +1146,7 @@ module MD = struct raise e let vusbs_of_vm ~__context (_, vm) = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in vm.API.vM_VUSBs |> List.map (fun self -> Db.VUSB.get_record ~__context ~self) |> List.filter (fun self -> self.API.vUSB_currently_attached) @@ -1096,6 +1156,7 @@ module MD = struct |> List.map (fun pusb -> of_vusb ~__context ~vm ~pusb) let of_vm ~__context (vmref, vm) vbds pci_passthrough vgpu = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let on_action_behaviour = function | `preserve -> [Vm.Pause] @@ -1213,7 +1274,7 @@ module MD = struct if not (List.mem_assoc Vm_platform.featureset platformdata) then let featureset = match - List.assoc_opt Xapi_globs.cpu_info_features_key + List.assoc_opt Constants.cpu_info_features_key vm.API.vM_last_boot_CPU_flags with | _ when vm.API.vM_power_state <> `Suspended -> @@ -1351,6 +1412,7 @@ module Guest_agent_features = struct auto_update_enabled @ auto_update_url let of_config ~__context config = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let open Features in let vss = let 
name = Features.name_of_feature VSS in @@ -1370,6 +1432,7 @@ module Guest_agent_features = struct end let apply_guest_agent_config ~__context config = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let features = Guest_agent_features.of_config ~__context config in let module Client = (val make_client (default_xenopsd ()) : XENOPS) in @@ -1377,6 +1440,7 @@ let apply_guest_agent_config ~__context config = (* Create an instance of Metadata.t, suitable for uploading to the xenops service *) let create_metadata ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VM.get_record ~__context ~self in let vbds = List.filter @@ -1419,6 +1483,8 @@ let id_of_vm ~__context ~self = Db.VM.get_uuid ~__context ~self let vm_of_id ~__context uuid = Db.VM.get_by_uuid ~__context ~uuid let vm_exists_in_xenopsd queue_name dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in Client.VM.exists dbg id @@ -1627,6 +1693,7 @@ module Xenopsd_metadata = struct (* If the VM has Xapi_globs.persist_xenopsd_md -> filename in its other_config, we persist the xenopsd metadata to a well-known location in the filesystem *) let maybe_persist_md ~__context ~self md = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let oc = Db.VM.get_other_config ~__context ~self in if List.mem_assoc Xapi_globs.persist_xenopsd_md oc then let file_path = @@ -1647,6 +1714,7 @@ module Xenopsd_metadata = struct ) let push ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in with_lock metadata_m (fun () -> let md = create_metadata ~__context ~self in let txt = md |> rpc_of Metadata.t |> Jsonrpc.to_string in @@ -1663,6 +1731,7 @@ module Xenopsd_metadata = struct ) let delete_nolock ~__context id = + let@ __context = Context.with_tracing 
~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in info "xenops: VM.remove %s" id ; try @@ -1687,6 +1756,7 @@ module Xenopsd_metadata = struct (* Unregisters a VM with xenopsd, and cleans up metadata and caches *) let pull ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in with_lock metadata_m (fun () -> info "xenops: VM.export_metadata %s" id ; let dbg = Context.string_of_task_and_tracing __context in @@ -1717,9 +1787,11 @@ module Xenopsd_metadata = struct ) let delete ~__context id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in with_lock metadata_m (fun () -> delete_nolock ~__context id) let update ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let id = id_of_vm ~__context ~self in let queue_name = queue_of_vm ~__context ~self in with_lock metadata_m (fun () -> @@ -1793,6 +1865,18 @@ module Events_from_xenopsd = struct let module Client = (val make_client queue_name : XENOPS) in let t = make () in let id = register t in + Debug_info.with_dbg + ~attributes: + [ + ("messaging.operation.name", "subscribe") + ; ("messaging.system", "event") + ; ("messaging.destination.subscription.name", vm_id) + ; ("messaging.message.id", string_of_int id) + ] + ~name:("subscribe" ^ " " ^ queue_name) + ~dbg + @@ fun di -> + let dbg = Debug_info.to_string di in debug "Client.UPDATES.inject_barrier %d" id ; Client.UPDATES.inject_barrier dbg vm_id id ; with_lock t.m (fun () -> @@ -1802,6 +1886,17 @@ module Events_from_xenopsd = struct ) let wakeup queue_name dbg id = + Debug_info.with_dbg + ~attributes: + [ + ("messaging.operation.name", "settle") + ; ("messaging.system", "event") + ; ("messaging.message.id", string_of_int id) + ] + ~name:("settle" ^ " " ^ queue_name) + ~dbg + @@ fun di -> + let dbg = Debug_info.to_string di in let module Client = (val make_client queue_name : XENOPS) in Client.UPDATES.remove_barrier dbg id ; let t = @@ -1852,572 +1947,560 @@ 
module Events_from_xenopsd = struct ) end -let update_vm ~__context id = - try - if Events_from_xenopsd.are_suppressed id then - debug "xenopsd event: ignoring event for VM (VM %s migrating away)" id +let update_vm_internal ~__context ~id ~self ~previous ~info ~localhost = + debug "xenopsd event: processing event for VM %s" id ; + if info = None then + debug "xenopsd event: VM state missing: assuming VM has shut down" ; + let should_update_allowed_operations = ref false in + + (* If a field (accessed by [accessor] for [Vm.state]) changed in an + update and [predicate has_changed], call [f (accessor info)] *) + let different accessor predicate f = + let a = Option.map (fun x -> accessor x) info in + let b = Option.map accessor previous in + let diff = a <> b in + if predicate diff then + Option.iter f a else - let self = Db.VM.get_by_uuid ~__context ~uuid:id in - let localhost = Helpers.get_localhost ~__context in - if Db.VM.get_resident_on ~__context ~self = localhost then - let previous = Xenops_cache.find_vm id in - let dbg = Context.string_of_task_and_tracing __context in - let module Client = - (val make_client (queue_of_vm ~__context ~self) : XENOPS) - in - let info = try Some (Client.VM.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( - debug "xenopsd event: processing event for VM %s" id ; - if info = None then - debug "xenopsd event: VM state missing: assuming VM has shut down" ; - let should_update_allowed_operations = ref false in - let different f = - let a = Option.map (fun x -> f (snd x)) info in - let b = Option.map f previous in - a <> b - in - (* Helpers to create and update guest metrics when needed *) - let lookup state key = List.assoc_opt key state.Vm.guest_agent in - let list state dir = - let dir = - if dir.[0] = '/' then - String.sub dir 1 (String.length dir - 1) - else - dir + () + in + (* Helpers to create and update guest metrics when needed *) + let lookup state key = List.assoc_opt key state.Vm.guest_agent in + 
let list state dir = + let dir = + if dir.[0] = '/' then + String.sub dir 1 (String.length dir - 1) + else + dir + in + let results = + List.filter_map + (fun (path, _) -> + if String.starts_with ~prefix:dir path then + let rest = + String.sub path (String.length dir) + (String.length path - String.length dir) in - let results = - List.filter_map - (fun (path, _) -> - if String.starts_with ~prefix:dir path then - let rest = - String.sub path (String.length dir) - (String.length path - String.length dir) - in - match - List.filter (fun x -> x <> "") (String.split '/' rest) - with - | x :: _ -> - Some x - | _ -> - None - else - None + match List.filter (fun x -> x <> "") (String.split '/' rest) with + | x :: _ -> + Some x + | _ -> + None + else + None + ) + state.Vm.guest_agent + |> Listext.setify + in + results + in + let create_guest_metrics_if_needed () = + let gm = Db.VM.get_guest_metrics ~__context ~self in + if gm = Ref.null then + Option.iter + (fun state -> + List.iter + (fun domid -> + try + let new_gm_ref = + Xapi_guest_agent.create_and_set_guest_metrics (lookup state) + (list state) ~__context ~domid ~uuid:id + ~pV_drivers_detected:state.pv_drivers_detected + in + debug "xenopsd event: created guest metrics %s for VM %s" + (Ref.string_of new_gm_ref) id + with e -> + error "Caught %s: while creating VM %s guest metrics" + (Printexc.to_string e) id + ) + state.domids + ) + info + in + let check_guest_agent () = + Option.iter + (fun state -> + Option.iter + (fun oldstate -> + let old_ga = oldstate.Vm.guest_agent in + let new_ga = state.Vm.guest_agent in + (* Remove memory keys *) + let ignored_keys = + ["data/meminfo_free"; "data/updated"; "data/update_cnt"] + in + let remove_ignored ga = + List.fold_left + (fun acc k -> List.filter (fun x -> fst x <> k) acc) + ga ignored_keys + in + let old_ga = remove_ignored old_ga in + let new_ga = remove_ignored new_ga in + if new_ga <> old_ga then ( + debug + "Will update VM.allowed_operations because guest_agent has \ 
+ changed." ; + should_update_allowed_operations := true + ) else + debug + "Supressing VM.allowed_operations update because guest_agent \ + data is largely the same" + ) + previous ; + List.iter + (fun domid -> + try + debug "xenopsd event: Updating VM %s domid %d guest_agent" id + domid ; + Xapi_guest_agent.all (lookup state) (list state) ~__context ~domid + ~uuid:id ~pV_drivers_detected:state.pv_drivers_detected + with e -> + error "Caught %s: while updating VM %s guest_agent" + (Printexc.to_string e) id + ) + state.domids + ) + info + in + (* Notes on error handling: if something fails we log and continue, to + maximise the amount of state which is correctly synced. If something + does fail then we may end up permanently out-of-sync until either a + process restart or an event is generated. We may wish to periodically + inject artificial events IF there has been an event sync failure? *) + let power_state = + xenapi_of_xenops_power_state (Option.map (fun x -> x.Vm.power_state) info) + in + let power_state_before_update = Db.VM.get_power_state ~__context ~self in + (* We preserve the current_domain_type of suspended VMs like we preserve + the currently_attached fields for VBDs/VIFs etc - it's important to know + whether suspended VMs are going to resume into PV or PVinPVH for example. 
+ We do this before updating the power_state to maintain the invariant that + any VM that's not `Halted cannot have an unspecified current_domain_type *) + different + (fun x -> x.Vm.domain_type) + (( && ) (power_state <> `Suspended)) + (fun domain_type -> + let metrics = Db.VM.get_metrics ~__context ~self in + let update domain_type = + debug "xenopsd event: Updating VM %s current_domain_type <- %s" id + (Record_util.domain_type_to_string domain_type) ; + Db.VM_metrics.set_current_domain_type ~__context ~self:metrics + ~value:domain_type + in + match domain_type with + | Vm.Domain_HVM -> + update `hvm + | Domain_PV -> + update `pv + | Domain_PVinPVH -> + update `pv_in_pvh + | Domain_PVH -> + update `pvh + | Domain_undefined -> + if power_state <> `Halted then + debug + "xenopsd returned an undefined domain type for non-halted VM \ + %s;assuming this is transient, so not updating \ + current_domain_type" + id + else + update `unspecified + ) ; + different + (fun x -> x.Vm.power_state) + Fun.id + (fun _ -> + try + debug + "Will update VM.allowed_operations because power_state has changed." ; + should_update_allowed_operations := true ; + (* Update ha_always_run before the power_state (if needed), to avoid racing + with the HA monitor thread. *) + let pool = Helpers.get_pool ~__context in + if + power_state = `Halted + && not + (Db.Pool.get_ha_reboot_vm_on_internal_shutdown ~__context + ~self:pool + ) + then ( + Db.VM.set_ha_always_run ~__context ~self ~value:false ; + debug "Setting ha_always_run on vm=%s as false after shutdown" + (Ref.string_of self) + ) ; + debug "xenopsd event: Updating VM %s power_state <- %s" id + (Record_util.vm_power_state_to_string power_state) ; + + (* NOTE: Pull xenopsd metadata as soon as possible so that + nothing comes inbetween the power state change and the + Xenopsd_metadata.pull and overwrites it. 
*) + ( if power_state = `Suspended then + let md = Xenopsd_metadata.pull ~__context id in + match md.Metadata.domains with + | None -> + error "Suspended VM has no domain-specific metadata" + | Some x -> + Db.VM.set_last_booted_record ~__context ~self ~value:x ; + debug "VM %s last_booted_record set to %s" (Ref.string_of self) + x + ) ; + + (* This will mark VBDs, VIFs as detached and clear resident_on + if the VM has permanently shutdown. current-operations + should not be reset as there maybe a checkpoint is ongoing*) + Xapi_vm_lifecycle.force_state_reset_keep_current_operations ~__context + ~self ~value:power_state ; + if power_state = `Running then + create_guest_metrics_if_needed () ; + if power_state = `Suspended || power_state = `Halted then ( + Xapi_network.detach_for_vm ~__context ~host:localhost ~vm:self ; + Storage_access.reset ~__context ~vm:self + ) ; + if power_state = `Halted then ( + Xenopsd_metadata.delete ~__context id ; + !trigger_xenapi_reregister () + ) + with e -> + error "Caught %s: while updating VM %s power_state" + (Printexc.to_string e) id + ) ; + different + (fun x -> x.Vm.domids) + Fun.id + (fun _ -> + try + debug "Will update VM.allowed_operations because domid has changed." 
; + should_update_allowed_operations := true ; + debug "xenopsd event: Updating VM %s domid" id ; + Option.iter + (fun state -> + match state.Vm.domids with + | value :: _ -> + Db.VM.set_domid ~__context ~self ~value:(Int64.of_int value) + | [] -> + () + (* happens when the VM is shutdown *) + ) + info ; + (* If this is a storage domain, attempt to plug the PBD *) + Option.iter + (fun pbd -> + let (_ : Thread.t) = + Thread.create + (fun () -> + (* Don't block the database update thread *) + Xapi_pbd.plug ~__context ~self:pbd ) - state.Vm.guest_agent - |> Listext.setify + () in - results - in - let create_guest_metrics_if_needed () = - let gm = Db.VM.get_guest_metrics ~__context ~self in - if gm = Ref.null then - Option.iter - (fun (_, state) -> - List.iter - (fun domid -> - try - let new_gm_ref = - Xapi_guest_agent.create_and_set_guest_metrics - (lookup state) (list state) ~__context ~domid - ~uuid:id - ~pV_drivers_detected:state.pv_drivers_detected - in - debug - "xenopsd event: created guest metrics %s for VM %s" - (Ref.string_of new_gm_ref) id - with e -> - error "Caught %s: while creating VM %s guest metrics" - (Printexc.to_string e) id - ) - state.domids + () + ) + (System_domains.pbd_of_vm ~__context ~vm:self) + with e -> + error "Caught %s: while updating VM %s domids" (Printexc.to_string e) id + ) ; + (* consoles *) + different + (fun x -> x.Vm.consoles) + Fun.id + (fun consoles -> + try + debug "xenopsd event: Updating VM %s consoles" id ; + let address = Db.Host.get_address ~__context ~self:localhost in + let uri = + Uri.( + make ~scheme:"https" ~host:address ~path:Constants.console_uri () + |> to_string + ) + in + let get_uri_from_location loc = + try + let n = String.index loc '?' 
in + String.sub loc 0 n + with Not_found -> loc + in + let current_protocols = + List.map + (fun self -> + ( ( Db.Console.get_protocol ~__context ~self + |> to_xenops_console_protocol + , Db.Console.get_location ~__context ~self + |> get_uri_from_location ) - info - in - let check_guest_agent () = - Option.iter - (fun (_, state) -> - Option.iter - (fun oldstate -> - let old_ga = oldstate.Vm.guest_agent in - let new_ga = state.Vm.guest_agent in - (* Remove memory keys *) - let ignored_keys = - ["data/meminfo_free"; "data/updated"; "data/update_cnt"] - in - let remove_ignored ga = - List.fold_left - (fun acc k -> List.filter (fun x -> fst x <> k) acc) - ga ignored_keys - in - let old_ga = remove_ignored old_ga in - let new_ga = remove_ignored new_ga in - if new_ga <> old_ga then ( - debug - "Will update VM.allowed_operations because guest_agent \ - has changed." ; - should_update_allowed_operations := true - ) else - debug - "Supressing VM.allowed_operations update because \ - guest_agent data is largely the same" - ) - previous ; - List.iter - (fun domid -> - try - debug "xenopsd event: Updating VM %s domid %d guest_agent" - id domid ; - Xapi_guest_agent.all (lookup state) (list state) - ~__context ~domid ~uuid:id - ~pV_drivers_detected:state.pv_drivers_detected - with e -> - error "Caught %s: while updating VM %s guest_agent" - (Printexc.to_string e) id - ) - state.domids + , self ) - info - in - (* Notes on error handling: if something fails we log and continue, to - maximise the amount of state which is correctly synced. If something - does fail then we may end up permanently out-of-sync until either a - process restart or an event is generated. We may wish to periodically - inject artificial events IF there has been an event sync failure? 
*) - let power_state = - xenapi_of_xenops_power_state - (Option.map (fun x -> (snd x).Vm.power_state) info) - in - let power_state_before_update = - Db.VM.get_power_state ~__context ~self - in - (* We preserve the current_domain_type of suspended VMs like we preserve - the currently_attached fields for VBDs/VIFs etc - it's important to know - whether suspended VMs are going to resume into PV or PVinPVH for example. - We do this before updating the power_state to maintain the invariant that - any VM that's not `Halted cannot have an unspecified current_domain_type *) - if different (fun x -> x.domain_type) && power_state <> `Suspended - then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - let update domain_type = - debug - "xenopsd event: Updating VM %s current_domain_type <- %s" id - (Record_util.domain_type_to_string domain_type) ; - Db.VM_metrics.set_current_domain_type ~__context ~self:metrics - ~value:domain_type - in - match state.Vm.domain_type with - | Domain_HVM -> - update `hvm - | Domain_PV -> - update `pv - | Domain_PVinPVH -> - update `pv_in_pvh - | Domain_PVH -> - update `pvh - | Domain_undefined -> - if power_state <> `Halted then - debug - "xenopsd returned an undefined domain type for \ - non-halted VM %s;assuming this is transient, so not \ - updating current_domain_type" - id - else - update `unspecified - ) - info ; - ( if different (fun x -> x.power_state) then - try - debug - "Will update VM.allowed_operations because power_state has \ - changed." ; - should_update_allowed_operations := true ; - debug "xenopsd event: Updating VM %s power_state <- %s" id - (Record_util.vm_power_state_to_string power_state) ; - (* This will mark VBDs, VIFs as detached and clear resident_on - if the VM has permanently shutdown. 
current-operations - should not be reset as there maybe a checkpoint is ongoing*) - Xapi_vm_lifecycle.force_state_reset_keep_current_operations - ~__context ~self ~value:power_state ; - if power_state = `Running then create_guest_metrics_if_needed () ; - if power_state = `Suspended || power_state = `Halted then ( - Xapi_network.detach_for_vm ~__context ~host:localhost ~vm:self ; - Storage_access.reset ~__context ~vm:self - ) ; - if power_state = `Halted then - Xenopsd_metadata.delete ~__context id ; - ( if power_state = `Suspended then - let md = Xenopsd_metadata.pull ~__context id in - match md.Metadata.domains with - | None -> - error "Suspended VM has no domain-specific metadata" - | Some x -> - Db.VM.set_last_booted_record ~__context ~self ~value:x ; - debug "VM %s last_booted_record set to %s" - (Ref.string_of self) x - ) ; - if power_state = `Halted then - !trigger_xenapi_reregister () - with e -> - error "Caught %s: while updating VM %s power_state" - (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.domids) then - try - debug - "Will update VM.allowed_operations because domid has changed." 
; - should_update_allowed_operations := true ; - debug "xenopsd event: Updating VM %s domid" id ; - Option.iter - (fun (_, state) -> - match state.Vm.domids with - | value :: _ -> - Db.VM.set_domid ~__context ~self - ~value:(Int64.of_int value) - | [] -> - () - (* happens when the VM is shutdown *) - ) - info ; - (* If this is a storage domain, attempt to plug the PBD *) - Option.iter - (fun pbd -> - let (_ : Thread.t) = - Thread.create - (fun () -> - (* Don't block the database update thread *) - Xapi_pbd.plug ~__context ~self:pbd - ) - () - in - () - ) - (System_domains.pbd_of_vm ~__context ~vm:self) - with e -> - error "Caught %s: while updating VM %s domids" - (Printexc.to_string e) id - ) ; - (* consoles *) - ( if different (fun x -> x.consoles) then - try - debug "xenopsd event: Updating VM %s consoles" id ; - Option.iter - (fun (_, state) -> - let localhost = Helpers.get_localhost ~__context in - let address = - Db.Host.get_address ~__context ~self:localhost - in - let uri = - Uri.( - make ~scheme:"https" ~host:address - ~path:Constants.console_uri () - |> to_string - ) - in - let get_uri_from_location loc = - try - let n = String.index loc '?' 
in - String.sub loc 0 n - with Not_found -> loc - in - let current_protocols = - List.map - (fun self -> - ( ( Db.Console.get_protocol ~__context ~self - |> to_xenops_console_protocol - , Db.Console.get_location ~__context ~self - |> get_uri_from_location - ) - , self - ) - ) - (Db.VM.get_consoles ~__context ~self) - in - let new_protocols = - List.map - (fun c -> ((c.Vm.protocol, uri), c)) - state.Vm.consoles - in - (* Destroy consoles that have gone away *) - List.iter - (fun protocol -> - let self = List.assoc protocol current_protocols in - Db.Console.destroy ~__context ~self - ) - (Listext.set_difference - (List.map fst current_protocols) - (List.map fst new_protocols) - ) ; - (* Create consoles that have appeared *) - List.iter - (fun (protocol, _) -> - let ref = Ref.make () in - let uuid = Uuidx.to_string (Uuidx.make ()) in - let location = Printf.sprintf "%s?uuid=%s" uri uuid in - let port = - try - Int64.of_int - (List.find - (fun c -> c.Vm.protocol = protocol) - state.Vm.consoles - ) - .port - with Not_found -> -1L - in - Db.Console.create ~__context ~ref ~uuid - ~protocol:(to_xenapi_console_protocol protocol) - ~location ~vM:self ~other_config:[] ~port - ) - (Listext.set_difference - (List.map fst new_protocols) - (List.map fst current_protocols) - ) - ) - info - with e -> - error "Caught %s: while updating VM %s consoles" - (Printexc.to_string e) id - ) ; - ( if different (fun x -> x.memory_target) then - try - Option.iter - (fun (_, state) -> - debug "xenopsd event: Updating VM %s memory_target <- %Ld" - id state.Vm.memory_target ; - Db.VM.set_memory_target ~__context ~self - ~value:state.memory_target - ) - info - with e -> - error "Caught %s: while updating VM %s consoles" - (Printexc.to_string e) id + ) + (Db.VM.get_consoles ~__context ~self) + in + let new_protocols = + List.map (fun c -> ((c.Vm.protocol, uri), c)) consoles + in + (* Destroy consoles that have gone away *) + List.iter + (fun protocol -> + let self = List.assoc protocol 
current_protocols in + Db.Console.destroy ~__context ~self + ) + (Listext.set_difference + (List.map fst current_protocols) + (List.map fst new_protocols) ) ; - ( if different (fun x -> x.rtc_timeoffset) then + (* Create consoles that have appeared *) + List.iter + (fun (protocol, _) -> + let ref = Ref.make () in + let uuid = Uuidx.to_string (Uuidx.make ()) in + let location = Printf.sprintf "%s?uuid=%s" uri uuid in + let port = try - Option.iter - (fun (_, state) -> - if state.Vm.rtc_timeoffset <> "" then ( - debug - "xenopsd event: Updating VM %s platform:timeoffset <- \ - %s" - id state.rtc_timeoffset ; - ( try - Db.VM.remove_from_platform ~__context ~self - ~key:Vm_platform.timeoffset - with _ -> () - ) ; - Db.VM.add_to_platform ~__context ~self - ~key:Vm_platform.timeoffset ~value:state.rtc_timeoffset - ) - ) - info - with e -> - error "Caught %s: while updating VM %s rtc/timeoffset" - (Printexc.to_string e) id + Int64.of_int + (List.find (fun c -> c.Vm.protocol = protocol) consoles).port + with Not_found -> -1L + in + Db.Console.create ~__context ~ref ~uuid + ~protocol:(to_xenapi_console_protocol protocol) + ~location ~vM:self ~other_config:[] ~port + ) + (Listext.set_difference + (List.map fst new_protocols) + (List.map fst current_protocols) + ) + with e -> + error "Caught %s: while updating VM %s consoles" (Printexc.to_string e) + id + ) ; + different + (fun x -> x.Vm.memory_target) + Fun.id + (fun memory_target -> + try + debug "xenopsd event: Updating VM %s memory_target <- %Ld" id + memory_target ; + Db.VM.set_memory_target ~__context ~self ~value:memory_target + with e -> + error "Caught %s: while updating VM %s consoles" (Printexc.to_string e) + id + ) ; + different + (fun x -> x.rtc_timeoffset) + Fun.id + (fun rtc_timeoffset -> + try + if rtc_timeoffset <> "" then ( + debug "xenopsd event: Updating VM %s platform:timeoffset <- %s" id + rtc_timeoffset ; + ( try + Db.VM.remove_from_platform ~__context ~self + ~key:Vm_platform.timeoffset + with _ -> () 
) ; - if different (fun x -> x.hvm) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s hvm <- %s" id - (string_of_bool state.Vm.hvm) ; - Db.VM_metrics.set_hvm ~__context ~self:metrics - ~value:state.Vm.hvm - ) - info ; - if different (fun x -> x.nomigrate) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s nomigrate <- %s" id - (string_of_bool state.Vm.nomigrate) ; - Db.VM_metrics.set_nomigrate ~__context ~self:metrics - ~value:state.Vm.nomigrate - ) - info ; - if different (fun x -> x.nested_virt) then - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s nested_virt <- %s" id - (string_of_bool state.Vm.nested_virt) ; - Db.VM_metrics.set_nested_virt ~__context ~self:metrics - ~value:state.Vm.nested_virt - ) - info ; - let update_pv_drivers_detected () = - Option.iter - (fun (_, state) -> - try - let gm = Db.VM.get_guest_metrics ~__context ~self in - debug "xenopsd event: Updating VM %s PV drivers detected %b" - id state.Vm.pv_drivers_detected ; - Db.VM_guest_metrics.set_PV_drivers_detected ~__context - ~self:gm ~value:state.Vm.pv_drivers_detected ; - Db.VM_guest_metrics.set_PV_drivers_up_to_date ~__context - ~self:gm ~value:state.Vm.pv_drivers_detected - with e -> - debug "Caught %s: while updating VM %s PV drivers" - (Printexc.to_string e) id - ) - info - in - (* Chack last_start_time before updating anything in the guest metrics *) - ( if different (fun x -> x.last_start_time) then + Db.VM.add_to_platform ~__context ~self ~key:Vm_platform.timeoffset + ~value:rtc_timeoffset + ) + with e -> + error "Caught %s: while updating VM %s rtc/timeoffset" + (Printexc.to_string e) id + ) ; + different + (fun x -> x.hvm) + Fun.id + (fun hvm -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s hvm <- 
%s" id (string_of_bool hvm) ; + Db.VM_metrics.set_hvm ~__context ~self:metrics ~value:hvm + ) ; + different + (fun x -> x.nomigrate) + Fun.id + (fun nomigrate -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s nomigrate <- %s" id + (string_of_bool nomigrate) ; + Db.VM_metrics.set_nomigrate ~__context ~self:metrics ~value:nomigrate + ) ; + different + (fun x -> x.nested_virt) + Fun.id + (fun nested_virt -> + let metrics = Db.VM.get_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s nested_virt <- %s" id + (string_of_bool nested_virt) ; + Db.VM_metrics.set_nested_virt ~__context ~self:metrics ~value:nested_virt + ) ; + (* Chack last_start_time before updating anything in the guest metrics *) + different + (fun x -> x.last_start_time) + Fun.id + (fun last_start_time -> + try + let metrics = Db.VM.get_metrics ~__context ~self in + (* Clamp time to full seconds, stored timestamps do not + have decimals *) + let start_time = Float.floor last_start_time |> Date.of_unix_time in + let expected_time = + Db.VM_metrics.get_start_time ~__context ~self:metrics + in + if Date.is_later ~than:expected_time start_time then ( + debug "xenopsd event: Updating VM %s last_start_time <- %s" id + Date.(to_rfc3339 (of_unix_time last_start_time)) ; + Db.VM_metrics.set_start_time ~__context ~self:metrics + ~value:start_time ; + if + (* VM start and VM reboot *) + power_state = `Running && power_state_before_update <> `Suspended + then ( + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_device_model ; + Xapi_vm_lifecycle.remove_pending_guidance ~__context ~self + ~value:`restart_vm + ) + ) ; + create_guest_metrics_if_needed () ; + let gm = Db.VM.get_guest_metrics ~__context ~self in + let update_time = + Db.VM_guest_metrics.get_last_updated ~__context ~self:gm + in + if update_time < start_time then ( + debug + "VM %s guest metrics update time (%s) < VM start time (%s): \ + deleting" + id + 
(Date.to_rfc3339 update_time) + (Date.to_rfc3339 start_time) ; + Xapi_vm_helpers.delete_guest_metrics ~__context ~self ; + check_guest_agent () + ) + with e -> + error "Caught %s: while updating VM %s last_start_time" + (Printexc.to_string e) id + ) ; + Option.iter + (fun state -> + List.iter + (fun domid -> + (* Guest metrics could have been destroyed during the last_start_time check + by recreating them, we avoid CA-223387 *) + create_guest_metrics_if_needed () ; + different + (fun x -> x.Vm.uncooperative_balloon_driver) + Fun.id + (fun uncooperative_balloon_driver -> + debug + "xenopsd event: VM %s domid %d uncooperative_balloon_driver = \ + %b" + id domid uncooperative_balloon_driver + ) ; + different + (fun x -> x.Vm.guest_agent) + Fun.id + (fun _ -> check_guest_agent ()) ; + different + (fun x -> x.Vm.pv_drivers_detected) + Fun.id + (fun pv_drivers_detected -> try - Option.iter - (fun (_, state) -> - let metrics = Db.VM.get_metrics ~__context ~self in - (* Clamp time to full seconds, stored timestamps do not - have decimals *) - let start_time = - Float.floor state.Vm.last_start_time |> Date.of_unix_time - in - let expected_time = - Db.VM_metrics.get_start_time ~__context ~self:metrics - in - if Date.is_later ~than:expected_time start_time then ( - debug - "xenopsd event: Updating VM %s last_start_time <- %s" id - Date.(to_rfc3339 (of_unix_time state.Vm.last_start_time)) ; - Db.VM_metrics.set_start_time ~__context ~self:metrics - ~value:start_time ; - if - (* VM start and VM reboot *) - power_state = `Running - && power_state_before_update <> `Suspended - then ( - Xapi_vm_lifecycle.remove_pending_guidance ~__context - ~self ~value:`restart_device_model ; - Xapi_vm_lifecycle.remove_pending_guidance ~__context - ~self ~value:`restart_vm - ) - ) ; - create_guest_metrics_if_needed () ; - let gm = Db.VM.get_guest_metrics ~__context ~self in - let update_time = - Db.VM_guest_metrics.get_last_updated ~__context ~self:gm - in - if update_time < start_time then ( - 
debug - "VM %s guest metrics update time (%s) < VM start time \ - (%s): deleting" - id - (Date.to_rfc3339 update_time) - (Date.to_rfc3339 start_time) ; - Xapi_vm_helpers.delete_guest_metrics ~__context ~self ; - check_guest_agent () - ) - ) - info + let gm = Db.VM.get_guest_metrics ~__context ~self in + debug "xenopsd event: Updating VM %s PV drivers detected %b" id + pv_drivers_detected ; + Db.VM_guest_metrics.set_PV_drivers_detected ~__context ~self:gm + ~value:pv_drivers_detected ; + Db.VM_guest_metrics.set_PV_drivers_up_to_date ~__context + ~self:gm ~value:pv_drivers_detected with e -> - error "Caught %s: while updating VM %s last_start_time" + debug "Caught %s: while updating VM %s PV drivers" (Printexc.to_string e) id - ) ; - Option.iter - (fun (_, state) -> - List.iter - (fun domid -> - (* Guest metrics could have been destroyed during the last_start_time check - by recreating them, we avoid CA-223387 *) - create_guest_metrics_if_needed () ; - if different (fun x -> x.Vm.uncooperative_balloon_driver) then - debug - "xenopsd event: VM %s domid %d \ - uncooperative_balloon_driver = %b" - id domid state.Vm.uncooperative_balloon_driver ; - if different (fun x -> x.Vm.guest_agent) then - check_guest_agent () ; - if different (fun x -> x.Vm.pv_drivers_detected) then - update_pv_drivers_detected () ; - ( if different (fun x -> x.Vm.xsdata_state) then - try - debug "xenopsd event: Updating VM %s domid %d xsdata" id - domid ; - Db.VM.set_xenstore_data ~__context ~self - ~value:state.Vm.xsdata_state - with e -> - error "Caught %s: while updating VM %s xsdata" - (Printexc.to_string e) id - ) ; - if different (fun x -> x.Vm.memory_target) then - try - debug - "xenopsd event: Updating VM %s domid %d memory target" - id domid ; - Rrdd.update_vm_memory_target domid state.Vm.memory_target - with e -> - error "Caught %s: while updating VM %s memory_target" - (Printexc.to_string e) id - ) - state.Vm.domids - ) - info ; - if different (fun x -> x.Vm.vcpu_target) then - 
Option.iter - (fun (_, state) -> - try - debug "xenopsd event: Updating VM %s vcpu_target <- %d" id - state.Vm.vcpu_target ; - let metrics = Db.VM.get_metrics ~__context ~self in - Db.VM_metrics.set_VCPUs_number ~__context ~self:metrics - ~value:(Int64.of_int state.Vm.vcpu_target) - with e -> - error "Caught %s: while updating VM %s VCPUs_number" - (Printexc.to_string e) id - ) - info ; - ( if different (fun x -> x.shadow_multiplier_target) then + ) ; + different + (fun x -> x.Vm.xsdata_state) + Fun.id + (fun xsdata_state -> try - Option.iter - (fun (_, state) -> - debug - "xenopsd event: Updating VM %s shadow_multiplier <- %.2f" - id state.Vm.shadow_multiplier_target ; - if - state.Vm.power_state <> Halted - && state.Vm.shadow_multiplier_target >= 0.0 - then - Db.VM.set_HVM_shadow_multiplier ~__context ~self - ~value:state.Vm.shadow_multiplier_target - ) - info + debug "xenopsd event: Updating VM %s domid %d xsdata" id domid ; + Db.VM.set_xenstore_data ~__context ~self ~value:xsdata_state with e -> - error "Caught %s: while updating VM %s HVM_shadow_multiplier" + error "Caught %s: while updating VM %s xsdata" (Printexc.to_string e) id - ) ; - (* Preserve last_boot_CPU_flags when suspending (see current_domain_type) *) - if different (fun x -> x.Vm.featureset) && power_state <> `Suspended - then - Option.iter - (fun (_, state) -> - try - debug - "xenopsd event: Updating VM %s last_boot_CPU_flags <- %s" id - state.Vm.featureset ; - let vendor = - Db.Host.get_cpu_info ~__context ~self:localhost - |> List.assoc Xapi_globs.cpu_info_vendor_key - in - let value = - [ - (Xapi_globs.cpu_info_vendor_key, vendor) - ; (Xapi_globs.cpu_info_features_key, state.Vm.featureset) - ] - in - Db.VM.set_last_boot_CPU_flags ~__context ~self ~value - with e -> - error "Caught %s: while updating VM %s last_boot_CPU_flags" - (Printexc.to_string e) id - ) - info ; - Xenops_cache.update_vm id (Option.map snd info) ; - if !should_update_allowed_operations then - Helpers.call_api_functions 
~__context (fun rpc session_id -> - XenAPI.VM.update_allowed_operations ~rpc ~session_id ~self ) ) + state.Vm.domids + ) + info ; + different + (fun x -> x.Vm.vcpu_target) + Fun.id + (fun vcpu_target -> + try + debug "xenopsd event: Updating VM %s vcpu_target <- %d" id vcpu_target ; + let metrics = Db.VM.get_metrics ~__context ~self in + Db.VM_metrics.set_VCPUs_number ~__context ~self:metrics + ~value:(Int64.of_int vcpu_target) + with e -> + error "Caught %s: while updating VM %s VCPUs_number" + (Printexc.to_string e) id + ) ; + different + (fun x -> x.shadow_multiplier_target) + Fun.id + (fun shadow_multiplier_target -> + try + debug "xenopsd event: Updating VM %s shadow_multiplier <- %.2f" id + shadow_multiplier_target ; + if power_state <> `Halted && shadow_multiplier_target >= 0.0 then + Db.VM.set_HVM_shadow_multiplier ~__context ~self + ~value:shadow_multiplier_target + with e -> + error "Caught %s: while updating VM %s HVM_shadow_multiplier" + (Printexc.to_string e) id + ) ; + (* Preserve last_boot_CPU_flags when suspending (see current_domain_type) *) + different + (fun x -> x.Vm.featureset) + (( && ) (power_state <> `Suspended)) + (fun featureset -> + try + debug "xenopsd event: Updating VM %s last_boot_CPU_flags <- %s" id + featureset ; + let vendor = + Db.Host.get_cpu_info ~__context ~self:localhost + |> List.assoc Constants.cpu_info_vendor_key + in + let value = + [ + (Constants.cpu_info_vendor_key, vendor) + ; (Constants.cpu_info_features_key, featureset) + ] + in + Db.VM.set_last_boot_CPU_flags ~__context ~self ~value + with e -> + error "Caught %s: while updating VM %s last_boot_CPU_flags" + (Printexc.to_string e) id + ) ; + Xenops_cache.update_vm id info ; + if !should_update_allowed_operations then + Helpers.call_api_functions ~__context (fun rpc session_id -> + XenAPI.VM.update_allowed_operations ~rpc ~session_id ~self + ) + +let update_vm ~__context id = + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", id)] + 
~__context __FUNCTION__ + in + try + if Events_from_xenopsd.are_suppressed id then + debug "xenopsd event: ignoring event for VM (VM %s migrating away)" id + else + let self = Db.VM.get_by_uuid ~__context ~uuid:id in + let localhost = Helpers.get_localhost ~__context in + if Db.VM.get_resident_on ~__context ~self = localhost then + let previous = Xenops_cache.find_vm id in + let dbg = Context.string_of_task_and_tracing __context in + let module Client = + (val make_client (queue_of_vm ~__context ~self) : XENOPS) + in + let info = try Some (snd (Client.VM.stat dbg id)) with _ -> None in + if info <> previous then + update_vm_internal ~__context ~id ~self ~previous ~info ~localhost with e -> error "xenopsd event: Caught %s while updating VM: has this VM been removed \ @@ -2425,6 +2508,11 @@ let update_vm ~__context id = (string_of_exn e) let update_vbd ~__context (id : string * string) = + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vbd", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VBD (VM %s migrating away)" @@ -2438,8 +2526,8 @@ let update_vbd ~__context (id : string * string) = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VBD.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VBD.stat dbg id)) with _ -> None in + if info <> previous then ( let vbds = Db.VM.get_VBDs ~__context ~self:vm in let vbdrs = List.map @@ -2474,7 +2562,7 @@ let update_vbd ~__context (id : string * string) = debug "VBD %s.%s matched device %s" (fst id) (snd id) vbd_r.API.vBD_userdevice ; Option.iter - (fun (_, state) -> + (fun state -> let currently_attached = state.Vbd.plugged || state.Vbd.active in debug "xenopsd event: Updating VBD %s.%s device <- %s; \ @@ -2517,7 +2605,7 @@ let update_vbd ~__context (id : string * 
string) = ) ) info ; - Xenops_cache.update_vbd id (Option.map snd info) ; + Xenops_cache.update_vbd id info ; Xapi_vbd_helpers.update_allowed_operations ~__context ~self:vbd ; if not (Db.VBD.get_empty ~__context ~self:vbd) then let vdi = Db.VBD.get_VDI ~__context ~self:vbd in @@ -2527,6 +2615,11 @@ let update_vbd ~__context (id : string * string) = error "xenopsd event: Caught %s while updating VBD" (string_of_exn e) let update_vif ~__context id = + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vif", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VIF (VM %s migrating away)" @@ -2540,8 +2633,8 @@ let update_vif ~__context id = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VIF.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VIF.stat dbg id)) with _ -> None in + if info <> previous then ( let vifs = Db.VM.get_VIFs ~__context ~self:vm in let vifrs = List.map @@ -2552,7 +2645,7 @@ let update_vif ~__context id = List.find (fun (_, vifr) -> vifr.API.vIF_device = snd id) vifrs in Option.iter - (fun (_, state) -> + (fun state -> if not (state.Vif.plugged || state.Vif.active) then ( ( try Xapi_network.deregister_vif ~__context vif with e -> @@ -2628,13 +2721,18 @@ let update_vif ~__context id = ~value:(state.plugged || state.active) ) info ; - Xenops_cache.update_vif id (Option.map snd info) ; + Xenops_cache.update_vif id info ; Xapi_vif_helpers.update_allowed_operations ~__context ~self:vif ) with e -> error "xenopsd event: Caught %s while updating VIF" (string_of_exn e) let update_pci ~__context id = + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.pci", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then 
debug "xenopsd event: ignoring event for PCI (VM %s migrating away)" @@ -2648,8 +2746,8 @@ let update_pci ~__context id = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.PCI.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.PCI.stat dbg id)) with _ -> None in + if info <> previous then ( let pcis = Db.Host.get_PCIs ~__context ~self:localhost in let pcirs = List.map @@ -2666,7 +2764,7 @@ let update_pci ~__context id = List.mem vm (Db.PCI.get_attached_VMs ~__context ~self:pci) in Option.iter - (fun (_, state) -> + (fun state -> debug "xenopsd event: Updating PCI %s.%s currently_attached <- %b" (fst id) (snd id) state.Pci.plugged ; if attached_in_db && not state.Pci.plugged then @@ -2697,12 +2795,17 @@ let update_pci ~__context id = vgpu_opt ) info ; - Xenops_cache.update_pci id (Option.map snd info) + Xenops_cache.update_pci id info ) with e -> error "xenopsd event: Caught %s while updating PCI" (string_of_exn e) let update_vgpu ~__context id = + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vgpu", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VGPU (VM %s migrating away)" @@ -2716,8 +2819,8 @@ let update_vgpu ~__context id = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VGPU.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VGPU.stat dbg id)) with _ -> None in + if info <> previous then ( let vgpus = Db.VM.get_VGPUs ~__context ~self:vm in let vgpu_records = List.map @@ -2738,7 +2841,7 @@ let update_vgpu ~__context id = = None then Option.iter - (fun (_, state) -> + (fun state -> ( if state.Vgpu.plugged then let scheduled = Db.VGPU.get_scheduled_to_be_resident_on ~__context 
@@ -2761,12 +2864,17 @@ let update_vgpu ~__context id = ) ) info ; - Xenops_cache.update_vgpu id (Option.map snd info) + Xenops_cache.update_vgpu id info ) with e -> error "xenopsd event: Caught %s while updating VGPU" (string_of_exn e) let update_vusb ~__context (id : string * string) = + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.vm", fst id); ("xapi.event.on.vusb", snd id)] + ~__context __FUNCTION__ + in try if Events_from_xenopsd.are_suppressed (fst id) then debug "xenopsd event: ignoring event for VUSB (VM %s migrating away)" @@ -2780,8 +2888,8 @@ let update_vusb ~__context (id : string * string) = let module Client = (val make_client (queue_of_vm ~__context ~self:vm) : XENOPS) in - let info = try Some (Client.VUSB.stat dbg id) with _ -> None in - if Option.map snd info <> previous then ( + let info = try Some (snd (Client.VUSB.stat dbg id)) with _ -> None in + if info <> previous then ( let pusb, _ = Db.VM.get_VUSBs ~__context ~self:vm |> List.map (fun self -> Db.VUSB.get_USB_group ~__context ~self) @@ -2796,7 +2904,7 @@ let update_vusb ~__context (id : string * string) = let usb_group = Db.PUSB.get_USB_group ~__context ~self:pusb in let vusb = Helpers.get_first_vusb ~__context usb_group in Option.iter - (fun (_, state) -> + (fun state -> debug "xenopsd event: Updating USB %s.%s; plugged <- %b" (fst id) (snd id) state.Vusb.plugged ; let currently_attached = state.Vusb.plugged in @@ -2804,7 +2912,7 @@ let update_vusb ~__context (id : string * string) = ~value:currently_attached ) info ; - Xenops_cache.update_vusb id (Option.map snd info) ; + Xenops_cache.update_vusb id info ; Xapi_vusb_helpers.update_allowed_operations ~__context ~self:vusb ) with e -> @@ -2822,14 +2930,21 @@ let unwrap x = raise Not_a_xenops_task let register_task __context ?cancellable queue_name id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in TaskHelper.register_task __context ?cancellable (wrap queue_name id) ; id let unregister_task 
__context queue_name id = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in TaskHelper.unregister_task __context (wrap queue_name id) ; id let update_task ~__context queue_name id = + let@ __context = + Context.with_tracing + ~attributes:[("xapi.event.on.task", id)] + ~__context __FUNCTION__ + in try let self = TaskHelper.id_to_task_exn (TaskHelper.Xenops (queue_name, id)) in (* throws Not_found *) @@ -2863,59 +2978,65 @@ let update_task ~__context queue_name id = error "xenopsd event: Caught %s while updating task" (string_of_exn e) let rec events_watch ~__context cancel queue_name from = - let dbg = Context.string_of_task_and_tracing __context in - if Xapi_fist.delay_xenopsd_event_threads () then Thread.delay 30.0 ; - let module Client = (val make_client queue_name : XENOPS) in - let barriers, events, next = Client.UPDATES.get dbg from None in - if !cancel then - raise (Api_errors.Server_error (Api_errors.task_cancelled, [])) ; - let done_events = ref [] in - let already_done x = List.mem x !done_events in - let add_event x = done_events := x :: !done_events in - let do_updates l = - let open Dynamic in - List.iter - (fun ev -> - debug "Processing event: %s" - (ev |> Dynamic.rpc_of_id |> Jsonrpc.to_string) ; - if already_done ev then - debug "Skipping (already processed this round)" - else ( - add_event ev ; - match ev with - | Vm id -> - debug "xenops event on VM %s" id ; - update_vm ~__context id - | Vbd id -> - debug "xenops event on VBD %s.%s" (fst id) (snd id) ; - update_vbd ~__context id - | Vif id -> - debug "xenops event on VIF %s.%s" (fst id) (snd id) ; - update_vif ~__context id - | Pci id -> - debug "xenops event on PCI %s.%s" (fst id) (snd id) ; - update_pci ~__context id - | Vgpu id -> - debug "xenops event on VGPU %s.%s" (fst id) (snd id) ; - update_vgpu ~__context id - | Vusb id -> - debug "xenops event on VUSB %s.%s" (fst id) (snd id) ; - update_vusb ~__context id - | Task id -> - debug "xenops event on Task %s" id ; - update_task 
~__context queue_name id - ) - ) - l - in - List.iter - (fun (id, b_events) -> - debug "Processing barrier %d" id ; - do_updates b_events ; - Events_from_xenopsd.wakeup queue_name dbg id + Context.complete_tracing __context ; + let next = + Context.with_tracing ~__context __FUNCTION__ (fun __context -> + let dbg = Context.string_of_task_and_tracing __context in + if Xapi_fist.delay_xenopsd_event_threads () then Thread.delay 30.0 ; + let module Client = (val make_client queue_name : XENOPS) in + let barriers, events, next = Client.UPDATES.get dbg from None in + if !cancel then + raise (Api_errors.Server_error (Api_errors.task_cancelled, [])) ; + let done_events = ref [] in + let already_done x = List.mem x !done_events in + let add_event x = done_events := x :: !done_events in + let do_updates l = + let open Dynamic in + List.iter + (fun ev -> + debug "Processing event: %s" + (ev |> Dynamic.rpc_of_id |> Jsonrpc.to_string) ; + if already_done ev then + debug "Skipping (already processed this round)" + else ( + add_event ev ; + match ev with + | Vm id -> + debug "xenops event on VM %s" id ; + update_vm ~__context id + | Vbd id -> + debug "xenops event on VBD %s.%s" (fst id) (snd id) ; + update_vbd ~__context id + | Vif id -> + debug "xenops event on VIF %s.%s" (fst id) (snd id) ; + update_vif ~__context id + | Pci id -> + debug "xenops event on PCI %s.%s" (fst id) (snd id) ; + update_pci ~__context id + | Vgpu id -> + debug "xenops event on VGPU %s.%s" (fst id) (snd id) ; + update_vgpu ~__context id + | Vusb id -> + debug "xenops event on VUSB %s.%s" (fst id) (snd id) ; + update_vusb ~__context id + | Task id -> + debug "xenops event on Task %s" id ; + update_task ~__context queue_name id + ) + ) + l + in + List.iter + (fun (id, b_events) -> + debug "Processing barrier %d" id ; + do_updates b_events ; + Events_from_xenopsd.wakeup queue_name dbg id + ) + barriers ; + do_updates events ; + next ) - barriers ; - do_updates events ; + in events_watch ~__context cancel 
queue_name (Some next) let events_from_xenopsd queue_name = @@ -2930,6 +3051,7 @@ let events_from_xenopsd queue_name = ) let refresh_vm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let id = id_of_vm ~__context ~self in info "xenops: UPDATES.refresh_vm %s" id ; let dbg = Context.string_of_task_and_tracing __context in @@ -2939,6 +3061,7 @@ let refresh_vm ~__context ~self = Events_from_xenopsd.wait queue_name dbg id () let resync_resident_on ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let localhost = Helpers.get_localhost ~__context in let domain0 = Helpers.get_domain_zero ~__context in @@ -3081,6 +3204,7 @@ let resync_resident_on ~__context = xapi_vms_not_in_xenopsd let resync_all_vms ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in (* This should now be correct *) let localhost = Helpers.get_localhost ~__context in let domain0 = Helpers.get_domain_zero ~__context in @@ -3090,7 +3214,15 @@ let resync_all_vms ~__context = in List.iter (fun vm -> refresh_vm ~__context ~self:vm) resident_vms_in_db +(* experimental feature for hard-pinning vcpus *) +let hard_numa_enabled ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in + let pool = Helpers.get_pool ~__context in + let restrictions = Db.Pool.get_restrictions ~__context ~self:pool in + List.assoc_opt "restrict_hard_numa" restrictions = Some "false" + let set_numa_affinity_policy ~__context ~value = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task __context in let open Xapi_xenops_queue in let module Client = (val make_client (default_xenopsd ()) : XENOPS) in @@ -3098,15 +3230,18 @@ let set_numa_affinity_policy ~__context ~value = let open Xenops_interface.Host in match value with | `any -> - Any + Some Any + | `best_effort when hard_numa_enabled ~__context -> + Some 
Best_effort_hard | `best_effort -> - Best_effort + Some Best_effort | `default_policy -> - Any + None in Client.HOST.set_numa_affinity_policy dbg value let on_xapi_restart ~__context = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let host = Helpers.get_localhost ~__context in let value = Db.Host.get_numa_affinity_policy ~__context ~self:host in info "Setting NUMA affinity policy in xenopsd on startup to %s" @@ -3130,6 +3265,7 @@ let on_xapi_restart ~__context = apply_guest_agent_config ~__context config let assert_resident_on ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let localhost = Helpers.get_localhost ~__context in if not (Db.VM.get_resident_on ~__context ~self = localhost) then Helpers.internal_error "the VM %s is not resident on this host" @@ -3462,6 +3598,7 @@ let transform_xenops_exn ~__context ~vm queue_name f = should not be any other suppression going on. *) let set_resident_on ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let id = id_of_vm ~__context ~self in debug "VM %s set_resident_on" id ; let localhost = Helpers.get_localhost ~__context in @@ -3476,6 +3613,7 @@ let set_resident_on ~__context ~self = Xenopsd_metadata.update ~__context ~self let update_debug_info __context t = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let task = Context.get_task_id __context in let debug_info = List.map (fun (k, v) -> ("debug_info:" ^ k, v)) t.Task.debug_info @@ -3490,6 +3628,7 @@ let update_debug_info __context t = debug_info let sync_with_task_result __context ?cancellable queue_name x = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in x |> register_task __context ?cancellable queue_name @@ -3501,6 +3640,7 @@ let sync_with_task __context ?cancellable queue_name x = sync_with_task_result __context ?cancellable queue_name x |> ignore let sync __context queue_name x 
= + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in x |> wait_for_task queue_name dbg @@ -3508,6 +3648,7 @@ let sync __context queue_name x = |> ignore let pause ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3521,6 +3662,7 @@ let pause ~__context ~self = ) let unpause ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3533,6 +3675,7 @@ let unpause ~__context ~self = ) let request_rdp ~__context ~self enabled = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3545,6 +3688,7 @@ let request_rdp ~__context ~self enabled = ) let run_script ~__context ~self script = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3561,6 +3705,7 @@ let run_script ~__context ~self script = ) let set_xenstore_data ~__context ~self xsdata = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3572,6 +3717,7 @@ let set_xenstore_data ~__context ~self xsdata = ) let set_vcpus ~__context ~self n = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context 
~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3599,6 +3745,7 @@ let set_vcpus ~__context ~self n = ) let set_shadow_multiplier ~__context ~self target = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3628,6 +3775,7 @@ let set_shadow_multiplier ~__context ~self target = ) let set_memory_dynamic_range ~__context ~self min max = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3640,6 +3788,7 @@ let set_memory_dynamic_range ~__context ~self min max = ) let maybe_refresh_vm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let queue_name = queue_of_vm ~__context ~self in let id = id_of_vm ~__context ~self in @@ -3652,6 +3801,7 @@ let maybe_refresh_vm ~__context ~self = ) let start ~__context ~self paused force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> @@ -3713,6 +3863,7 @@ let start ~__context ~self paused force = ) let start ~__context ~self paused force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> try start ~__context ~self paused force @@ -3738,6 +3889,7 @@ let start ~__context ~self paused force = ) let reboot ~__context ~self timeout = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context 
~vm:self queue_name (fun () -> assert_resident_on ~__context ~self ; @@ -3760,6 +3912,7 @@ let reboot ~__context ~self timeout = ) let shutdown ~__context ~self timeout = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> assert_resident_on ~__context ~self ; @@ -3793,6 +3946,7 @@ let shutdown ~__context ~self timeout = ) let suspend ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> assert_resident_on ~__context ~self ; @@ -3869,6 +4023,7 @@ let suspend ~__context ~self = ) let resume ~__context ~self ~start_paused ~force:_ = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let queue_name = queue_of_vm ~__context ~self in let vm_id = id_of_vm ~__context ~self in @@ -3922,6 +4077,7 @@ let resume ~__context ~self ~start_paused ~force:_ = ~expected:(if start_paused then `Paused else `Running) let s3suspend ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3933,6 +4089,7 @@ let s3suspend ~__context ~self = ) let s3resume ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let queue_name = queue_of_vm ~__context ~self in transform_xenops_exn ~__context ~vm:self queue_name (fun () -> let id = id_of_vm ~__context ~self in @@ -3944,12 +4101,14 @@ let s3resume ~__context ~self = ) let md_of_vbd ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in MD.of_vbd ~__context ~vm:(Db.VM.get_record ~__context ~self:vm) ~vbd:(Db.VBD.get_record 
~__context ~self) let vbd_plug ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let vm_id = id_of_vm ~__context ~self:vm in let queue_name = queue_of_vm ~__context ~self:vm in @@ -3976,6 +4135,7 @@ let vbd_plug ~__context ~self = ) let vbd_unplug ~__context ~self force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4005,6 +4165,7 @@ let vbd_unplug ~__context ~self force = ) let vbd_eject_hvm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4027,6 +4188,7 @@ let vbd_eject_hvm ~__context ~self = ) let vbd_insert_hvm ~__context ~self ~vdi = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4052,6 +4214,7 @@ let vbd_insert_hvm ~__context ~self ~vdi = ) let has_qemu ~__context ~vm = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let dbg = Context.string_of_task_and_tracing __context in let id = Db.VM.get_uuid ~__context ~self:vm in let queue_name = queue_of_vm ~__context ~self:vm in @@ -4060,10 +4223,12 @@ let has_qemu ~__context ~vm = state.Vm.domain_type = Domain_HVM let ejectable ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VBD.get_VM ~__context ~self in has_qemu ~__context ~vm let vbd_eject ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if ejectable ~__context ~self then vbd_eject_hvm ~__context ~self else ( @@ -4073,6 +4238,7 @@ let 
vbd_eject ~__context ~self = ) let vbd_insert ~__context ~self ~vdi = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if ejectable ~__context ~self then vbd_insert_hvm ~__context ~self ~vdi else ( @@ -4082,12 +4248,14 @@ let vbd_insert ~__context ~self ~vdi = ) let md_of_vif ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in MD.of_vif ~__context ~vm:(Db.VM.get_record ~__context ~self:vm) ~vif:(self, Db.VIF.get_record ~__context ~self) let vif_plug ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let vm_id = id_of_vm ~__context ~self:vm in let queue_name = queue_of_vm ~__context ~self:vm in @@ -4116,6 +4284,7 @@ let vif_plug ~__context ~self = ) let vif_set_locking_mode ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4130,6 +4299,7 @@ let vif_set_locking_mode ~__context ~self = ) let vif_set_pvs_proxy ~__context ~self creating = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4145,6 +4315,7 @@ let vif_set_pvs_proxy ~__context ~self creating = ) let vif_unplug ~__context ~self force = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4167,6 +4338,7 @@ let vif_unplug ~__context ~self force = ) let vif_move ~__context ~self _network = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = 
queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4193,6 +4365,7 @@ let vif_move ~__context ~self _network = ) let vif_set_ipv4_configuration ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4209,6 +4382,7 @@ let vif_set_ipv4_configuration ~__context ~self = ) let vif_set_ipv6_configuration ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VIF.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4225,6 +4399,7 @@ let vif_set_ipv6_configuration ~__context ~self = ) let task_cancel ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in try let queue_name, id = TaskHelper.task_to_id_exn self |> unwrap in let module Client = (val make_client queue_name : XENOPS) in @@ -4240,6 +4415,7 @@ let task_cancel ~__context ~self = false let md_of_vusb ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VUSB.get_VM ~__context ~self in let usb_group = Db.VUSB.get_USB_group ~__context ~self in let pusb = Helpers.get_first_pusb ~__context usb_group in @@ -4247,6 +4423,7 @@ let md_of_vusb ~__context ~self = MD.of_vusb ~__context ~vm:(Db.VM.get_record ~__context ~self:vm) ~pusb:pusbr let vusb_unplug_hvm ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VUSB.get_VM ~__context ~self in let queue_name = queue_of_vm ~__context ~self:vm in transform_xenops_exn ~__context ~vm queue_name (fun () -> @@ -4263,10 +4440,12 @@ let vusb_unplug_hvm ~__context ~self = ) let vusb_plugable ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in let vm = Db.VUSB.get_VM ~__context 
~self in has_qemu ~__context ~vm let vusb_unplug ~__context ~self = + let@ __context = Context.with_tracing ~__context __FUNCTION__ in if vusb_plugable ~__context ~self then vusb_unplug_hvm ~__context ~self else @@ -4314,6 +4493,11 @@ module Observer = struct let dbg = Context.string_of_task __context in Client.Observer.set_export_interval dbg interval + let set_export_chunk_size ~__context ~size = + let module Client = (val make_client (default_xenopsd ()) : XENOPS) in + let dbg = Context.string_of_task __context in + Client.Observer.set_export_chunk_size dbg size + let set_max_spans ~__context ~spans = let module Client = (val make_client (default_xenopsd ()) : XENOPS) in let dbg = Context.string_of_task __context in @@ -4324,6 +4508,11 @@ module Observer = struct let dbg = Context.string_of_task __context in Client.Observer.set_max_traces dbg traces + let set_max_depth ~__context ~depth = + let module Client = (val make_client (default_xenopsd ()) : XENOPS) in + let dbg = Context.string_of_task __context in + Client.Observer.set_max_depth dbg depth + let set_max_file_size ~__context ~file_size = let module Client = (val make_client (default_xenopsd ()) : XENOPS) in let dbg = Context.string_of_task __context in diff --git a/ocaml/xapi/xha_interface.ml b/ocaml/xapi/xha_interface.ml index e89d22978ab..0935e06619d 100644 --- a/ocaml/xapi/xha_interface.ml +++ b/ocaml/xapi/xha_interface.ml @@ -493,7 +493,7 @@ module LiveSetInformation = struct ( match first_xml_element_with_name elements "localhost" with | Some (Xml.Element - (_, _, [Xml.Element ("HostID", _, [Xml.PCData local_host_id])]) + (_, _, Xml.Element ("HostID", _, [Xml.PCData local_host_id]) :: _) ) -> ( match Uuidx.of_string local_host_id with | None -> diff --git a/ocaml/xcp-rrdd/bin/rrdd/dune b/ocaml/xcp-rrdd/bin/rrdd/dune index b8419b12fb8..2f215e8a7cf 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/dune +++ b/ocaml/xcp-rrdd/bin/rrdd/dune @@ -10,8 +10,8 @@ http_lib httpsvr inotify - mtime - mtime.clock.os + clock + 
mtime.clock rpclib.core rrd-transport rrd-transport.lib @@ -41,15 +41,16 @@ (libraries astring ezxenstore.core - ezxenstore.watch forkexec http_lib httpsvr inotify + clock rpclib.core rpclib.json rpclib.xml rrdd_libs_internal + rrdd_plugin_xenctrl rrd-transport threads.posix uuid @@ -65,9 +66,6 @@ xapi-stdext-threads xapi-stdext-unix xenctrl - xenstore - xenstore.unix - xenstore_transport.unix ) ) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml index f5d977d632c..3ddc24e462f 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_bindings.ml @@ -47,8 +47,6 @@ module type RRDD_IMPLEMENTATION = sig val update_use_min_max : bool -> unit - val update_vm_memory_target : int -> int64 -> unit - val set_cache_sr : string -> unit val unset_cache_sr : unit -> unit @@ -119,7 +117,6 @@ module Make (Impl : RRDD_IMPLEMENTATION) = struct Server.query_possible_sr_dss Impl.query_possible_sr_dss ; Server.query_sr_ds Impl.query_sr_ds ; Server.update_use_min_max Impl.update_use_min_max ; - Server.update_vm_memory_target Impl.update_vm_memory_target ; Server.set_cache_sr Impl.set_cache_sr ; Server.unset_cache_sr Impl.unset_cache_sr ; (* module Plugin*) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index 0d5ac4d4201..172735708b4 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -51,12 +51,19 @@ let merge_new_dss rrdi dss = !Rrdd_shared.enable_all_dss || ds.ds_default in let default_dss = StringMap.filter should_enable_ds dss in + let ds_names = + Array.fold_left + (fun (acc : StringSet.t) (e : Rrd.ds) : StringSet.t -> + StringSet.add e.ds_name acc + ) + StringSet.empty rrdi.rrd.rrd_dss + in (* NOTE: Only add enabled dss to the live rrd, ignoring non-default ones. This is because non-default ones are added to the RRD when they are enabled. 
*) let new_enabled_dss = StringMap.filter - (fun ds_name _ -> not (StringMap.mem ds_name rrdi.dss)) + (fun ds_name _ -> not (StringSet.mem ds_name ds_names)) default_dss in (* fold on Map is not tail-recursive, but the depth of the stack should be @@ -148,9 +155,7 @@ let convert_to_owner_map dss = Also resets the value of datasources that are enabled in the RRD, but weren't updated on this refresh cycle. *) -let update_rrds uuid_domids paused_vms plugins_dss = - let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in - let paused_vms = List.to_seq paused_vms |> StringSet.of_seq in +let update_rrds uuid_domids plugins_dss = let per_owner_flattened_map, per_plugin_map = convert_to_owner_map plugins_dss in @@ -230,18 +235,11 @@ let update_rrds uuid_domids paused_vms plugins_dss = match vm_rrd with | Some rrdi -> let updated_dss, rrd = merge_new_dss rrdi dss in - (* CA-34383: Memory updates from paused domains serve no useful - purpose. During a migrate such updates can also cause undesirable - discontinuities in the observed value of memory_actual. 
Hence, we - ignore changes from paused domains: *) - ( if not (StringSet.mem vm_uuid paused_vms) then - let named_updates = - StringMap.map to_named_updates dss - in - Rrd.ds_update_named rrd - ~new_rrd:(domid <> rrdi.domid) timestamp - named_updates - ) ; + let named_updates = + StringMap.map to_named_updates dss + in + Rrd.ds_update_named rrd ~new_rrd:(domid <> rrdi.domid) + timestamp named_updates ; Some {rrd; dss= updated_dss; domid} | None -> debug "%s: Creating fresh RRD for VM uuid=%s" @@ -299,7 +297,7 @@ let update_rrds uuid_domids paused_vms plugins_dss = reset_missing_data sr_rrdi.rrd missing_updates ; Hashtbl.replace sr_rrds sr_uuid sr_rrdi - with _ -> log_backtrace () + with e -> log_backtrace e in let process_host plugins_dss available_dss = let host_rrdi = !host_rrd in diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index 3a883a56986..15eee76cfe6 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -300,7 +300,6 @@ let migrate_rrd (session_id : string option) (remote_address : string) Some x | None -> debug "VM %s RRDs not found on migrate! Continuing anyway..." 
vm_uuid ; - log_backtrace () ; None ) |> Option.iter (fun rrdi -> @@ -572,11 +571,6 @@ let update_use_min_max (value : bool) : unit = debug "Updating use_min_max: New value=%b" value ; use_min_max := value -let update_vm_memory_target (domid : int) (target : int64) : unit = - with_lock memory_targets_m (fun _ -> - Hashtbl.replace memory_targets domid target - ) - let set_cache_sr (sr_uuid : string) : unit = with_lock cache_sr_lock (fun () -> cache_sr_uuid := Some sr_uuid) @@ -693,12 +687,13 @@ module Plugin = struct (* reset skip counts *) payload with e -> ( + Backtrace.is_important e ; incr_skip_count uid plugin ; (* increase skip count *) let log e = info "Failed to process plugin metrics file: %s (%s)" (P.string_of_uid ~uid) (Printexc.to_string e) ; - log_backtrace () + log_backtrace e in let open Rrd_protocol in match e with @@ -716,8 +711,12 @@ module Plugin = struct let next_reading (uid : P.uid) : float = let open Rrdd_shared in if with_lock registered_m (fun _ -> Hashtbl.mem registered uid) then - with_lock last_loop_end_time_m (fun _ -> - !last_loop_end_time +. !timeslice -. Unix.gettimeofday () + with_lock next_iteration_start_m (fun _ -> + match Clock.Timer.remaining !next_iteration_start with + | Remaining diff -> + Clock.Timer.span_to_s diff + | Expired diff -> + Clock.Timer.span_to_s diff *. -1. ) else -1. 
diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli index 000c53de121..bd8ae2e6c99 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.mli @@ -50,8 +50,6 @@ val query_sr_ds : string -> string -> float val update_use_min_max : bool -> unit -val update_vm_memory_target : int -> int64 -> unit - val set_cache_sr : string -> unit val unset_cache_sr : unit -> unit diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 883f9844cb5..b15e91b50cb 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -20,19 +20,15 @@ module StringSet = Set.Make (String) (* Whether to enable all non-default datasources *) let enable_all_dss = ref false -(* The time between each monitoring loop. *) -let timeslice : float ref = ref 5. +(* The expected time span between each monitoring loop. *) +let timeslice : Mtime.span ref = ref Mtime.Span.(5 * s) -(* Timestamp of the last monitoring loop end. *) -let last_loop_end_time : float ref = ref neg_infinity +(* A timer that expires at the start of the next iteration *) +let next_iteration_start : Clock.Timer.t ref = + ref (Clock.Timer.start ~duration:!timeslice) -(* The mutex that protects the last_loop_end_time against data corruption. *) -let last_loop_end_time_m : Mutex.t = Mutex.create () - -(** Cache memory/target values *) -let memory_targets : (int, int64) Hashtbl.t = Hashtbl.create 20 - -let memory_targets_m = Mutex.create () +(* The mutex that protects the next_iteration_start against data corruption. 
*) +let next_iteration_start_m : Mutex.t = Mutex.create () let cache_sr_uuid : string option ref = ref None @@ -140,7 +136,7 @@ let send_rrd ?(session_id : string option) let open Xmlrpc_client in with_transport transport (with_http request (fun (_response, fd) -> - try Rrd_unix.to_fd ~internal:true rrd fd with _ -> log_backtrace () + try Rrd_unix.to_fd ~internal:true rrd fd with e -> log_backtrace e ) ) ; debug "Sending RRD complete." @@ -171,7 +167,7 @@ let archive_rrd_internal ?(transport = None) ~uuid ~rrd () = Xapi_stdext_unix.Unixext.unlink_safe base_filename ) else debug "No local storage: not persisting RRDs" - with _ -> log_backtrace () + with e -> log_backtrace e ) | Some transport -> (* Stream it to the master to store, or maybe to a host in the migrate diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml index 09ffd401a09..c0404e17b1b 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_stats.ml @@ -238,5 +238,6 @@ let print_snapshot () = print_stats () ) with e -> + Backtrace.is_important e ; debug "Caught: %s" (Printexc.to_string e) ; - log_backtrace () + log_backtrace e diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index e2b6f741fc0..17ca619440d 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -119,213 +119,6 @@ let start (xmlrpc_path, http_fwd_path) process = let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute -(*****************************************************) -(* xenstore related code *) -(*****************************************************) - -module XSW_Debug = Debug.Make (struct let name = "xenstore_watch" end) - -module Watch = Ez_xenstore_watch.Make (XSW_Debug) - -module Xs = struct - module Client = Xs_client_unix.Client (Xs_transport_unix_client) - - let client = ref None - - (* Initialise the clients on demand - must be done after daemonisation! 
*) - let get_client () = - match !client with - | Some client -> - client - | None -> - let c = Client.make () in - client := Some c ; - c -end - -(* Map from domid to the latest seen meminfo_free value *) -let current_meminfofree_values = ref Watch.IntMap.empty - -let meminfo_path domid = - Printf.sprintf "/local/domain/%d/data/meminfo_free" domid - -module Meminfo = struct - let watch_token domid = Printf.sprintf "xcp-rrdd:domain-%d" domid - - let interesting_paths_for_domain domid _uuid = [meminfo_path domid] - - let fire_event_on_vm domid domains = - let d = int_of_string domid in - if not (Watch.IntMap.mem d domains) then - info "Ignoring watch on shutdown domain %d" d - else - let path = meminfo_path d in - try - let client = Xs.get_client () in - let meminfo_free_string = - Xs.Client.immediate client (fun xs -> Xs.Client.read xs path) - in - let meminfo_free = Int64.of_string meminfo_free_string in - info "memfree has changed to %Ld in domain %d" meminfo_free d ; - current_meminfofree_values := - Watch.IntMap.add d meminfo_free !current_meminfofree_values - with Xs_protocol.Enoent _hint -> - info - "Couldn't read path %s; forgetting last known memfree value for \ - domain %d" - path d ; - current_meminfofree_values := - Watch.IntMap.remove d !current_meminfofree_values - - let watch_fired _ _xc path domains _ = - match - List.filter (fun x -> x <> "") Astring.String.(cuts ~sep:"/" path) - with - | ["local"; "domain"; domid; "data"; "meminfo_free"] -> - fire_event_on_vm domid domains - | _ -> - debug "Ignoring unexpected watch: %s" path - - let unmanaged_domain _ _ = false - - let found_running_domain _ _ = () - - let domain_appeared _ _ _ = () - - let domain_disappeared _ _ _ = () -end - -module Watcher = Watch.WatchXenstore (Meminfo) - -(*****************************************************) -(* memory stats *) -(*****************************************************) -let dss_mem_host xc = - let physinfo = Xenctrl.physinfo xc in - let total_kib = - 
Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.total_pages) - and free_kib = - Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.free_pages) - in - [ - ( Rrd.Host - , Ds.ds_make ~name:"memory_total_kib" - ~description:"Total amount of memory in the host" - ~value:(Rrd.VT_Int64 total_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true - ~units:"KiB" () - ) - ; ( Rrd.Host - , Ds.ds_make ~name:"memory_free_kib" - ~description:"Total amount of free memory" - ~value:(Rrd.VT_Int64 free_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true - ~units:"KiB" () - ) - ] - -(** estimate the space needed to serialize all the dss_mem_vms in a host. the - json-like serialization for the 3 dss in dss_mem_vms takes 622 bytes. these - bytes plus some overhead make 1024 bytes an upper bound. *) -let max_supported_vms = 1024 - -let bytes_per_mem_vm = 1024 - -let mem_vm_writer_pages = ((max_supported_vms * bytes_per_mem_vm) + 4095) / 4096 - -let res_error fmt = Printf.ksprintf Result.error fmt - -let ok x = Result.ok x - -let ( let* ) = Result.bind - -let finally f always = Fun.protect ~finally:always f - -let scanning path f = - let io = Scanf.Scanning.open_in path in - finally (fun () -> f io) (fun () -> Scanf.Scanning.close_in io) - -let scan path = - try - scanning path @@ fun io -> - Scanf.bscanf io {|MemTotal: %_d %_s MemFree: %_d %_s MemAvailable: %Ld %s|} - (fun size kb -> ok (size, kb) - ) - with _ -> res_error "failed to scan %s" path - -let mem_available () = - let* size, kb = scan "/proc/meminfo" in - match kb with "kB" -> ok size | _ -> res_error "unexpected unit: %s" kb - -let dss_mem_vms doms = - List.fold_left - (fun acc (dom, uuid, domid) -> - let kib = - Xenctrl.pages_to_kib (Int64.of_nativeint dom.Xenctrl.total_memory_pages) - in - let memory = Int64.mul kib 1024L in - let main_mem_ds = - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory" - ~description:"Memory currently allocated to VM" ~units:"B" - ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () - ) - in 
- let memory_target_opt = - with_lock Rrdd_shared.memory_targets_m (fun _ -> - Hashtbl.find_opt Rrdd_shared.memory_targets domid - ) - in - let mem_target_ds = - Option.map - (fun memory_target -> - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_target" - ~description:"Target of VM balloon driver" ~units:"B" - ~value:(Rrd.VT_Int64 memory_target) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - ) - memory_target_opt - in - let other_ds = - if domid = 0 then - match mem_available () with - | Ok mem -> - Some - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Dom0 current free memory" - ~value:(Rrd.VT_Int64 mem) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - | Error msg -> - let _ = - error "%s: retrieving Dom0 free memory failed: %s" __FUNCTION__ - msg - in - None - else - try - let mem_free = - Watch.IntMap.find domid !current_meminfofree_values - in - Some - ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" - ~description:"Memory used as reported by the guest agent" - ~value:(Rrd.VT_Int64 mem_free) ~ty:Rrd.Gauge ~min:0.0 - ~default:true () - ) - with Not_found -> None - in - List.concat - [ - main_mem_ds :: Option.to_list other_ds - ; Option.to_list mem_target_ds - ; acc - ] - ) - [] doms - (**** Local cache SR stuff *) type last_vals = { @@ -429,87 +222,22 @@ let handle_exn log f default = (Printexc.to_string e) ; default -let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"] - -module IntSet = Set.Make (Int) - -let domain_snapshot xc = - let metadata_of_domain dom = - let ( let* ) = Option.bind in - let* uuid_raw = Uuidx.of_int_array dom.Xenctrl.handle in - let uuid = Uuidx.to_string uuid_raw in - let domid = dom.Xenctrl.domid in - let start = String.sub uuid 0 18 in - (* Actively hide migrating VM uuids, these are temporary and xenops writes - the original and the final uuid to xenstore *) - let uuid_from_key key = - let path = Printf.sprintf "/vm/%s/%s" uuid key in - try 
Ezxenstore_core.Xenstore.(with_xs (fun xs -> xs.read path)) - with Xs_protocol.Enoent _hint -> - info "Couldn't read path %s; falling back to actual uuid" path ; - uuid - in - let stable_uuid = Option.fold ~none:uuid ~some:uuid_from_key in - if List.mem start uuid_blacklist then - None - else - let key = - if Astring.String.is_suffix ~affix:"000000000000" uuid then - Some "origin-uuid" - else if Astring.String.is_suffix ~affix:"000000000001" uuid then - Some "final-uuid" - else - None - in - Some (dom, stable_uuid key, domid) - in - let domains = - Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain - in - let domain_paused (d, uuid, _) = - if d.Xenctrl.paused then Some uuid else None - in - let paused_uuids = List.filter_map domain_paused domains in - let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in - let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in - Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ; - (domains, paused_uuids) - let dom0_stat_generators = [ - ("ha", fun _ _ _ -> Rrdd_ha_stats.all ()) - ; ("mem_host", fun xc _ _ -> dss_mem_host xc) - ; ("mem_vms", fun _ _ domains -> dss_mem_vms domains) - ; ("cache", fun _ timestamp _ -> dss_cache timestamp) + ("ha", fun _ _ -> Rrdd_ha_stats.all ()) + ; ("cache", fun _ timestamp -> dss_cache timestamp) ] -let generate_all_dom0_stats xc domains = +let generate_all_dom0_stats xc = let handle_generator (name, generator) = let timestamp = Unix.gettimeofday () in - ( name - , (timestamp, handle_exn name (fun _ -> generator xc timestamp domains) []) - ) + (name, (timestamp, handle_exn name (fun _ -> generator xc timestamp) [])) in List.map handle_generator dom0_stat_generators -let write_dom0_stats writers tagged_dss = - let write_dss (name, writer) = - match List.assoc_opt name tagged_dss with - | None -> - debug - "Could not write stats for \"%s\": no stats were associated with \ - this name" - name - | Some (timestamp, dss) -> - 
writer.Rrd_writer.write_payload {timestamp; datasources= dss} - in - List.iter write_dss writers - -let do_monitor_write xc writers = +let do_monitor_write domains_before xc = Rrdd_libs.Stats.time_this "monitor" (fun _ -> - let domains, my_paused_vms = domain_snapshot xc in - let tagged_dom0_stats = generate_all_dom0_stats xc domains in - write_dom0_stats writers tagged_dom0_stats ; + let tagged_dom0_stats = generate_all_dom0_stats xc in let dom0_stats = tagged_dom0_stats |> List.to_seq @@ -518,37 +246,65 @@ let do_monitor_write xc writers = ) in let plugins_stats = Rrdd_server.Plugin.read_stats () in + let _, domains_after, _ = Xenctrl_lib.domain_snapshot xc in + let domains_after = List.to_seq domains_after in let stats = Seq.append plugins_stats dom0_stats in Rrdd_stats.print_snapshot () ; - let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in - + (* merge the domain ids from the previous iteration and the current one + to avoid missing updates *) + let uuid_domids = + Seq.append domains_before domains_after + |> Seq.map (fun (_, u, i) -> (u, i)) + |> Rrd.StringMap.of_seq + in (* stats are grouped per plugin, which provides its timestamp *) - Rrdd_monitor.update_rrds uuid_domids my_paused_vms stats ; + Rrdd_monitor.update_rrds uuid_domids stats ; Rrdd_libs.Constants.datasource_dump_file |> Rrdd_server.dump_host_dss_to_file ; Rrdd_libs.Constants.datasource_vm_dump_file - |> Rrdd_server.dump_vm_dss_to_file + |> Rrdd_server.dump_vm_dss_to_file ; + domains_after ) -let monitor_write_loop writers = +let monitor_write_loop () = Debug.with_thread_named "monitor_write" (fun () -> Xenctrl.with_intf (fun xc -> + let domains = ref Seq.empty in while true do try - do_monitor_write xc writers ; - with_lock Rrdd_shared.last_loop_end_time_m (fun _ -> - Rrdd_shared.last_loop_end_time := Unix.gettimeofday () + domains := do_monitor_write !domains xc ; + with_lock Rrdd_shared.next_iteration_start_m (fun _ -> + Rrdd_shared.next_iteration_start := + Clock.Timer.extend_by 
!Rrdd_shared.timeslice + !Rrdd_shared.next_iteration_start ) ; - Thread.delay !Rrdd_shared.timeslice + match Clock.Timer.remaining !Rrdd_shared.next_iteration_start with + | Remaining remaining -> + Thread.delay (Clock.Timer.span_to_s remaining) + | Expired missed_by -> + warn + "%s: Monitor write iteration missed cycle by %a, skipping \ + the delay" + __FUNCTION__ Debug.Pp.mtime_span missed_by ; + (* To avoid to use up 100% CPU when the timer is already + expired, still delay 1s *) + Thread.delay 1. with e -> - debug - "Monitor/write thread caught an exception. Pausing for 10s, \ - then restarting: %s" - (Printexc.to_string e) ; - log_backtrace () ; - Thread.delay 10. + Backtrace.is_important e ; + warn + "%s: Monitor/write thread caught an exception. Pausing for \ + 10s, then restarting: %s" + __FUNCTION__ (Printexc.to_string e) ; + log_backtrace e ; + Thread.delay 10. ; + with_lock Rrdd_shared.next_iteration_start_m (fun _ -> + Rrdd_shared.next_iteration_start := + Clock.Timer.extend_by + Mtime.Span.(10 * s) + !Rrdd_shared.next_iteration_start + ) done ) ) @@ -710,45 +466,15 @@ let doc = the datasources and records historical data in RRD format." 
] -(** write memory stats to the filesystem so they can be propagated to xapi, - along with the number of pages they require to be allocated *) -let stats_to_write = [("mem_host", 1); ("mem_vms", mem_vm_writer_pages)] - -let writer_basename = ( ^ ) "xcp-rrdd-" - -let configure_writers () = - List.map - (fun (name, n_pages) -> - let path = Rrdd_server.Plugin.get_path (writer_basename name) in - ignore (Xapi_stdext_unix.Unixext.mkdir_safe (Filename.dirname path) 0o644) ; - let writer = - snd - (Rrd_writer.FileWriter.create - {path; shared_page_count= n_pages} - Rrd_protocol_v2.protocol - ) - in - (name, writer) - ) - stats_to_write - -(** we need to make sure we call exit on fatal signals to make sure profiling - data is dumped *) -let stop err writers signal = - debug "caught signal %a" Debug.Pp.signal signal ; - List.iter (fun (_, writer) -> writer.Rrd_writer.cleanup ()) writers ; - exit err - (* Entry point. *) -let _ = +let () = Rrdd_bindings.Rrd_daemon.bind () ; (* bind PPX-generated server calls to implementation of API *) - let writers = configure_writers () in (* Prevent shutdown due to sigpipe interrupt. This protects against potential stunnel crashes. *) Sys.set_signal Sys.sigpipe Sys.Signal_ignore ; - Sys.set_signal Sys.sigterm (Sys.Signal_handle (stop 1 writers)) ; - Sys.set_signal Sys.sigint (Sys.Signal_handle (stop 0 writers)) ; + Sys.set_signal Sys.sigterm (Sys.Signal_handle (fun _ -> exit 1)) ; + Sys.set_signal Sys.sigint (Sys.Signal_handle (fun _ -> exit 0)) ; (* Enable the new logging library. *) Debug.set_facility Syslog.Local5 ; (* Read configuration file. *) @@ -778,15 +504,8 @@ let _ = start (!Rrd_interface.default_path, !Rrd_interface.forwarded_path) (fun () -> Idl.Exn.server Rrdd_bindings.Server.implementation ) ; - ignore - @@ Discover.start - (List.map (fun (name, _) -> writer_basename name) stats_to_write) ; - ignore @@ GCLog.start () ; - debug "Starting xenstore-watching thread .." 
; - let () = - try Watcher.create_watcher_thread () - with _ -> error "xenstore-watching thread has failed" - in + let _ : Thread.t = Discover.start [] in + let _ : Thread.t = GCLog.start () in let module Daemon = Xapi_stdext_unix.Unixext.Daemon in if Daemon.systemd_booted () then if Daemon.systemd_notify Daemon.State.Ready then @@ -795,7 +514,7 @@ let _ = warn "Sending systemd notification failed at %s" __LOC__ ; debug "Creating monitoring loop thread .." ; let () = - try Debug.with_thread_associated "main" monitor_write_loop writers + try Debug.with_thread_associated "main" monitor_write_loop () with _ -> error "monitoring loop thread has failed" in while true do diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.mli b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml index 7a0db5ec5d7..a677fd17465 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-cpu/rrdp_cpu.ml @@ -20,7 +20,7 @@ module Process = Rrdd_plugin.Process (struct let name = "xcp-rrdd-cpu" end) let xen_flag_complement = Int64.(shift_left 1L 63 |> lognot) -(* This function is used for getting vcpu stats of the VMs present on this host. *) +(* This function is used for getting vCPU stats of the VMs present on this host. *) let dss_vcpus xc doms = List.fold_left (fun dss (dom, uuid, domid) -> @@ -49,7 +49,7 @@ let dss_vcpus xc doms = in cpus (i + 1) (cputime_rrd :: dss) in - (* Runstate info is per-domain rather than per-vcpu *) + (* Runstate info is per-domain rather than per-vCPU *) let dss = let dom_cpu_time = Int64.(to_float @@ logand dom.Xenctrl.cpu_time xen_flag_complement) @@ -57,54 +57,110 @@ let dss_vcpus xc doms = let dom_cpu_time = dom_cpu_time /. (1.0e9 *. 
float_of_int dom.Xenctrl.nr_online_vcpus) in + let ( ++ ) = Int64.add in try - let ri = Xenctrl.domain_get_runstate_info xc domid in + let ri = Xenctrl.Runstateinfo.V2.domain_get xc domid in + let runnable_vcpus_ds = + match ri.Xenctrl.Runstateinfo.V2.runnable with + | 0L -> + [] + | _ -> + [ + ( Rrd.VM uuid + , Ds.ds_make ~name:"runnable_vcpus" ~units:"(fraction)" + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.runnable + /. 1.0e9 + ) + ) + ~description: + "Fraction of time that vCPUs of the domain are runnable" + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () + ) + ] + in ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_fullrun" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time0 /. 1.0e9)) - ~description:"Fraction of time that all VCPUs are running" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time0 /. 1.0e9) + ) + ~description:"Fraction of time that all vCPUs are running" + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_full_contention" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time1 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time1 /. 1.0e9) + ) ~description: - "Fraction of time that all VCPUs are runnable (i.e., \ + "Fraction of time that all vCPUs are runnable (i.e., \ waiting for CPU)" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_concurrency_hazard" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time2 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time2 /. 
1.0e9) + ) ~description: - "Fraction of time that some VCPUs are running and some are \ + "Fraction of time that some vCPUs are running and some are \ runnable" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_blocked" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time3 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time3 /. 1.0e9) + ) ~description: - "Fraction of time that all VCPUs are blocked or offline" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + "Fraction of time that all vCPUs are blocked or offline" + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_partial_run" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time4 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time4 /. 1.0e9) + ) ~description: - "Fraction of time that some VCPUs are running, and some are \ + "Fraction of time that some vCPUs are running and some are \ blocked" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make ~name:"runstate_partial_contention" ~units:"(fraction)" - ~value:(Rrd.VT_Float (Int64.to_float ri.Xenctrl.time5 /. 1.0e9)) + ~value: + (Rrd.VT_Float + (Int64.to_float ri.Xenctrl.Runstateinfo.V2.time5 /. 1.0e9) + ) ~description: - "Fraction of time that some VCPUs are runnable and some are \ + "Fraction of time that some vCPUs are runnable and some are \ blocked" - ~ty:Rrd.Derive ~default:false ~min:0.0 () + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () + ) + :: ( Rrd.VM uuid + , Ds.ds_make ~name:"runnable_any" ~units:"(fraction)" + ~value: + (Rrd.VT_Float + (Int64.to_float + (ri.Xenctrl.Runstateinfo.V2.time1 + ++ ri.Xenctrl.Runstateinfo.V2.time2 + ++ ri.Xenctrl.Runstateinfo.V2.time5 + ) + /. 
1.0e9 + ) + ) + ~description: + "Fraction of time that at least one vCPU is runnable in the \ + domain" + ~ty:Rrd.Derive ~default:false ~min:0.0 ~max:1.0 () ) :: ( Rrd.VM uuid , Ds.ds_make @@ -115,6 +171,7 @@ let dss_vcpus xc doms = ~min:0.0 ~max:1.0 () ) :: dss + @ runnable_vcpus_ds with _ -> dss in try cpus 0 dss with _ -> dss @@ -188,12 +245,15 @@ let dss_hostload xc domains = let load = List.fold_left (fun acc (dom, _, domid) -> - sum 0 dom.Xenctrl.max_vcpu_id (fun id -> - let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid id in - if vcpuinfo.Xenctrl.online && not vcpuinfo.Xenctrl.blocked then - 1 - else - 0 + ( try + sum 0 dom.Xenctrl.max_vcpu_id (fun id -> + let vcpuinfo = Xenctrl.domain_get_vcpuinfo xc domid id in + if vcpuinfo.Xenctrl.online && not vcpuinfo.Xenctrl.blocked then + 1 + else + 0 + ) + with _ -> 0 ) + acc ) @@ -230,14 +290,20 @@ let generate_cpu_ds_list xc () = let _, domains, _ = Xenctrl_lib.domain_snapshot xc in dss_pcpus xc @ dss_vcpus xc domains @ dss_loadavg () @ dss_hostload xc domains +(* 32 vCPUS ~8659 bytes, so 64 vCPUs should fit in 5 *) +let cpu_pages_per_vm = 5 + let _ = Xenctrl.with_intf (fun xc -> let _, domains, _ = Xenctrl_lib.domain_snapshot xc in Process.initialise () ; (* Share one page per PCPU and dom each *) let physinfo = Xenctrl.physinfo xc in - let shared_page_count = physinfo.Xenctrl.nr_cpus + List.length domains in - (* TODO: Can run out of pages if a lot of domains are added at runtime *) + let shared_page_count = + physinfo.Xenctrl.nr_cpus + + Int.max Rrd_interface.max_supported_vms (List.length domains) + * cpu_pages_per_vm + in Process.main_loop ~neg_shift:0.5 ~target:(Reporter.Local shared_page_count) ~protocol:Rrd_interface.V2 ~dss_f:(generate_cpu_ds_list xc) diff --git a/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml b/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml index 0f547015304..6141090eae7 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-iostat/rrdp_iostat.ml @@ 
-332,7 +332,10 @@ let refresh_phypath_to_sr_vdi () = let exec_tap_ctl_list () : ((string * string) * int) list = let tap_ctl = "/usr/sbin/tap-ctl list" in let extract_vdis pid minor _state kind phypath = - if not (kind = "vhd" || kind = "aio") then raise (Failure "Unknown type") ; + if not (kind = "vhd" || kind = "aio" || kind = "qcow2") then ( + D.warn {|"%s" is not a known type.|} kind ; + raise (Failure "Unknown type") + ) ; (* Look up SR and VDI uuids from the physical path *) if not (Hashtbl.mem phypath_to_sr_vdi phypath) then refresh_phypath_to_sr_vdi () ; diff --git a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml index bd31674a03a..72a840f9f15 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-netdev/rrdp_netdev.ml @@ -69,7 +69,7 @@ let get_link_stats () = in List.map (fun link -> (standardise_name (Link.get_name link), link)) links |> (* Only keep interfaces with prefixes on the whitelist, and exclude VLAN - devices (ethx.y). *) + devices (ethx.y). 
*) List.filter (fun (name, _) -> is_whitelisted name && not (is_vlan name)) in let devs = diff --git a/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune b/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune index d45dd928de1..ca5b6ae7d88 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune +++ b/ocaml/xcp-rrdd/bin/rrdp-squeezed/dune @@ -2,8 +2,8 @@ (modes exe) (name rrdp_squeezed) (libraries - rrdd-plugin + rrdd_plugin_xenctrl rrdd_plugins_libs xapi-stdext-std ezxenstore diff --git a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml index 4c0b13cf3e3..df49dca259f 100644 --- a/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml +++ b/ocaml/xcp-rrdd/bin/rrdp-squeezed/rrdp_squeezed.ml @@ -17,10 +17,6 @@ open Rrdd_plugin module Process = Process (struct let name = "xcp-rrdd-squeezed" end) -open Process - -let with_xc f = Xenctrl.with_intf f - module Xs = struct module Client = Xs_client_unix.Client (Xs_transport_unix_client) include Client @@ -38,10 +34,6 @@ module Xs = struct c end -(* Return a list of domids of VMs running on this host *) -let get_running_domains xc = - Xenctrl.domain_getinfolist xc 0 |> List.map (fun di -> di.Xenctrl.domid) - module D = Debug.Make (struct let name = "rrdd-plugins" end) module XSW = Ez_xenstore_watch.Make (D) @@ -53,12 +45,19 @@ let current_dynamic_min_values = ref IntMap.empty let current_target_values = ref IntMap.empty +let current_free_values = ref IntMap.empty + module MemoryActions = struct let interesting_paths_for_domain domid _ = - let keys = ["dynamic-max"; "dynamic-min"; "target"] in - List.map - (fun key -> Printf.sprintf "/local/domain/%d/memory/%s" domid key) - keys + let keys = + [ + "memory/dynamic-max" + ; "memory/dynamic-min" + ; "memory/target" + ; "data/meminfo_free" + ] + in + List.map (fun key -> Printf.sprintf "/local/domain/%d/%s" domid key) keys let watch_token domid = Printf.sprintf "xcp-rrdd-plugins/squeezed:domain-%d" domid @@ -73,10 +72,7 @@ module MemoryActions = struct try let 
client = Xs.get_client () in let value = - Xs.immediate client (fun xs -> Xs.read xs path) - |> Int64.of_string - |> Int64.mul 1024L - (* convert from KiB to bytes *) + Xs.immediate client (fun xs -> Xs.read xs path) |> Int64.of_string in current_memory_values := IntMap.add domid value !current_memory_values with Xs_protocol.Enoent _ -> @@ -92,6 +88,8 @@ module MemoryActions = struct read_new_value domid current_dynamic_min_values | ["local"; "domain"; domid; "memory"; "target"] -> read_new_value domid current_target_values + | ["local"; "domain"; domid; "data"; "meminfo_free"] -> + read_new_value domid current_free_values | _ -> D.debug "Ignoring unexpected watch: %s" path @@ -106,43 +104,78 @@ end module Watcher = WatchXenstore (MemoryActions) -(* Return a tuple (dynamic-max, dynamic-min, target) for a running VM *) -let get_squeezed_data domid = - let get_current_value ~label current_values = - try IntMap.find domid !current_values - with _ -> - if domid <> 0 then - D.warn "Couldn't find cached %s value for domain %d, using 0" label - domid ; - 0L +(** All these values are reported in KiB *) +type values = { + dynamic_max: int64 option + ; dynamic_min: int64 option + ; target: int64 option + ; free: int64 option +} + +let get_values ((_, _, domid) as dom) = + let get_current_value current_values = + IntMap.find_opt domid !current_values in - ( get_current_value ~label:"dynamic-max" current_dynamic_max_values - , get_current_value ~label:"dynamic-min" current_dynamic_min_values - , get_current_value ~label:"target" current_target_values + ( dom + , { + dynamic_max= get_current_value current_dynamic_max_values + ; dynamic_min= get_current_value current_dynamic_min_values + ; target= get_current_value current_target_values + ; free= get_current_value current_free_values + } ) -let get_datas () = - (* Create a tuple (dynamic-max, dynamic-min, target) for each VM running on the host *) - let domids = with_xc get_running_domains in - List.map get_squeezed_data domids 
+let get_domain_stats xc = + let _, domains, _ = Xenctrl_lib.domain_snapshot xc in + List.map get_values domains + +let bytes_of_kib kib = Int64.mul 1024L kib -let generate_squeezed_dss () = +let generate_host_sources xc counters = let memory_reclaimed, memory_possibly_reclaimed = - get_datas () - (* Calculate metrics - - Host memory reclaimed by squeezed = - sum_across_running_vms(dynamic_max - target) - - Host memory that could be reclaimed by squeezed = - sum_across_running_vms(target - dynamic_min) + (* Calculate host metrics + - Host memory reclaimed by squeezed = + sum_across_running_vms(dynamic_max - target) + - Host memory that could be reclaimed by squeezed = + sum_across_running_vms(target - dynamic_min) *) + let ( let* ) = Option.bind in + counters |> List.fold_left - (fun (acc1, acc2) (max, min, target) -> - ( Int64.add acc1 (Int64.sub max target) - , Int64.add acc2 (Int64.sub target min) - ) + (fun (acc1, acc2) (_, {dynamic_max; dynamic_min; target; _}) -> + let r = + let* target in + let acc1 = + let* max = dynamic_max in + Some (Int64.add acc1 (Int64.sub max target)) + in + let acc2 = + let* min = dynamic_min in + Some (Int64.add acc2 (Int64.sub target min)) + in + Some (acc1, acc2) + in + match r with + | None | Some (None, None) -> + (acc1, acc2) + | Some (Some acc1, Some acc2) -> + (acc1, acc2) + | Some (Some acc1, None) -> + (acc1, acc2) + | Some (None, Some acc2) -> + (acc1, acc2) ) (Int64.zero, Int64.zero) in + let memory_reclaimed = bytes_of_kib memory_reclaimed in + let memory_possibly_reclaimed = bytes_of_kib memory_possibly_reclaimed in + let physinfo = Xenctrl.physinfo xc in + let total_kib = + Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.total_pages) + in + let free_kib = + Xenctrl.pages_to_kib (Int64.of_nativeint physinfo.Xenctrl.free_pages) + in (* Build corresponding Ds.ds values *) [ ( Rrd.Host @@ -157,13 +190,137 @@ let generate_squeezed_dss () = ~value:(Rrd.VT_Int64 memory_possibly_reclaimed) ~ty:Rrd.Gauge ~default:true 
~units:"B" () ) + ; ( Rrd.Host + , Ds.ds_make ~name:"memory_total_kib" + ~description:"Total amount of memory in the host" + ~value:(Rrd.VT_Int64 total_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true + ~units:"KiB" () + ) + ; ( Rrd.Host + , Ds.ds_make ~name:"memory_free_kib" + ~description:"Total amount of free memory" + ~value:(Rrd.VT_Int64 free_kib) ~ty:Rrd.Gauge ~min:0.0 ~default:true + ~units:"KiB" () + ) ] -(* This plugin always reports two datasources only, so one page is fine. *) -let shared_page_count = 1 +let res_error fmt = Printf.ksprintf Result.error fmt + +let finally f finally = Fun.protect ~finally f + +let scanning path f = + let io = Scanf.Scanning.open_in path in + finally (fun () -> f io) (fun () -> Scanf.Scanning.close_in io) + +let scan path = + try + scanning path @@ fun io -> + Scanf.bscanf io {|MemTotal: %_d %_s MemFree: %_d %_s MemAvailable: %Ld %s|} + (fun size kb -> Ok (size, kb) + ) + with _ -> res_error "failed to scan %s" path + +let free_dom0 uuid = + let result = + match scan "/proc/meminfo" with + | Ok (size, "kB") -> + Ok size + | Ok (_, unit) -> + res_error "unexpected unit: %s" unit + | Error e -> + Error e + in + match result with + | Ok mem -> + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" + ~description:"Dom0 current free memory" ~value:(Rrd.VT_Int64 mem) + ~ty:Rrd.Gauge ~min:0.0 ~default:true () + ) + | Error msg -> + let _ = + D.error "%s: retrieving Dom0 free memory failed: %s" __FUNCTION__ msg + in + None + +let free_other uuid free = + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" + ~description:"Memory used as reported by the guest agent" + ~value:(Rrd.VT_Int64 free) ~ty:Rrd.Gauge ~min:0.0 ~default:true () + ) + +let get_list f = Option.to_list (f ()) + +let generate_vm_sources domains = + let metrics_of ((dom, uuid, domid), {target; free; _}) = + let target () = + Option.map + (fun target -> + let target = bytes_of_kib target in + ( Rrd.VM uuid + , Ds.ds_make 
~name:"memory_target" + ~description:"Target of VM balloon driver" ~units:"B" + ~value:(Rrd.VT_Int64 target) ~ty:Rrd.Gauge ~min:0.0 ~default:true + () + ) + ) + target + in + let free () = + if domid = 0 then + free_dom0 uuid + else + Option.bind free (free_other uuid) + in + let total () = + let memory = + Int64.of_nativeint dom.Xenctrl.total_memory_pages + |> Xenctrl.pages_to_kib + |> bytes_of_kib + in + Some + ( Rrd.VM uuid + , Ds.ds_make ~name:"memory" + ~description:"Memory currently allocated to VM" ~units:"B" + ~value:(Rrd.VT_Int64 memory) ~ty:Rrd.Gauge ~min:0.0 ~default:true () + ) + in + (* CA-34383: Memory updates from paused domains serve no useful purpose. + During a migrate such updates can also cause undesirable + discontinuities in the observed value of memory_actual. Hence, we + ignore changes from paused domains: *) + if dom.Xenctrl.paused then + [] + else + get_list target @ get_list free @ get_list total + in + + List.concat_map metrics_of domains + +let generate_sources xc () = + let domain_stats = get_domain_stats xc in + generate_host_sources xc domain_stats @ generate_vm_sources domain_stats + +(** The json-like serialization for 3 dss in dss_mem_vms takes 622 bytes. These + bytes plus some overhead make 1024 bytes an upper bound. 
*) + +let bytes_per_mem_vm = 1024 -let _ = - initialise () ; +let host_page_count = 1 + +let vm_page_count = + ((Rrd_interface.max_supported_vms * bytes_per_mem_vm) + 4095) / 4096 + +let shared_page_count = host_page_count + vm_page_count + +let () = Watcher.create_watcher_thread () ; - main_loop ~neg_shift:0.5 ~target:(Reporter.Local shared_page_count) - ~protocol:Rrd_interface.V2 ~dss_f:generate_squeezed_dss + Process.initialise () ; + Xenctrl.with_intf (fun xc -> + Process.main_loop ~neg_shift:0.5 + ~target:(Reporter.Local shared_page_count) ~protocol:Rrd_interface.V2 + ~dss_f:(generate_sources xc) + ) diff --git a/ocaml/xcp-rrdd/bin/rrdview/dune b/ocaml/xcp-rrdd/bin/rrdview/dune new file mode 100644 index 00000000000..e2b2401ff76 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/dune @@ -0,0 +1,17 @@ +(executable + (modes byte exe) + (name rrdview) + ;(public_name rrdview) + (libraries + threads + xapi-rrd.unix + bos.setup + astring + fpath + rresult + xmlm + tyre + xapi-rrd + result) + ;(package xapi-tools) + ) diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml new file mode 100644 index 00000000000..80717c21e36 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.ml @@ -0,0 +1,83 @@ +open Rrd + +type vname = VName of string + +module Rpn = struct + module VDef = struct + (* see rrdgraph_rpn(3) *) + type t = vname * string + + type op = vname -> t + + let op kind vname = (vname, kind) + + let maximum = op "MAXIMUM" + + let minimum = op "MINIMUM" + + let average = op "AVERAGE" + + let stdev = op "STDEV" + + let last = op "LAST" + + let first = op "FIRST" + + let total = op "TOTAL" + + let percent = op "PERCENT" + + let percentnan = op "PERCENTNAN" + + let lsl_slope = op "LSLSLOPE" + + let lsl_intercept = op "LSLSLINT" + + let lsl_correlation = op "LSLCORREL" + end + + module CDef = struct + type t = string Seq.t (* stores a serialized RPN expression *) + + let to_string r = r |> List.of_seq |> String.concat "," + + let 
vname (VName vname) = Seq.return vname + + let value f = Printf.sprintf "%g" f |> Seq.return + + (* reverse polish notation: arguments first, operator last *) + + let opn op args = Seq.append (List.to_seq args |> Seq.concat) (Seq.return op) + + let op1 op arg = opn op [arg] + + let op2 op arg1 arg2 = opn op [arg1; arg2] + + let op3 op arg1 arg2 arg3 = opn op [arg1; arg2; arg3] + end +end + +module Data = struct + type t = string + + (* see rrdgraph_data (3) *) + + let def vname rrdfile rrd rra ds = + let step = Int64.mul rrd.timestep @@ Int64.of_int rra.rra_pdp_cnt in + ( VName vname + , String.concat ":" + [ + "DEF" + ; vname ^ "=" ^ Fpath.to_string rrdfile + ; ds.ds_name + ; Rrd.cf_type_to_string rra.rra_cf + ; Printf.sprintf "step=%Lu" step + ] + ) + + let vdef vname (VName var, rpnvdefop) = + (VName vname, Printf.sprintf "CDEF:%s=%s,%s" vname var rpnvdefop) + + let cdef vname rpn = + (VName vname, Printf.sprintf "CDEF:%s=%s" vname (Rpn.CDef.to_string rpn)) +end diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli new file mode 100644 index 00000000000..0c4ac9738e9 --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/rrdgraph.mli @@ -0,0 +1,88 @@ +(** a variable name *) +type vname + +module Rpn : sig + (** RPN expressions for VDEF statements, see [rrdgraph_rpn(3)] *) + module VDef : sig + (** an RPN expression for VDEF, see [rrdgraph_data(3)] *) + type t + + (** a VDEF RPN expression, see [rrdgraph_rpn(3)] *) + type op = vname -> t + + val maximum : op + (** see [rrdgraph_rpn(3)] *) + + val minimum : op + (** see [rrdgraph_rpn(3)] *) + + val average : op + (** see [rrdgraph_rpn(3)] *) + + val stdev : op + (** see [rrdgraph_rpn(3)] *) + + val last : op + (** see [rrdgraph_rpn(3)] *) + + val first : op + (** see [rrdgraph_rpn(3)] *) + + val total : op + (** see [rrdgraph_rpn(3)] *) + + val percent : op + (** see [rrdgraph_rpn(3)] *) + + val percentnan : op + (** see [rrdgraph_rpn(3)] *) + + val lsl_slope : op + (** see 
[rrdgraph_rpn(3)] *) + + val lsl_intercept : op + (** see [rrdgraph_rpn(3)] *) + + val lsl_correlation : op + (** see [rrdgraph_rpn(3)] *) + end + + module CDef : sig + (** an RPN expression for CDEF, see [rrdgraph_data(3)] *) + type t + + val vname : vname -> t + (** [vname v] is [v] as an RPN expression *) + + val value : float -> t + (** [value v] is [v] as an RPN expression *) + + val op1 : string -> t -> t + (** [op1 op arg1] is [op arg1]. For valid operators see [rrdgraph_rpn(3)] *) + + val op2 : string -> t -> t -> t + (** [op2 op arg1 arg2] is [op arg1 arg2]. For valid operators see [rrdgraph_rpn(3)] *) + + val op3 : string -> t -> t -> t -> t + (** [op3 op arg1 arg2 arg3] is [op arg1 arg2 arg3]. For valid operators see [rrdgraph_rpn(3)] *) + end +end + +module Data : sig + (** an rrd graph data definition, see [rrdgraph_data(3)] *) + type t + + val def : string -> Fpath.t -> Rrd.rrd -> Rrd.rra -> Rrd.ds -> vname * t + (** [def vname rrdfile rrd rra datasource] is a [DEF] (see [rrdgraph_data(3)]) that loads + [datasource.ds_name] from the [rrdfile] and plots it according to the consolidation function in the + specified [rra] and timestep calculated based on [rrd]. This data can be refered to as [vname] + elsewhere. *) + + val vdef : string -> Rpn.VDef.t -> vname * t + (** [vdef vname vdefrpn] defines [vname] through a [VDEF] (see [rrdgraph_data(3)]) using the + specified [vdefrpn] expression. Conversion to RPN form is handled internally. *) + + val cdef : string -> Rpn.CDef.t -> vname * t + (** [cdef vname cdefrpn] defines [vname] through a [CDEF] (see [rrdgraph_data(3)]) using the + specified [cdefrpn] expression. Conversion to RPN form is handled internally. *) +end diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdview.ml b/ocaml/xcp-rrdd/bin/rrdview/rrdview.ml new file mode 100644 index 00000000000..3716f4cfded --- /dev/null +++ b/ocaml/xcp-rrdd/bin/rrdview/rrdview.ml @@ -0,0 +1,483 @@ +(* + * Copyright (C) 2006-2009 Citrix Systems Inc. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +open Bos_setup + +type def = Def of string * Rrd.cf_type | Cdef of string + +let name ~ds_name ~cf_type = + cf_type + |> Rrd.cf_type_to_string + |> String.Ascii.lowercase + |> Printf.sprintf "%s_%s" ds_name + +type ds_def = {units: string option} + +let default_def = {units= None} + +let def ~data ~step ~ds_name ~cf_type = + let cfstr = Rrd.cf_type_to_string cf_type in + let namestr = name ~ds_name ~cf_type in + ( Def (ds_name, cf_type) + , Printf.sprintf "DEF:%s=%s:%s:%s:step=%Ld" namestr (Fpath.to_string data) + ds_name cfstr step + ) + +type ds = Ds : string -> ds + +type cdef = Op of cdef * string * cdef | Var of def + +let rec string_of_cdef = function + | Op (lhs, op, rhs) -> + String.concat ~sep:"," [string_of_cdef lhs; string_of_cdef rhs; op] + | Var (Def (ds_name, cf_type)) -> + name ~ds_name ~cf_type + | Var (Cdef s) -> + s + +let cdef name ops = + (Cdef name, Printf.sprintf "CDEF:%s=%s" name @@ string_of_cdef ops) + +type rgb = {r: int; g: int; b: int; alpha: int option} + +type fill = RGB of rgb + +let shape ?(stack = false) kind ?label ~def fill = + let defstr = + match def with + | Def (ds_name, cf_type) -> + name ~ds_name ~cf_type + | Cdef str -> + str + in + let fillstr = + match fill with + | Some (RGB {r; g; b; alpha}) -> + Printf.sprintf "#%02x%02x%02x%s" r g b + (Option.fold ~none:"" ~some:(Printf.sprintf "%02u") alpha) + | None -> + "" + in + Printf.sprintf "%s:%s%s%s%s" kind defstr fillstr + (if stack then 
":STACK" else "") + (match label with None -> "" | Some x -> ":" ^ x) + +let area = shape "AREA" + +let area_stack = shape ~stack:true "AREA" + +let line ?label = shape ?label "LINE" + +(* colors from rrdtool wiki OutlinedAreaGraph *) +let rgb ?alpha hex = + let r = (hex lsr 16) land 0xff + and g = (hex lsr 8) land 0xff + and b = hex land 0xff in + RGB {r; g; b; alpha} + +let rgb light dark = (rgb light, rgb dark) + +let colors = + [| + rgb 0x54EC48 0x24BC14 + ; rgb 0x48C4EC 0x1598C3 + ; rgb 0xDE48EC 0xB415C7 + ; rgb 0x7648EC 0x4D18E4 + ; rgb 0xEA644A 0xCC3118 + ; rgb 0xEC9D48 0xCC7016 + ; rgb 0xECD748 0xC9B215 + |] + +let get_color ~dark i = + let RGB col_light, col_dark = colors.(i mod Array.length colors) in + Some (if dark then col_dark else RGB {col_light with alpha= Some 50}) + +let rrdtool ~filename ~data title ~ds_names ~first ~last ~step ~width + ~has_min_max = + let graph = + List.of_seq + (ds_names + |> List.mapi (fun x s -> (s, x)) + |> List.to_seq + |> Seq.flat_map @@ fun (ds_name, i) -> + Seq.append + ( if has_min_max then + let ds_min, def1 = def ~step ~data ~ds_name ~cf_type:Rrd.CF_Min + and ds_max, def2 = + def ~step ~data ~ds_name ~cf_type:Rrd.CF_Max + in + let ds_range, cdef1 = + cdef (ds_name ^ "range") (Op (Var ds_max, "-", Var ds_min)) + in + List.to_seq + [ + def1 + ; def2 + ; cdef1 + ; area ~def:ds_min None + ; area_stack ~def:ds_range @@ get_color ~dark:false i + ] + else + Seq.empty + ) + (let ds_avg, def3 = + def ~step ~data ~ds_name ~cf_type:Rrd.CF_Average + in + List.to_seq + [def3; line ~label:ds_name ~def:ds_avg @@ get_color ~dark:true i] + ) + ) + in + Cmd.( + v "rrdtool" + % "graph" + % "--imgformat" + % "SVG" + % Fpath.to_string filename + % "--title" + % title + % "--width" + % string_of_int width + % "--height" + % "256" (* ~4 rows *) + % "--start" + % Int64.to_string first + % "--end" + % Int64.to_string last + %% of_list graph + ) + +let prepare_plot_cmds ~filename ~data rrd = + let open Rrd in + let has cf rra = rra.rra_cf = 
cf in + let has_min = + Array.find_opt (has Rrd.CF_Min) rrd.rrd_rras |> Option.is_some + in + let has_max = + Array.find_opt (has Rrd.CF_Max) rrd.rrd_rras |> Option.is_some + in + rrd.rrd_rras + |> Array.to_seq + |> Seq.map @@ fun rra -> + let timespan = + Int64.mul (Int64.of_int (rra.rra_pdp_cnt * rra.rra_row_cnt)) rrd.timestep + in + let start = rrd.last_updated -. Int64.to_float timespan in + let filename = + Fpath.add_ext (Int64.to_string timespan) filename |> Fpath.add_ext "svg" + in + let title = + Fpath.rem_ext filename + |> Fpath.basename + |> String.cuts ~sep:"." + |> String.concat ~sep:"
" + in + let step = Int64.(mul (of_int rra.rra_pdp_cnt) rrd.timestep) in + let width = 2 * rra.rra_row_cnt in + (* 1 point = 1 CDP from the RRA *) + (* TODO: could look up original names in original_ds *) + rrdtool ~step ~width ~data ~filename title ~ds_names:(ds_names rrd) + ~has_min_max:(has_min && has_max) ~first:(Int64.of_float start) + ~last:(Int64.of_float rrd.last_updated) + +let prepare_plots ?(exec = false) ~filename ~data rrd = + let output = Fpath.set_ext ".sh" filename in + let cmds = prepare_plot_cmds ~filename ~data rrd in + if exec then + cmds + |> Seq.iter @@ fun cmd -> + OS.Cmd.run cmd + |> Logs.on_error_msg ~use:(fun () -> failwith "failed to run rrdtool") + else + cmds + |> Seq.map Cmd.to_string + |> List.of_seq + |> OS.File.write_lines output + |> Logs.on_error_msg ~use:(fun _ -> exit 2) + +let finally f ~(always : unit -> unit) = + match f () with + | result -> + always () ; result + | exception e -> + always () ; raise e + +let with_input_file path f = + if Fpath.has_ext "gz" path then + let cmd = Cmd.(v "zcat" % p path) in + let ic = cmd |> Cmd.to_string |> Unix.open_process_in in + finally + (fun () -> f ic) + ~always:(fun () -> + let (_ : Unix.process_status) = Unix.close_process_in ic in + () + ) + else + let ic = open_in Fpath.(to_string path) in + finally (fun () -> f ic) ~always:(fun () -> close_in ic) + +let with_input_rrd f filename = + with_input_file filename @@ fun ic -> + Logs.info (fun m -> m "Parsing RRD %a" Fpath.pp filename) ; + let input = Xmlm.make_input (`Channel ic) in + let rrd = Rrd.from_xml input in + f ~filename rrd + +(* to avoid mixing data source and filenames we use a different type here *) + +let make_ds ?filename dsname = + let dsname = + if String.length dsname >= 20 then ( + Logs.warn (fun m -> + m "RRD data source name exceeds 20 char limit: %s" dsname + ) ; + String.with_range dsname ~len:19 + ) else + dsname + in + (Option.map Fpath.v filename, Ds dsname) + +let make_sr (dsname, uuid) = make_ds 
~filename:("_sr_" ^ uuid) dsname + +let make_vbd (vbd, dsname) = make_ds ~filename:vbd dsname + +let make_runstate dsname = make_ds ~filename:"runstate" dsname + +(* top-level value to compile regexes only once *) +let classify = + (* some RRD data source names are too long, max is 20 chars. + Splitting RRDs into different files allows to shorten the names, + e.g. remove the UUID from SR datasources. + Some names are still too long, but those can be shortened without losing information. *) + let open Tyre in + let uuid8 = pcre "[0-9a-f]{8}" in + let uuid_rest = pcre "(-[0-9a-f]{4}){3}-[0-9a-f]{12}" in + let dsname = pcre "[a-zA-Z_]+" in + let shorten from target = str from --> fun () -> make_ds target in + [ + (dsname <&> char '_' *> uuid8) --> make_sr + ; (str "sr_" *> uuid8 <* uuid_rest <* char '_' <&> dsname) --> make_sr + ; shorten "Tapdisks_in_low_memory_mode" "Tapdisks_in_lowmem" + ; ( (opt dsname <* str "memory_" <&> dsname) --> fun (pre, post) -> + make_ds (Option.value ~default:"" pre ^ "mem_" ^ post) + ) + ; (pcre "vbd_[^_]+" <* char '_' <&> dsname) --> make_vbd + ; (str "runstate_" *> dsname) --> make_runstate + ; ( (str "cpu" *> int <&> opt @@ (str "-C" *> int)) --> fun (cpuidx, cstate) -> + let filename = + match cstate with None -> "cpu" | Some n -> Printf.sprintf "cpu-C%d" n + in + make_ds ~filename ("cpu" ^ string_of_int cpuidx) + ) + ; (str "cpu_avg" --> fun () -> make_ds ~filename:"cpu_avg" "cpu_avg") + ; (pcre "pif_" *> dsname) --> make_ds ~filename:"pif" + (* TODO: could provide info on polarity based on rx/tx and on kind, TICK for errors *) + ] + |> route + +let classify_dsname dsname = + let error _ = make_ds dsname in + dsname |> Tyre.exec classify |> Result.fold ~ok:Fun.id ~error + +let classify ~ds_def ~filename ds = + let open Rrd in + let override, dsname = classify_dsname ds.ds_name in + let pathname = + let name = Fpath.rem_ext filename in + match override with + | None -> + Fpath.(name + "_filtered") + | Some newname -> + Fpath.(name + 
to_string newname) + in + (* Logs.debug (fun m -> m "%s -> %a" ds.ds_name Fpath.pp pathname); *) + let def = + StringMap.find_opt ds.ds_name ds_def |> Option.value ~default:default_def + in + (* can only plot graphs with same units *) + let extra = + match def.units with + | None -> + (* use RRD type as approximation to "same unit", at least same kind of unit, + e.g. rate vs duration *) + Rrd.ds_type_to_string ds.ds_ty + | Some u -> + String.take ~sat:Char.Ascii.is_alphanum u + in + (Fpath.(pathname + extra |> add_ext "xml"), dsname) + +let rrdtool = + OS.Cmd.resolve (Cmd.v "rrdtool") + |> Logs.on_error_msg ~use:(fun () -> failwith "rrdtool is not installed") + +let rrd_restore filename rrd = + let filename = Fpath.set_ext "xml" filename in + Logs.debug (fun m -> m "Writing RRD xml to %a" Fpath.pp filename) ; + let () = + Out_channel.with_open_text (Fpath.to_string filename) @@ fun ch -> + Rrd_unix.to_fd rrd (Unix.descr_of_out_channel ch) + in + let dot_rrd = Fpath.set_ext "rrd" filename in + Logs.debug (fun m -> m "Restoring RRD to %a" Fpath.pp dot_rrd) ; + Cmd.(rrdtool % "restore" % "-f" % p filename % p dot_rrd) + |> OS.Cmd.run + |> Result.map (fun () -> dot_rrd) + +let split_rrd ~ds_def ~filename rrd = + let open Rrd in + let rrds = Hashtbl.create 3 in + let original_ds = Hashtbl.create 127 in + + (* split the rrd into multiple rrds based on data source name *) + let () = + Logs.info (fun m -> m "classifying data sources") ; + rrd.rrd_dss + |> Array.iteri @@ fun i ds -> + let filename, Ds ds_name = classify ~ds_def ~filename ds in + let get_i rra = (rra.rra_data.(i), rra.rra_cdps.(i)) in + let previous = + Hashtbl.find_opt rrds filename |> Option.value ~default:[] + in + Hashtbl.replace original_ds ds_name ds ; + Hashtbl.replace rrds filename + @@ (({ds with ds_name}, Array.map get_i rrd.rrd_rras) :: previous) + in + Logs.info (fun m -> m "Building and restoring RRDs") ; + (* now build an RRD and restore it to binary .rrd form *) + rrds + |> Hashtbl.iter @@ fun 
filename lst -> + Logs.debug (fun m -> m "Building %a" Fpath.pp filename) ; + let rrd_dss, rrd_rras = List.split lst in + let rrd_rras = + rrd.rrd_rras + |> Array.mapi @@ fun i rra -> + let rra_seq = List.to_seq rrd_rras in + let geti a = a.(i) in + { + rra with + rra_data= rra_seq |> Seq.map geti |> Seq.map fst |> Array.of_seq + ; rra_cdps= rra_seq |> Seq.map geti |> Seq.map snd |> Array.of_seq + } + in + let rrd = {rrd with rrd_dss= Array.of_list rrd_dss; rrd_rras} in + let data = + rrd_restore filename rrd + |> Logs.on_error_msg ~use:(fun () -> failwith "Failed to restore RRD") + in + prepare_plots ~filename ~data rrd + +type mode = Split | Default | Plot + +let parse_ds_def def k v = + match k with "units" when v <> "unknown" -> {units= Some v} | _ -> def + +let parse_ds_defs path = + Logs.info (fun m -> m "Loading data source definitions from %a" Fpath.pp path) ; + let fields line = + line + |> String.cut ~sep:":" + |> Option.map @@ fun (k, v) -> (String.trim k, String.trim v) + in + let fold (map, key_opt) line = + match (fields line, key_opt) with + | Some ("name_label", ds_name), None -> + (map, Some ds_name) (* start parsing new item *) + | _, None -> + (map, None) (* ignore *) + | None, Some _ -> + (map, None) + | Some (k, v), Some ds_name -> + let map = + map + |> Rrd.StringMap.update ds_name @@ fun def -> + Some (parse_ds_def (Option.value ~default:default_def def) k v) + in + (map, Some ds_name) + in + OS.File.fold_lines fold (Rrd.StringMap.empty, None) path + |> Logs.on_error_msg ~use:(fun _ -> + failwith "Could not parse datasource definitions" + ) + |> fst + +let plot_rrd ~filename rrd = + let data = + rrd_restore filename rrd + |> Logs.on_error_msg ~use:(fun () -> failwith "Failed to restore RRD") + in + prepare_plots ~exec:true ~filename ~data rrd + +let () = + let open OS.Arg in + let level = + let conv = + conv ~docv:"LEVEL" Logs.level_of_string Fmt.(option Logs.pp_level) + in + opt ~doc:"Set log level" ["log"] conv ~absent:(Some Logs.Debug) + 
in + let mode = + opt + ~doc: + "Used in self-invocation to split rrd into multiple rrds, or to plot \ + an already split rrd" + ["mode"] ~absent:Default + @@ enum [("split", Split); ("plot", Plot); ("default", Default)] + in + + let data_source_list = + opt ~doc:"Load data source definitions" ~docv:"PATH" ["def"] ~absent:None + (some path) + in + let paths = + OS.Arg.( + parse ~doc:"Split and plot xcp-rrdd XML rrd.gz with rrdtool" ~pos:path () + ) + in + + Logs.set_level level ; + let ds_def = + Option.map parse_ds_defs data_source_list + |> Option.value ~default:Rrd.StringMap.empty + in + match mode with + | Default -> + let cmd = + Cmd.( + v "find" %% of_values p paths % "-name" % "*.gz" % "-print0" + |> OS.Cmd.run_out + ) + in + (* TODO: forward level *) + let xargs = + Cmd.( + v "xargs" + % "-0" + % "-P0" + % "-n1" + % Sys.executable_name + %% of_values ~slip:"--def" p (Option.to_list data_source_list) + % "--mode=split" + |> OS.Cmd.run_in + ) + in + let res = + OS.Cmd.out_run_in cmd + |> Logs.on_error_msg ~use:(fun _ -> exit 1) + |> xargs + in + Logs.on_error_msg ~use:(fun _ -> exit 1) res + | Split -> + paths |> List.iter @@ with_input_rrd (split_rrd ~ds_def) + | Plot -> + paths |> List.iter @@ with_input_rrd plot_rrd diff --git a/ocaml/xcp-rrdd/bin/rrdview/rrdview.mli b/ocaml/xcp-rrdd/bin/rrdview/rrdview.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml b/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml index 9f8fa196df4..92cff3e4966 100644 --- a/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml +++ b/ocaml/xcp-rrdd/lib/transport/file/rrd_file_writer.ml @@ -14,6 +14,8 @@ type local_id = {path: string; shared_page_count: int} +let finally f finally = Fun.protect ~finally f + module File = struct let page_size = 4096 @@ -29,14 +31,17 @@ module File = struct let init {path; shared_page_count} = let size = shared_page_count * page_size in let fd = Unix.openfile path [Unix.O_RDWR; 
Unix.O_CREAT] 0o600 in - let mapping = - Bigarray.( - array1_of_genarray @@ Unix.map_file fd char c_layout true [|size|] + finally + (fun () -> + let mapping = + Bigarray.( + array1_of_genarray @@ Unix.map_file fd char c_layout true [|size|] + ) + in + let cstruct = Cstruct.of_bigarray mapping in + (path, cstruct) ) - in - Unix.close fd ; - let cstruct = Cstruct.of_bigarray mapping in - (path, cstruct) + (fun () -> Unix.close fd) let cleanup _ path _ = Unix.unlink path diff --git a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml index bb0f726b5eb..5ff9fac1bf2 100644 --- a/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml @@ -60,11 +60,11 @@ let host_rrds rrd_info = Hashtbl.add h "host" rrd_info ; Some h -let update_rrds_test ~timestamp ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds +let update_rrds_test ~timestamp ~dss ~uuid_domids ~expected_vm_rrds ~expected_sr_rrds ~expected_host_dss = let test () = reset_rrdd_shared_state () ; - Rrdd_monitor.update_rrds uuid_domids paused_vms + Rrdd_monitor.update_rrds uuid_domids (List.to_seq [("update_rrds_test", timestamp, List.to_seq dss)]) ; check_datasources "VM" (Some Rrdd_shared.vm_rrds) expected_vm_rrds ; check_datasources "SR" (Some Rrdd_shared.sr_rrds) expected_sr_rrds ; @@ -74,63 +74,61 @@ let update_rrds_test ~timestamp ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds let update_rrds = let open Rrd in + let map_of_list ls = StringMap.of_seq (List.to_seq ls) in [ ( "Null update" - , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:[] ~paused_vms:[] + , update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single host update" , update_rrds_test ~timestamp:0. 
~dss:[(Host, ds_a)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a])] ) ; ( "Multiple host updates" , update_rrds_test ~timestamp:0. ~dss:[(Host, ds_a); (Host, ds_b)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[("host", [ds_a; ds_b])] ) ; ( "Single non-resident VM update" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple non-resident VM updates" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Single resident VM update" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a)] - ~uuid_domids:[("a", 1)] - ~paused_vms:[] + ~uuid_domids:(map_of_list [("a", 1)]) ~expected_vm_rrds:[("a", [ds_a])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident VM updates" , update_rrds_test ~timestamp:0. ~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "b", ds_b)] - ~uuid_domids:[("a", 1); ("b", 1)] - ~paused_vms:[] + ~uuid_domids:(map_of_list [("a", 1); ("b", 1)]) ~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple resident and non-resident VM updates" , update_rrds_test ~timestamp:0. 
~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "c", ds_a)] - ~uuid_domids:[("a", 1); ("b", 1)] - ~paused_vms:[] + ~uuid_domids:(map_of_list [("a", 1); ("b", 1)]) ~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a])] ~expected_sr_rrds:[] ~expected_host_dss:[] ) ; ( "Multiple SR updates" , update_rrds_test ~timestamp:0. ~dss:[(SR "a", ds_a); (SR "b", ds_a); (SR "b", ds_b)] - ~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] + ~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])] ~expected_host_dss:[] ) diff --git a/ocaml/xe-cli/bash-completion b/ocaml/xe-cli/bash-completion index 8120df874f3..d11195c667c 100644 --- a/ocaml/xe-cli/bash-completion +++ b/ocaml/xe-cli/bash-completion @@ -342,7 +342,7 @@ _xe() __xe_debug "triggering autocompletion for type, class is '$fst'" if [[ "$fst" == "vbd" ]]; then - set_completions 'Disk,CD' "$value" + set_completions 'Disk,CD,Floppy' "$value" elif [[ "$fst" == "vdi" ]]; then set_completions 'system,user,suspend,crashdump' "$value" elif [[ "$fst" == "sr" ]]; then @@ -542,12 +542,16 @@ _xe() hvm | nomigrate | nested-virt | PV-drivers-up-to-date | \ PV-drivers-detected | live | cooperative | enforce-homogeneity | \ host-metrics-live | sharable | read-only | storage-lock | missing | \ - metadata-latest | empty | clustered | pool-auto-join | joined) + metadata-latest | empty | clustered | pool-auto-join | joined | \ + dry-run | metadata | paused | approximate | copy | progress | public | \ + include-snapshots | preserve-power-state | soft | update | is-unique) # Until autocompletion can be generated from the # datamodel, this is just naive hardcoding. 
These cases were # obtained by looking for boolean fields: # 'xapi-cli-server/records.ml | grep bool_of_string' and # 'grep string_of_bool' + # and + # 'xapi-cli-server/cli_frontend.ml | grep get_bool_param' __xe_debug "triggering autocompletion for boolean params" IFS=$'\n,' set_completions 'true,false' "$value" @@ -566,11 +570,18 @@ _xe() else all="--all" fi - if [[ "$fst" == "into-vdi" || "$fst" == "base-vdi" || "$fst" == "vdi-from" || "$fst" == "vdi-to" ]]; then + + case "$fst" in + into-vdi | base-vdi | vdi-from | vdi-to | suspend-VDI) class=vdi - else + ;; + suspend-SR) + class=sr + ;; + *) class="$fst" - fi + ;; + esac # Show corresponding name labels for each UUID SHOW_DESCRIPTION=1 @@ -588,7 +599,21 @@ _xe() __xe_debug "fst is '$fst', snd is '$snd'" if [[ "$snd" == "list" || "$fst" == "vm" ]]; then IFS=$'\n,' - set_completions_for_names "${fst}-list" "$param" "$value" + + # Try to provide a helpful "description" to the suggestions + case "$param" in + resident-on | affinity) + SHOW_DESCRIPTION=1 + class="host" + ;; + *) + ;; + esac + + local name_label_cmd="$xe ${class}-list params=name-label 2>/dev/null --minimal uuid=" + __xe_debug "description class is '$class'" + + set_completions_for_names "${fst}-list" "$param" "$value" "$name_label_cmd" return 0 fi fi @@ -638,7 +663,7 @@ _xe() local previous_params="${OLDSTYLE_WORDS[@]:2:$params_len}" previous_params=$( echo "$previous_params" | cut -d= -f1 | \ sed -r '/^\s*$/d' | cut -d: -f1 | \ - sed -re 's/^/-e "\\s*/g' -e 's/$/[=:]"/g' | paste -sd " ") + sed -re 's/^/-e "^\\s*/g' -e 's/$/[=:]"/g' | paste -sd " ") set_completions "$SUBCOMMAND_PARAMS" "$param" "" "$previous_params" @@ -755,6 +780,10 @@ __add_completion() local description_cmd="$2" local max_cmd_length="$3" + if [ "$word" = "" ]; then + return 0 + fi + COMPLETION_SUGGESTIONS=$((COMPLETION_SUGGESTIONS+1)) __xe_debug "\t$word" @@ -768,8 +797,8 @@ __add_completion() COMPREPLY+=( $(printf '%s%q' "$description" "$word") ) else if [[ $SHOW_DESCRIPTION == 
1 ]]; then - __xe_debug "\t showing command description - '$description'" description=" - $(eval $description_cmd$word)" + __xe_debug "\t showing command description - '$description'" fi # Right-pad the command with spaces before the help string COMPREPLY+=( $(printf "%-${max_cmd_length}q %s" "$word" "$description") ) @@ -780,7 +809,8 @@ __preprocess_suggestions() { wordlist=$( echo "$1" | \ sed -re 's/(^|[^\])((\\\\)*),,*/\1\2\n/g' -e 's/\\,/,/g' -e 's/\\\\/\\/g' | \ - sed -e 's/ *$//') + sed -e 's/ *$//' | \ + sort -u ) local IFS=$'\n' for word in $wordlist; do if [[ "$word" =~ ^$prefix.* ]]; then @@ -879,4 +909,4 @@ __autocomplete_reqd_params_names() return 0 } -bind -x '"\C-rq":"__autocomplete_reqd_params_names"' +bind -x '"\eq":"__autocomplete_reqd_params_names"' diff --git a/ocaml/xe-cli/newcli.ml b/ocaml/xe-cli/newcli.ml index c33e32a2e0a..60ecce2a47d 100644 --- a/ocaml/xe-cli/newcli.ml +++ b/ocaml/xe-cli/newcli.ml @@ -66,15 +66,26 @@ let debug fmt = exception Usage let usage () = - error - "Usage: %s [-s server] [-p port] ([-u username] [-pw password] or \ - [-pwf ]) [--traceparent traceparent] \n" - Sys.argv.(0) ; - error - "\n\ - A full list of commands can be obtained by running \n\ - \t%s help -s -p \n" - Sys.argv.(0) + let help = + Printf.sprintf + {|Usage: + %s + [ -s ] XenServer host + [ -p ] XenServer port number + [ -u -pw | -pwf ] + User authentication (password or file) + [ --nossl ] Disable SSL/TLS + [ --debug ] Enable debug output + [ --debug-on-fail ] Enable debug output only on failure + [ --traceparent ] Distributed tracing context + [ ... 
] Command-specific options + +A full list of commands can be obtained by running + %s help -s -p +|} + Sys.argv.(0) Sys.argv.(0) + in + error "%s" help let is_localhost ip = ip = "127.0.0.1" diff --git a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c index cbf2af76145..4af5e60c8ec 100644 --- a/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c +++ b/ocaml/xenopsd/c_stubs/xenctrlext_stubs.c @@ -323,40 +323,72 @@ CAMLprim value stub_xenctrlext_domain_update_channels(value xch_val, value domid } /* based on xenctrl_stubs.c */ -static int get_cpumap_len(value xch_val, value cpumap) +static int get_cpumap_len(xc_interface *xch, value cpumap_val) { - xc_interface* xch = xch_of_val(xch_val); - int ml_len = Wosize_val(cpumap); + int ml_len = Wosize_val(cpumap_val); int xc_len = xc_get_max_cpus(xch); return (ml_len < xc_len ? ml_len : xc_len); } -CAMLprim value stub_xenctrlext_vcpu_setaffinity_soft(value xch_val, value domid, - value vcpu, value cpumap) +static void populate_cpumap(xc_interface *xch, xc_cpumap_t cpumap, + value cpumap_val) { - CAMLparam4(xch_val, domid, vcpu, cpumap); - int i, len = get_cpumap_len(xch_val, cpumap); - xc_cpumap_t c_cpumap; - int retval; + int i, len = get_cpumap_len(xch, cpumap_val); + for (i=0; i debug "No allocations possible" ; None - | Some allocated -> + | Some (allocated, nodes) -> debug "Allocated resources: %s" (Fmt.to_to_string NUMAResource.pp_dump allocated) ; assert (NUMARequest.fits vm allocated) ; - Some allocated.NUMAResource.affinity + Some (allocated.NUMAResource.affinity, nodes) diff --git a/ocaml/xenopsd/lib/softaffinity.mli b/ocaml/xenopsd/lib/softaffinity.mli index 7bef2079f89..5b1f550af5b 100644 --- a/ocaml/xenopsd/lib/softaffinity.mli +++ b/ocaml/xenopsd/lib/softaffinity.mli @@ -14,7 +14,11 @@ open Topology -val plan : NUMA.t -> NUMAResource.t array -> vm:NUMARequest.t -> CPUSet.t option +val plan : + NUMA.t + -> NUMAResource.t array + -> vm:NUMARequest.t + -> (Topology.CPUSet.t * 
Topology.NUMA.node list) option (** [plan host nodes ~vm] returns the CPU soft affinity recommended for [vm], Such that the memory latency between the NUMA nodes of the vCPUs is small, and usage of NUMA nodes is balanced. diff --git a/ocaml/xenopsd/lib/storage.ml b/ocaml/xenopsd/lib/storage.ml index 72dd3b03322..a051b34f7ca 100644 --- a/ocaml/xenopsd/lib/storage.ml +++ b/ocaml/xenopsd/lib/storage.ml @@ -62,20 +62,20 @@ let vm_of_domid vmdomid = "Invalid domid, could not be converted to int, passing empty string." ; Storage_interface.Vm.of_string "" -let attach_and_activate ~task ~_vm ~vmdomid ~dp ~sr ~vdi ~read_write = +let attach ~task ~_vm ~vmdomid ~dp ~sr ~vdi ~read_write = + let dbg = get_dbg task in + Xenops_task.with_subtask task + (Printf.sprintf "VDI.attach3 %s" dp) + (transform_exception (fun () -> + Client.VDI.attach3 dbg dp sr vdi vmdomid read_write + ) + ) + +let activate ~task ~_vm ~vmdomid ~dp ~sr ~vdi = let dbg = get_dbg task in - let result = - Xenops_task.with_subtask task - (Printf.sprintf "VDI.attach3 %s" dp) - (transform_exception (fun () -> - Client.VDI.attach3 dbg dp sr vdi vmdomid read_write - ) - ) - in Xenops_task.with_subtask task (Printf.sprintf "VDI.activate3 %s" dp) - (transform_exception (fun () -> Client.VDI.activate3 dbg dp sr vdi vmdomid)) ; - result + (transform_exception (fun () -> Client.VDI.activate3 dbg dp sr vdi vmdomid)) let deactivate task dp sr vdi vmdomid = debug "Deactivating disk %s %s" (Sr.string_of sr) (Vdi.string_of vdi) ; diff --git a/ocaml/xenopsd/lib/topology.ml b/ocaml/xenopsd/lib/topology.ml index f706f542d5e..a2cd401a0cc 100644 --- a/ocaml/xenopsd/lib/topology.ml +++ b/ocaml/xenopsd/lib/topology.ml @@ -298,7 +298,7 @@ module NUMA = struct None else ( List.iter (fun (Node n) -> t.node_usage.(n) <- t.node_usage.(n) + 1) nodes ; - Some result + Some (result, nodes) ) let pp_dump_node = Fmt.(using (fun (Node x) -> x) int) diff --git a/ocaml/xenopsd/lib/topology.mli b/ocaml/xenopsd/lib/topology.mli index 
478a7ac2b64..f1bd6f9f569 100644 --- a/ocaml/xenopsd/lib/topology.mli +++ b/ocaml/xenopsd/lib/topology.mli @@ -150,7 +150,10 @@ module NUMA : sig NUMA nodes > 16 it limits the length of the sequence to [n+65520], to avoid exponential blowup. *) - val choose : t -> (node list * NUMAResource.t) Seq.t -> NUMAResource.t option + val choose : + t + -> (node list * NUMAResource.t) Seq.t + -> (NUMAResource.t * node list) option (** [choose t resources] will choose one NUMA node deterministically, trying to keep the overall NUMA node usage balanced *) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index 350227aa028..6a06b36ba14 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -37,6 +37,8 @@ let finally = Xapi_stdext_pervasives.Pervasiveext.finally let domain_shutdown_ack_timeout = ref 60. +let xenopsd_vbd_plug_unplug_legacy = ref true + type context = { transferred_fd: Unix.file_descr option (** some API calls take a file descriptor argument *) @@ -122,10 +124,14 @@ type atomic = | VM_hook_script_stable of (Vm.id * Xenops_hooks.script * string * Vm.id) | VM_hook_script of (Vm.id * Xenops_hooks.script * string) | VBD_plug of Vbd.id + | VBD_attach of Vbd.id + | VBD_activate of Vbd.id | VBD_epoch_begin of (Vbd.id * disk * bool) | VBD_epoch_end of (Vbd.id * disk) | VBD_set_qos of Vbd.id | VBD_unplug of Vbd.id * bool + | VBD_deactivate of Vbd.id * bool + | VBD_detach of Vbd.id | VBD_insert of Vbd.id * disk | VBD_set_active of Vbd.id * bool | VM_remove of Vm.id @@ -162,6 +168,8 @@ type atomic = | VM_rename of (Vm.id * Vm.id * rename_when) | VM_import_metadata of (Vm.id * Metadata.t) | Parallel of Vm.id * string * atomic list + | Nested_parallel of Vm.id * string * atomic list + (** used to make nested parallel atoms explicit, as each atom requires its own worker *) | Serial of Vm.id * string * atomic list | Best_effort of atomic [@@deriving rpcty] @@ -195,6 +203,10 @@ let rec name_of_atomic = 
function "VM_hook_script" | VBD_plug _ -> "VBD_plug" + | VBD_attach _ -> + "VBD_attach" + | VBD_activate _ -> + "VBD_activate" | VBD_epoch_begin _ -> "VBD_epoch_begin" | VBD_epoch_end _ -> @@ -203,6 +215,10 @@ let rec name_of_atomic = function "VBD_set_qos" | VBD_unplug _ -> "VBD_unplug" + | VBD_deactivate _ -> + "VBD_deactivate" + | VBD_detach _ -> + "VBD_detach" | VBD_insert _ -> "VBD_insert" | VBD_set_active _ -> @@ -272,6 +288,9 @@ let rec name_of_atomic = function | Parallel (_, _, atomics) -> Printf.sprintf "Parallel (%s)" (String.concat " | " (List.map name_of_atomic atomics)) + | Nested_parallel (_, _, atomics) -> + Printf.sprintf "Nested_parallel (%s)" + (String.concat " | " (List.map name_of_atomic atomics)) | Serial (_, _, atomics) -> Printf.sprintf "Serial (%s)" (String.concat " & " (List.map name_of_atomic atomics)) @@ -281,7 +300,7 @@ let rec name_of_atomic = function let rec atomic_expires_after = function | Serial (_, _, ops) -> List.map atomic_expires_after ops |> List.fold_left ( +. ) 0. - | Parallel (_, _, ops) -> + | Parallel (_, _, ops) | Nested_parallel (_, _, ops) -> List.map atomic_expires_after ops |> List.fold_left Float.max 0. 
| _ -> (* 20 minutes, in seconds *) @@ -297,6 +316,7 @@ type vm_migrate_op = { ; vmm_tmp_dest_id: Vm.id ; vmm_compress: bool ; vmm_verify_dest: bool + ; vmm_localhost_migration: bool } [@@deriving rpcty] @@ -828,10 +848,11 @@ module Queues = struct let get tag qs = with_lock qs.m (fun () -> - if StringMap.mem tag qs.qs then - StringMap.find tag qs.qs - else - Queue.create () + match StringMap.find_opt tag qs.qs with + | Some x -> + x + | None -> + Queue.create () ) let tags qs = @@ -842,10 +863,11 @@ module Queues = struct let push_with_coalesce should_keep tag item qs = with_lock qs.m (fun () -> let q = - if StringMap.mem tag qs.qs then - StringMap.find tag qs.qs - else - Queue.create () + match StringMap.find_opt tag qs.qs with + | Some x -> + x + | None -> + Queue.create () in push_with_coalesce should_keep item q ; qs.qs <- StringMap.add tag q qs.qs ; @@ -901,6 +923,33 @@ module Redirector = struct Parallel atoms, creating a deadlock. *) let parallel_queues = {queues= Queues.create (); mutex= Mutex.create ()} + (* We create another queue only for Nested_parallel atoms for the same reason + as parallel_queues. When a Nested_parallel atom is inside a Parallel atom, + they are both using a worker whilst not doing any work, so they each need + additional space to prevent a deadlock. *) + let nested_parallel_queues = + {queues= Queues.create (); mutex= Mutex.create ()} + + (* We create another queue only for VM_receive_memory operations for the same reason again. + Migration spawns 2 operations, send and receive, so if there is limited available worker space + a deadlock can happen when VMs are migrating between hosts or on localhost migration + as the receiver has no free workers to receive memory. 
*) + let receive_memory_queues = {queues= Queues.create (); mutex= Mutex.create ()} + + (* we do not want to use = when comparing queues: queues can contain + (uncomparable) functions, and we are only interested in comparing the + equality of their static references *) + let is_same_redirector q1 q2 = q1 == q2 + + let to_string r = + match r with + | w when is_same_redirector w parallel_queues -> + "Parallel" + | w when is_same_redirector w nested_parallel_queues -> + "Nested_parallel" + | _ -> + "Default" + (* When a thread is actively processing a queue, items are redirected to a thread-private queue *) let overrides = ref StringMap.empty @@ -1020,6 +1069,8 @@ module Redirector = struct List.concat_map one (default.queues :: parallel_queues.queues + :: nested_parallel_queues.queues + :: receive_memory_queues.queues :: List.map snd (StringMap.bindings !overrides) ) ) @@ -1204,11 +1255,11 @@ module WorkerPool = struct operate *) let count_active queues = with_lock m (fun () -> - (* we do not want to use = when comparing queues: queues can contain - (uncomparable) functions, and we are only interested in comparing the - equality of their static references *) List.map - (fun w -> w.Worker.redirector == queues && Worker.is_active w) + (fun w -> + Redirector.is_same_redirector w.Worker.redirector queues + && Worker.is_active w + ) !pool |> List.filter (fun x -> x) |> List.length @@ -1216,17 +1267,18 @@ module WorkerPool = struct let find_one queues f = List.fold_left - (fun acc x -> acc || (x.Worker.redirector == queues && f x)) + (fun acc x -> + acc || (Redirector.is_same_redirector x.Worker.redirector queues && f x) + ) false (* Clean up any shutdown threads and remove them from the master list *) let gc queues pool = List.fold_left (fun acc w -> - (* we do not want to use = when comparing queues: queues can contain - (uncomparable) functions, and we are only interested in comparing the - equality of their static references *) - if w.Worker.redirector == queues && 
Worker.get_state w = Worker.Shutdown + if + Redirector.is_same_redirector w.Worker.redirector queues + && Worker.get_state w = Worker.Shutdown then ( Worker.join w ; acc ) else @@ -1253,7 +1305,9 @@ module WorkerPool = struct let start size = for _i = 1 to size do incr Redirector.default ; - incr Redirector.parallel_queues + incr Redirector.parallel_queues ; + incr Redirector.nested_parallel_queues ; + incr Redirector.receive_memory_queues done let set_size size = @@ -1268,7 +1322,9 @@ module WorkerPool = struct done in inner Redirector.default ; - inner Redirector.parallel_queues + inner Redirector.parallel_queues ; + inner Redirector.nested_parallel_queues ; + inner Redirector.receive_memory_queues end (* Keep track of which VMs we're rebooting so we avoid transient glitches where @@ -1569,6 +1625,11 @@ let collect_into apply = function [] -> [] | [op] -> [op] | lst -> apply lst let parallel name ~id = collect_into (fun ls -> [Parallel (id, Printf.sprintf "%s VM=%s" name id, ls)]) +let nested_parallel name ~id = + collect_into (fun ls -> + [Nested_parallel (id, Printf.sprintf "%s VM=%s" name id, ls)] + ) + let serial name ~id = collect_into (fun ls -> [Serial (id, Printf.sprintf "%s VM=%s" name id, ls)]) @@ -1578,8 +1639,31 @@ let serial_concat name ~id lst = serial name ~id (List.concat lst) let parallel_map name ~id lst f = parallel name ~id (List.concat_map f lst) +let nested_parallel_map name ~id lst f = + nested_parallel name ~id (List.concat_map f lst) + let map_or_empty f x = Option.value ~default:[] (Option.map f x) +(* Creates a Serial of 2 or more Atomics. 
If the number of Atomics could be + less than this, use serial or serial_concat *) +let serial_of name ~id at1 at2 ats = + Serial (id, Printf.sprintf "%s VM=%s" name id, at1 :: at2 :: ats) + +let vbd_plug vbd_id = + if !xenopsd_vbd_plug_unplug_legacy then + VBD_plug vbd_id + else + serial_of "VBD.attach_and_activate" ~id:(VBD_DB.vm_of vbd_id) + (VBD_attach vbd_id) (VBD_activate vbd_id) [] + +let vbd_unplug vbd_id force = + if !xenopsd_vbd_plug_unplug_legacy then + VBD_unplug (vbd_id, force) + else + serial_of "VBD.deactivate_and_detach" ~id:(VBD_DB.vm_of vbd_id) + (VBD_deactivate (vbd_id, force)) + (VBD_detach vbd_id) [] + let rec atomics_of_operation = function | VM_start (id, force) -> let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in @@ -1595,7 +1679,7 @@ let rec atomics_of_operation = function let pf = Printf.sprintf in let name_multi = pf "VBDs.activate_epoch_and_plug %s" typ in let name_one = pf "VBD.activate_epoch_and_plug %s" typ in - parallel_map name_multi ~id vbds (fun vbd -> + nested_parallel_map name_multi ~id vbds (fun vbd -> serial_concat name_one ~id [ [VBD_set_active (vbd.Vbd.id, true)] @@ -1604,7 +1688,7 @@ let rec atomics_of_operation = function [VBD_epoch_begin (vbd.Vbd.id, x, vbd.Vbd.persistent)] ) vbd.Vbd.backend - ; [VBD_plug vbd.Vbd.id] + ; [vbd_plug vbd.Vbd.id] ] ) in @@ -1629,11 +1713,11 @@ let rec atomics_of_operation = function vifs ; serial_concat "VGPUs.activate & PCI.plug (SRIOV)" ~id [ - parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> + nested_parallel_map "VGPUs.activate" ~id vgpus (fun vgpu -> [VGPU_set_active (vgpu.Vgpu.id, true)] ) - ; parallel_map "PCIs.plug (SRIOV)" ~id pcis_sriov (fun pci -> - [PCI_plug (pci.Pci.id, false)] + ; nested_parallel_map "PCIs.plug (SRIOV)" ~id pcis_sriov + (fun pci -> [PCI_plug (pci.Pci.id, false)] ) ] ] @@ -1668,7 +1752,7 @@ let rec atomics_of_operation = function ] ; parallel_concat "Devices.unplug" ~id [ - List.map (fun vbd -> VBD_unplug (vbd.Vbd.id, true)) vbds + List.map (fun vbd 
-> vbd_unplug vbd.Vbd.id true) vbds ; List.map (fun vif -> VIF_unplug (vif.Vif.id, true)) vifs ; List.map (fun pci -> PCI_unplug pci.Pci.id) pcis ] @@ -1681,7 +1765,8 @@ let rec atomics_of_operation = function serial "VIF.activate_and_plug" ~id [VIF_set_active (vif.Vif.id, true); VIF_plug vif.Vif.id] ) - | VM_restore_devices (id, restore_vifs) -> + | VM_restore_devices (id, migration) -> + let restore_vifs = not migration in let vbds_rw, vbds_ro = VBD_DB.vbds id |> vbd_plug_sets in let vgpus = VGPU_DB.vgpus id in let pcis = PCI_DB.pcis id |> pci_plug_order in @@ -1691,8 +1776,22 @@ let rec atomics_of_operation = function let name_multi = pf "VBDs.activate_and_plug %s" typ in let name_one = pf "VBD.activate_and_plug %s" typ in parallel_map name_multi ~id vbds (fun vbd -> - serial name_one ~id - [VBD_set_active (vbd.Vbd.id, true); VBD_plug vbd.Vbd.id] + (* When migrating, attach early if the vbd's SM allows it. + Note: there is a bug here for SxM if migrating between API + versions as the Vbd's new SR won't have propagated to xenopsd + yet. This means can_attach_early will be based on the origin SR. + This is a non-issue as v1 <-> v3 migration is still experimental + and v1 is already early-attaching in SxM through mirroring. + *) + if + migration + && (not !xenopsd_vbd_plug_unplug_legacy) + && vbd.Vbd.can_attach_early + then + [VBD_activate vbd.Vbd.id] + else + serial name_one ~id + [VBD_set_active (vbd.Vbd.id, true); vbd_plug vbd.Vbd.id] ) in [ @@ -1815,7 +1914,7 @@ let rec atomics_of_operation = function ] ; vgpu_start_operations ; [VM_restore (id, data, vgpu_data)] - ; atomics_of_operation (VM_restore_devices (id, true)) + ; atomics_of_operation (VM_restore_devices (id, false)) ; [ (* At this point the domain is considered survivable. 
*) VM_set_domain_action_request (id, None) @@ -1825,9 +1924,9 @@ let rec atomics_of_operation = function ] |> List.concat | VBD_hotplug id -> - [VBD_set_active (id, true); VBD_plug id] + [VBD_set_active (id, true); vbd_plug id] | VBD_hotunplug (id, force) -> - [VBD_unplug (id, force); VBD_set_active (id, false)] + [vbd_unplug id force; VBD_set_active (id, false)] | VIF_hotplug id -> [VIF_set_active (id, true); VIF_plug id] | VIF_hotunplug (id, force) -> @@ -1847,57 +1946,12 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "Ignoring error during best-effort operation: %s" (Printexc.to_string e) ) - | Parallel (_id, description, atoms) -> - (* parallel_id is a unused unique name prefix for a parallel worker queue *) - let parallel_id = - Printf.sprintf "Parallel:task=%s.atoms=%d.(%s)" - (Xenops_task.id_of_handle t) - (List.length atoms) description - in - let with_tracing = id_with_tracing parallel_id t in - debug "begin_%s" parallel_id ; - let task_list = - queue_atomics_and_wait ~progress_callback ~max_parallel_atoms:10 - with_tracing parallel_id atoms - in - debug "end_%s" parallel_id ; - (* make sure that we destroy all the parallel tasks that finished *) - let errors = - List.map - (fun (id, task_handle, task_state) -> - match task_state with - | Some (Task.Completed _) -> - TASK.destroy' id ; None - | Some (Task.Failed e) -> - TASK.destroy' id ; - let e = - match Rpcmarshal.unmarshal Errors.error.Rpc.Types.ty e with - | Ok x -> - Xenopsd_error x - | Error (`Msg x) -> - internal_error "Error unmarshalling failure: %s" x - in - Some e - | None | Some (Task.Pending _) -> - (* Because pending tasks are filtered out in - queue_atomics_and_wait with task_ended the second case will - never be encountered. 
The previous boolean used in - event_wait was enough to express the possible cases *) - let err_msg = - Printf.sprintf "Timed out while waiting on task %s (%s)" id - (Xenops_task.get_dbg task_handle) - in - error "%s" err_msg ; - Xenops_task.cancel task_handle ; - Some (Xenopsd_error (Internal_error err_msg)) - ) - task_list - in - (* if any error was present, raise first one, so that - trigger_cleanup_after_failure is called *) - List.iter - (fun err -> match err with None -> () | Some e -> raise e) - errors + | Parallel (_id, description, atoms) as atom -> + check_nesting atom ; + parallel_atomic ~progress_callback ~description ~nested:false atoms t + | Nested_parallel (_id, description, atoms) as atom -> + check_nesting atom ; + parallel_atomic ~progress_callback ~description ~nested:true atoms t | Serial (_, _, atoms) -> List.iter (Fun.flip (perform_atomic ~progress_callback) t) atoms | VIF_plug id -> @@ -2017,7 +2071,16 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) Xenops_hooks.vm ~script ~reason ~id ~extra_args | VBD_plug id -> debug "VBD.plug %s" (VBD_DB.string_of_id id) ; - B.VBD.plug t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + B.VBD.attach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + B.VBD.activate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + VBD_DB.signal id + | VBD_attach id -> + debug "VBD.attach %s" (VBD_DB.string_of_id id) ; + B.VBD.attach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + VBD_DB.signal id + | VBD_activate id -> + debug "VBD.activate %s" (VBD_DB.string_of_id id) ; + B.VBD.activate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; VBD_DB.signal id | VBD_set_active (id, b) -> debug "VBD.set_active %s %b" (VBD_DB.string_of_id id) b ; @@ -2038,6 +2101,17 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) finally (fun () -> B.VBD.unplug t (VBD_DB.vm_of id) (VBD_DB.read_exn id) force) (fun () -> VBD_DB.signal id) + | VBD_deactivate (id, force) -> + debug "VBD.deactivate %s" (VBD_DB.string_of_id id) ; + finally + 
(fun () -> + B.VBD.deactivate t (VBD_DB.vm_of id) (VBD_DB.read_exn id) force + ) + (fun () -> VBD_DB.signal id) + | VBD_detach id -> + debug "VBD.detach %s" (VBD_DB.string_of_id id) ; + B.VBD.detach t (VBD_DB.vm_of id) (VBD_DB.read_exn id) ; + VBD_DB.signal id | VBD_insert (id, disk) -> ( (* NB this is also used to "refresh" ie signal a qemu that it should re-open a device, useful for when a physical CDROM is inserted into the @@ -2225,11 +2299,18 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "VM.destroy %s" id ; B.VM.destroy t (VM_DB.read_exn id) | VM_create (id, memory_upper_bound, final_id, no_sharept) -> - debug "VM.create %s memory_upper_bound = %s" id + let num_of_vbds = List.length (VBD_DB.vbds id) in + let num_of_vifs = List.length (VIF_DB.vifs id) in + debug + "VM.create %s memory_upper_bound = %s, num_of_vbds = %d, num_of_vifs = \ + %d" + id (Option.value ~default:"None" (Option.map Int64.to_string memory_upper_bound) - ) ; + ) + num_of_vbds num_of_vifs ; B.VM.create t memory_upper_bound (VM_DB.read_exn id) final_id no_sharept + num_of_vbds num_of_vifs | VM_build (id, force) -> debug "VM.build %s" id ; let vbds : Vbd.t list = VBD_DB.vbds id |> vbd_plug_order in @@ -2303,7 +2384,92 @@ let rec perform_atomic ~progress_callback ?result (op : atomic) debug "VM.soft_reset %s" id ; B.VM.soft_reset t (VM_DB.read_exn id) -and queue_atomic_int ~progress_callback dbg id op = +and check_nesting atom = + let msg_prefix = "Nested atomics error" in + let rec check_nesting_inner found_parallel found_nested = function + | Parallel (_, _, rem) -> + if found_parallel then ( + warn + "%s: Two or more Parallel atoms found, use Nested_parallel for the \ + inner atom" + msg_prefix ; + true + ) else + List.exists (check_nesting_inner true found_nested) rem + | Nested_parallel (_, _, rem) -> + if found_nested then ( + warn + "%s: Two or more Nested_parallel atoms found, there should only be \ + one layer of nesting" + msg_prefix ; + true + ) else + 
List.exists (check_nesting_inner found_parallel true) rem + | Serial (_, _, rem) -> + List.exists (check_nesting_inner found_parallel found_nested) rem + | _ -> + false + in + ignore @@ check_nesting_inner false false atom + +and parallel_atomic ~progress_callback ~description ~nested atoms t = + (* parallel_id is a unused unique name prefix for a parallel worker queue *) + let redirector = + if nested then + Redirector.nested_parallel_queues + else + Redirector.parallel_queues + in + let parallel_id = + Printf.sprintf "%s:task=%s.atoms=%d.(%s)" + (Redirector.to_string redirector) + (Xenops_task.id_of_handle t) + (List.length atoms) description + in + let with_tracing = id_with_tracing parallel_id t in + debug "begin_%s" parallel_id ; + let task_list = + queue_atomics_and_wait ~progress_callback ~max_parallel_atoms:10 + with_tracing parallel_id atoms redirector + in + debug "end_%s" parallel_id ; + (* make sure that we destroy all the parallel tasks that finished *) + let errors = + List.map + (fun (id, task_handle, task_state) -> + match task_state with + | Some (Task.Completed _) -> + TASK.destroy' id ; None + | Some (Task.Failed e) -> + TASK.destroy' id ; + let e = + match Rpcmarshal.unmarshal Errors.error.Rpc.Types.ty e with + | Ok x -> + Xenopsd_error x + | Error (`Msg x) -> + internal_error "Error unmarshalling failure: %s" x + in + Some e + | None | Some (Task.Pending _) -> + (* Because pending tasks are filtered out in + queue_atomics_and_wait with task_ended the second case will + never be encountered. 
The previous boolean used in + event_wait was enough to express the possible cases *) + let err_msg = + Printf.sprintf "Timed out while waiting on task %s (%s)" id + (Xenops_task.get_dbg task_handle) + in + error "%s" err_msg ; + Xenops_task.cancel task_handle ; + Some (Xenopsd_error (Internal_error err_msg)) + ) + task_list + in + (* if any error was present, raise first one, so that + trigger_cleanup_after_failure is called *) + List.iter (fun err -> match err with None -> () | Some e -> raise e) errors + +and queue_atomic_int ~progress_callback dbg id op redirector = let task = Xenops_task.add tasks dbg (let r = ref None in @@ -2312,10 +2478,12 @@ and queue_atomic_int ~progress_callback dbg id op = !r ) in - Redirector.push Redirector.parallel_queues id (Atomic op, task) ; + debug "Adding to %s queues" (Redirector.to_string redirector) ; + Redirector.push redirector id (Atomic op, task) ; task -and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops = +and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops + redirector = let from = Updates.last_id dbg updates in Xenops_utils.chunks max_parallel_atoms ops |> List.mapi (fun chunk_idx ops -> @@ -2328,7 +2496,9 @@ and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops = let atom_id = Printf.sprintf "%s.chunk=%d.atom=%d" id chunk_idx atom_idx in - (queue_atomic_int ~progress_callback dbg atom_id op, op) + ( queue_atomic_int ~progress_callback dbg atom_id op redirector + , op + ) ) ops in @@ -2445,11 +2615,15 @@ and trigger_cleanup_after_failure_atom op t = match op with | VBD_eject id | VBD_plug id + | VBD_attach id + | VBD_activate id | VBD_set_active (id, _) | VBD_epoch_begin (id, _, _) | VBD_epoch_end (id, _) | VBD_set_qos id | VBD_unplug (id, _) + | VBD_deactivate (id, _) + | VBD_detach id | VBD_insert (id, _) -> immediate_operation dbg (fst id) (VBD_check_state id) | VIF_plug id @@ -2500,7 +2674,9 @@ and trigger_cleanup_after_failure_atom op t = 
immediate_operation dbg id (VM_check_state id) | Best_effort op -> trigger_cleanup_after_failure_atom op t - | Parallel (_id, _description, ops) | Serial (_id, _description, ops) -> + | Parallel (_id, _description, ops) + | Nested_parallel (_id, _description, ops) + | Serial (_id, _description, ops) -> List.iter (fun op -> trigger_cleanup_after_failure_atom op t) ops | VM_rename (id1, id2, _) -> immediate_operation dbg id1 (VM_check_state id1) ; @@ -2541,9 +2717,9 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = | VM_restore_vifs id -> debug "VM_restore_vifs %s" id ; perform_atomics (atomics_of_operation op) t - | VM_restore_devices (id, restore_vifs) -> + | VM_restore_devices (id, migration) -> (* XXX: this is delayed due to the 'attach'/'activate' behaviour *) - debug "VM_restore_devices %s %b" id restore_vifs ; + debug "VM_restore_devices %s %b" id migration ; perform_atomics (atomics_of_operation op) t | VM_resume (id, _data) -> debug "VM.resume %s" id ; @@ -2628,19 +2804,30 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ~path:(Uri.path_unencoded url ^ snippet ^ id_str) ~query:(Uri.query url) () in - (* CA-78365: set the memory dynamic range to a single value to stop - ballooning. *) - let atomic = - VM_set_memory_dynamic_range - (id, vm.Vm.memory_dynamic_min, vm.Vm.memory_dynamic_min) - in - let (_ : unit) = - perform_atomic ~progress_callback:(fun _ -> ()) atomic t - in - (* Waiting here is not essential but adds a degree of safety and - reducess unnecessary memory copying. *) - ( try B.VM.wait_ballooning t vm - with Xenopsd_error Ballooning_timeout_before_migration -> () + (* CA-78365: set the memory dynamic range to a single value + to stop ballooning, if ballooning is enabled at all *) + ( if vm.memory_dynamic_min <> vm.memory_dynamic_max then + (* There's no need to balloon down when doing localhost migration - + we're not copying any memory in the first place. 
This would + likely increase VDI migration time as swap would be engaged. + Instead change the ballooning target to the current state *) + let new_balloon_target = + if vmm.vmm_localhost_migration then + (B.VM.get_state vm).memory_actual + else + vm.memory_dynamic_min + in + let atomic = + VM_set_memory_dynamic_range + (id, new_balloon_target, new_balloon_target) + in + let (_ : unit) = + perform_atomic ~progress_callback:(fun _ -> ()) atomic t + in + (* Waiting here is not essential but adds a degree of safety and + reducess unnecessary memory copying. *) + try B.VM.wait_ballooning t vm + with Xenopsd_error Ballooning_timeout_before_migration -> () ) ; (* Find out the VM's current memory_limit: this will be used to allocate memory on the receiver *) @@ -2856,11 +3043,31 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ( try let no_sharept = VGPU_DB.vgpus id |> List.exists is_no_sharept in debug "VM %s no_sharept=%b (%s)" id no_sharept __LOC__ ; + (* If plug is split into activate and attach, we could attach + early so that it is outside of the VM downtime (if the SM + supports this) *) + let early_attach = + parallel_map "VBDs.set_active_and_attach" ~id (VBD_DB.vbds id) + (fun vbd -> + if + (not !xenopsd_vbd_plug_unplug_legacy) + && vbd.Vbd.can_attach_early + then + serial "VBD.set_active_and_attach" ~id + [ + VBD_set_active (vbd.Vbd.id, true) + ; VBD_attach vbd.Vbd.id + ] + else + [] + ) + in perform_atomics ([VM_create (id, Some memory_limit, Some final_id, no_sharept)] - @ (* Perform as many operations as possible on the destination - domain before pausing the original domain *) - atomics_of_operation (VM_restore_vifs id) + (* Perform as many operations as possible on the destination + domain before pausing the original domain *) + @ atomics_of_operation (VM_restore_vifs id) + @ early_attach ) t ; Handshake.send s Handshake.Success @@ -2976,7 +3183,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : 
unit = ) ; debug "VM.receive_memory: restoring remaining devices and unpausing" ; perform_atomics - (atomics_of_operation (VM_restore_devices (final_id, false)) + (atomics_of_operation (VM_restore_devices (final_id, true)) @ [ VM_unpause final_id ; VM_set_domain_action_request (final_id, None) @@ -2988,6 +3195,7 @@ and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = ] ) t ; + with_tracing ~task:t ~name:"VM_migrate_downtime_end" Fun.id ; Handshake.send s Handshake.Success ; debug "VM.receive_memory: Synchronisation point 4" with e -> @@ -3202,7 +3410,8 @@ let uses_mxgpu id = ) (VGPU_DB.ids id) -let queue_operation_int ?traceparent dbg id op = +let queue_operation_int ?traceparent ?(redirector = Redirector.default) dbg id + op = let task = Xenops_task.add ?traceparent tasks dbg (let r = ref None in @@ -3210,11 +3419,11 @@ let queue_operation_int ?traceparent dbg id op = ) in let tag = if uses_mxgpu id then "mxgpu" else id in - Redirector.push Redirector.default tag (op, task) ; + Redirector.push redirector tag (op, task) ; task -let queue_operation ?traceparent dbg id op = - let task = queue_operation_int ?traceparent dbg id op in +let queue_operation ?traceparent ?redirector dbg id op = + let task = queue_operation_int ?traceparent ?redirector dbg id op in Xenops_task.id_of_handle task let queue_operation_and_wait dbg id op = @@ -3398,7 +3607,25 @@ module VIF = struct () end -let numa_placement = ref Xenops_interface.Host.Any +let default_numa_affinity_policy = ref Xenops_interface.Host.Best_effort + +let numa_placement = ref !default_numa_affinity_policy + +type affinity = Soft | Hard + +let string_of_numa_affinity_policy = + let open Xenops_interface.Host in + function + | Any -> + "any" + | Best_effort -> + "best-effort" + | Best_effort_hard -> + "best-effort-hard" + +let affinity_of_numa_affinity_policy = + let open Xenops_interface.Host in + function Any | Best_effort -> Soft | Best_effort_hard -> Hard module HOST = struct let stat _ 
dbg = @@ -3413,7 +3640,15 @@ module HOST = struct let set_numa_affinity_policy _ dbg = Debug.with_thread_associated dbg @@ fun policy -> debug "HOST.set_numa_affinity_policy" ; - numa_placement := policy + match policy with + | None -> + info "Enforcing default NUMA affinity policy (%s)" + (string_of_numa_affinity_policy !default_numa_affinity_policy) ; + numa_placement := !default_numa_affinity_policy + | Some p -> + info "Enforcing '%s' NUMA affinity policy" + (string_of_numa_affinity_policy p) ; + numa_placement := p let get_console_data _ dbg = Debug.with_thread_associated dbg @@ -3488,7 +3723,9 @@ end module VM = struct module DB = VM_DB - let add _ dbg x = Debug.with_thread_associated dbg (fun () -> DB.add' x) () + let add _ dbg x = + Debug_info.with_dbg ~with_thread:true ~name:__FUNCTION__ ~dbg @@ fun _ -> + DB.add' x let rename _ dbg id1 id2 when' = queue_operation dbg id1 (Atomic (VM_rename (id1, id2, when'))) @@ -3525,11 +3762,17 @@ module VM = struct in (vm_t, state) - let stat _ dbg id = Debug.with_thread_associated dbg (fun () -> stat' id) () + let stat _ dbg id = + Debug_info.with_dbg ~with_thread:true ~name:__FUNCTION__ ~dbg @@ fun _ -> + stat' id - let exists _ _dbg id = match DB.read id with Some _ -> true | None -> false + let exists _ dbg id = + Debug_info.with_dbg ~name:__FUNCTION__ ~dbg @@ fun _ -> + match DB.read id with Some _ -> true | None -> false - let list _ dbg () = Debug.with_thread_associated dbg (fun () -> DB.list ()) () + let list _ dbg () = + Debug_info.with_dbg ~with_thread:true ~name:__FUNCTION__ ~dbg @@ fun _ -> + DB.list () let create _ dbg id = let no_sharept = false in @@ -3583,7 +3826,7 @@ module VM = struct let s3resume _ dbg id = queue_operation dbg id (Atomic (VM_s3resume id)) let migrate _context dbg id vmm_vdi_map vmm_vif_map vmm_vgpu_pci_map vmm_url - (compress : bool) (verify_dest : bool) = + (compress : bool) (localhost_migration : bool) (verify_dest : bool) = let tmp_uuid_of uuid ~kind = Printf.sprintf 
"%s00000000000%c" (String.sub uuid 0 24) (match kind with `dest -> '1' | `src -> '0') @@ -3600,6 +3843,7 @@ module VM = struct ; vmm_tmp_dest_id= tmp_uuid_of id ~kind:`dest ; vmm_compress= compress ; vmm_verify_dest= verify_dest + ; vmm_localhost_migration= localhost_migration } ) @@ -3649,7 +3893,12 @@ module VM = struct ; vmr_compressed= compressed_memory } in - let task = Some (queue_operation ?traceparent dbg id op) in + let task = + Some + (queue_operation ?traceparent + ~redirector:Redirector.receive_memory_queues dbg id op + ) + in Option.iter (fun t -> t |> Xenops_client.wait_for_task dbg |> ignore) task @@ -3831,7 +4080,7 @@ module UPDATES = struct Debug.with_thread_associated dbg (fun () -> debug "UPDATES.inject_barrier %s %d" vm_id id ; - let filter k _ = + let filter k = match k with | Dynamic.Task _ -> false @@ -4062,6 +4311,12 @@ module Observer = struct (fun () -> Tracing_export.set_export_interval interval) () + let set_export_chunk_size _ dbg size = + debug "Observer.set_export_chunk_size : dbg=%s" dbg ; + Debug.with_thread_associated dbg + (fun () -> Tracing_export.set_export_chunk_size size) + () + let set_max_spans _ dbg spans = debug "Observer.set_max_spans : dbg=%s" dbg ; Debug.with_thread_associated dbg @@ -4074,6 +4329,12 @@ module Observer = struct (fun () -> Tracing.Spans.set_max_traces traces) () + let set_max_depth _ dbg depth = + debug "Observer.set_max_depth : dbg=%s" dbg ; + Debug.with_thread_associated dbg + (fun () -> Tracing.Spans.set_max_depth depth) + () + let set_max_file_size _ dbg file_size = debug "Observer.set_max_file_size : dbg=%s" dbg ; Debug.with_thread_associated dbg @@ -4197,8 +4458,10 @@ let _ = Server.Observer.init (Observer.init ()) ; Server.Observer.set_trace_log_dir (Observer.set_trace_log_dir ()) ; Server.Observer.set_export_interval (Observer.set_export_interval ()) ; + Server.Observer.set_export_chunk_size (Observer.set_export_chunk_size ()) ; Server.Observer.set_max_spans (Observer.set_max_spans ()) ; 
Server.Observer.set_max_traces (Observer.set_max_traces ()) ; + Server.Observer.set_max_depth (Observer.set_max_depth ()) ; Server.Observer.set_max_file_size (Observer.set_max_file_size ()) ; Server.Observer.set_host_id (Observer.set_host_id ()) ; Server.Observer.set_compress_tracing_files diff --git a/ocaml/xenopsd/lib/xenops_server_plugin.ml b/ocaml/xenopsd/lib/xenops_server_plugin.ml index fbeb78f3640..e4a61bb9ac8 100644 --- a/ocaml/xenopsd/lib/xenops_server_plugin.ml +++ b/ocaml/xenopsd/lib/xenops_server_plugin.ml @@ -84,6 +84,8 @@ module type S = sig -> Vm.t -> Vm.id option -> bool (* no_sharept*) + -> int (* num_of_vbds *) + -> int (* num_of_vifs *) -> unit val build : @@ -207,10 +209,16 @@ module type S = sig val epoch_end : Xenops_task.task_handle -> Vm.id -> disk -> unit - val plug : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit + val attach : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit + + val activate : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit val unplug : Xenops_task.task_handle -> Vm.id -> Vbd.t -> bool -> unit + val deactivate : Xenops_task.task_handle -> Vm.id -> Vbd.t -> bool -> unit + + val detach : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit + val insert : Xenops_task.task_handle -> Vm.id -> Vbd.t -> disk -> unit val eject : Xenops_task.task_handle -> Vm.id -> Vbd.t -> unit @@ -284,10 +292,7 @@ module type S = sig end module UPDATES : sig - val get : - Updates.id option - -> int option - -> Dynamic.barrier list * Dynamic.id list * Updates.id + val get : Updates.id option -> int option -> Updates.get_result end module DEBUG : sig diff --git a/ocaml/xenopsd/lib/xenops_server_simulator.ml b/ocaml/xenopsd/lib/xenops_server_simulator.ml index c5123641978..0c6ac3f606b 100644 --- a/ocaml/xenopsd/lib/xenops_server_simulator.ml +++ b/ocaml/xenopsd/lib/xenops_server_simulator.ml @@ -547,7 +547,8 @@ module VM = struct let remove _vm = () - let create _ memory_limit vm _ _ = with_lock m (create_nolock memory_limit vm) + let create _ 
memory_limit vm _ _ _ _ = + with_lock m (create_nolock memory_limit vm) let destroy _ vm = with_lock m (destroy_nolock vm) @@ -673,10 +674,16 @@ module VBD = struct let epoch_end _ (_vm : Vm.id) (_disk : disk) = () - let plug _ (vm : Vm.id) (vbd : Vbd.t) = with_lock m (add_vbd vm vbd) + let attach _ (vm : Vm.id) (vbd : Vbd.t) = with_lock m (add_vbd vm vbd) + + let activate _ (_vm : Vm.id) (_vbd : Vbd.t) = () let unplug _ vm vbd _ = with_lock m (remove_vbd vm vbd) + let deactivate _ vm vbd _ = with_lock m (remove_vbd vm vbd) + + let detach _ _vm _vbd = () + let insert _ _vm _vbd _disk = () let eject _ _vm _vbd = () diff --git a/ocaml/xenopsd/lib/xenops_server_skeleton.ml b/ocaml/xenopsd/lib/xenops_server_skeleton.ml index dc1b826f85e..f5ef9ed027c 100644 --- a/ocaml/xenopsd/lib/xenops_server_skeleton.ml +++ b/ocaml/xenopsd/lib/xenops_server_skeleton.ml @@ -64,50 +64,49 @@ module VM = struct let remove _ = () - let create _ _ _ _ = unimplemented "VM.create" + let create _ _ _ _ = unimplemented __FUNCTION__ - let build ?restore_fd:_ _ _ _ _ _ = unimplemented "VM.build" + let build ?restore_fd:_ _ _ _ _ _ = unimplemented __FUNCTION__ - let create_device_model _ _ _ _ _ = unimplemented "VM.create_device_model" + let create_device_model _ _ _ _ _ = unimplemented __FUNCTION__ - let destroy_device_model _ _ = unimplemented "VM.destroy_device_model" + let destroy_device_model _ _ = unimplemented __FUNCTION__ - let destroy _ _ = unimplemented "VM.destroy" + let destroy _ _ = unimplemented __FUNCTION__ - let pause _ _ = unimplemented "VM.pause" + let pause _ _ = unimplemented __FUNCTION__ - let unpause _ _ = unimplemented "VM.unpause" + let unpause _ _ = unimplemented __FUNCTION__ - let set_xsdata _ _ _ = unimplemented "VM.set_xsdata" + let set_xsdata _ _ _ = unimplemented __FUNCTION__ - let set_vcpus _ _ _ = unimplemented "VM.set_vcpus" + let set_vcpus _ _ _ = unimplemented __FUNCTION__ - let set_shadow_multiplier _ _ _ = unimplemented "VM.set_shadow_multipler" + let 
set_shadow_multiplier _ _ _ = unimplemented __FUNCTION__ - let set_memory_dynamic_range _ _ _ _ = - unimplemented "VM.set_memory_dynamic_range" + let set_memory_dynamic_range _ _ _ _ = unimplemented __FUNCTION__ - let request_shutdown _ _ _ _ = unimplemented "VM.request_shutdown" + let request_shutdown _ _ _ _ = unimplemented __FUNCTION__ - let wait_shutdown _ _ _ _ = unimplemented "VM.wait_shutdown" + let wait_shutdown _ _ _ _ = unimplemented __FUNCTION__ - let assert_can_save _ = unimplemented "VM.assert_can_save" + let assert_can_save _ = unimplemented __FUNCTION__ - let save _ _ _ _ _ _ _ = unimplemented "VM.save" + let save _ _ _ _ _ _ _ = unimplemented __FUNCTION__ - let restore _ _ _ _ _ _ _ = unimplemented "VM.restore" + let restore _ _ _ _ _ _ _ = unimplemented __FUNCTION__ - let s3suspend _ _ = unimplemented "VM.s3suspend" + let s3suspend _ _ = unimplemented __FUNCTION__ - let s3resume _ _ = unimplemented "VM.s3resume" + let s3resume _ _ = unimplemented __FUNCTION__ - let soft_reset _ _ = unimplemented "VM.soft_reset" + let soft_reset _ _ = unimplemented __FUNCTION__ let get_state _ = Xenops_utils.halted_vm - let request_rdp _ _ = unimplemented "VM.request_rdp" + let request_rdp _ _ = unimplemented __FUNCTION__ - let run_script _ _ _ = unimplemented "VM.run_script" + let run_script _ _ _ = unimplemented __FUNCTION__ let set_domain_action_request _ _ = () @@ -131,9 +130,9 @@ module PCI = struct let dequarantine _ = () - let plug _ _ _ = unimplemented "PCI.plug" + let plug _ _ _ = unimplemented __FUNCTION__ - let unplug _ _ _ = unimplemented "PCI.unplug" + let unplug _ _ _ = unimplemented __FUNCTION__ let get_device_action_request _ _ = None end @@ -145,13 +144,19 @@ module VBD = struct let epoch_end _ _ _ = () - let plug _ _ _ = unimplemented "VBD.plug" + let attach _ _ _ = unimplemented __FUNCTION__ - let unplug _ _ _ _ = unimplemented "VBD.unplug" + let activate _ _ _ = unimplemented __FUNCTION__ - let insert _ _ _ _ = unimplemented "VBD.insert" + let 
unplug _ _ _ _ = unimplemented __FUNCTION__ - let eject _ _ _ = unimplemented "VBD.eject" + let deactivate _ _ _ _ = unimplemented __FUNCTION__ + + let detach _ _ _ = unimplemented __FUNCTION__ + + let insert _ _ _ _ = unimplemented __FUNCTION__ + + let eject _ _ _ = unimplemented __FUNCTION__ let set_qos _ _ _ = () @@ -163,23 +168,21 @@ end module VIF = struct let set_active _ _ _ _ = () - let plug _ _ _ = unimplemented "VIF.plug" + let plug _ _ _ = unimplemented __FUNCTION__ - let unplug _ _ _ _ = unimplemented "VIF.unplug" + let unplug _ _ _ _ = unimplemented __FUNCTION__ - let move _ _ _ _ = unimplemented "VIF.move" + let move _ _ _ _ = unimplemented __FUNCTION__ - let set_carrier _ _ _ _ = unimplemented "VIF.set_carrier" + let set_carrier _ _ _ _ = unimplemented __FUNCTION__ - let set_locking_mode _ _ _ _ = unimplemented "VIF.set_locking_mode" + let set_locking_mode _ _ _ _ = unimplemented __FUNCTION__ - let set_ipv4_configuration _ _ _ _ = - unimplemented "VIF.set_ipv4_configuration" + let set_ipv4_configuration _ _ _ _ = unimplemented __FUNCTION__ - let set_ipv6_configuration _ _ _ _ = - unimplemented "VIF.set_ipv6_configuration" + let set_ipv6_configuration _ _ _ _ = unimplemented __FUNCTION__ - let set_pvs_proxy _ _ _ _ = unimplemented "VIF.set_pvs_proxy" + let set_pvs_proxy _ _ _ _ = unimplemented __FUNCTION__ let get_state _ _ = unplugged_vif @@ -187,7 +190,7 @@ module VIF = struct end module VGPU = struct - let start _ _ _ _ = unimplemented "VGPU.start" + let start _ _ _ _ = unimplemented __FUNCTION__ let set_active _ _ _ _ = () @@ -195,9 +198,9 @@ module VGPU = struct end module VUSB = struct - let plug _ _ _ = unimplemented "VUSB.plug" + let plug _ _ _ = unimplemented __FUNCTION__ - let unplug _ _ _ = unimplemented "VUSB.unplug" + let unplug _ _ _ = unimplemented __FUNCTION__ let get_state _ _ = unplugged_vusb @@ -212,4 +215,4 @@ module UPDATES = struct assert false end -module DEBUG = struct let trigger _ _ = unimplemented "DEBUG.trigger" end +module 
DEBUG = struct let trigger _ _ = unimplemented __FUNCTION__ end diff --git a/ocaml/xenopsd/lib/xenops_utils.ml b/ocaml/xenopsd/lib/xenops_utils.ml index 481ad1b6101..53dc73709a1 100644 --- a/ocaml/xenopsd/lib/xenops_utils.ml +++ b/ocaml/xenopsd/lib/xenops_utils.ml @@ -227,11 +227,13 @@ module MemFS = struct match (path, fs) with | [], Dir d -> d - | p :: ps, Dir d -> - if StringMap.mem p !d then - aux ps (StringMap.find p !d) - else + | p :: ps, Dir d -> ( + match StringMap.find_opt p !d with + | Some x -> + aux ps x + | None -> raise Not_dir + ) | _, Leaf _ -> raise Not_dir in @@ -285,14 +287,13 @@ module MemFS = struct (fun p -> let dir = dir_locked (dirname p) in let deletable = - if StringMap.mem (filename p) !dir then - match StringMap.find (filename p) !dir with - | Dir child -> - StringMap.is_empty !child - | Leaf _ -> - true - else - false + match StringMap.find_opt (filename p) !dir with + | Some (Dir child) -> + StringMap.is_empty !child + | Some (Leaf _) -> + true + | None -> + false in if deletable then dir := StringMap.remove (filename p) !dir ) diff --git a/ocaml/xenopsd/lib/xenopsd.ml b/ocaml/xenopsd/lib/xenopsd.ml index e0a4f5949db..c127b02f673 100644 --- a/ocaml/xenopsd/lib/xenopsd.ml +++ b/ocaml/xenopsd/lib/xenopsd.ml @@ -49,6 +49,15 @@ let default_vbd_backend_kind = ref "vbd" let ca_140252_workaround = ref false +(* Optimize performance: set MTRR WB attribute on Xen PCI MMIO BAR. + This is useful for AMD, and mostly a noop on Intel (which achieves a similar + effect using Intel-only features in Xen) + + Turning on WB is done by disabling UC: + UnCached=false -> WriteBack=true +*) +let xen_platform_pci_bar_uc = ref false + let action_after_qemu_crash = ref None let additional_ballooning_timeout = ref 120. 
@@ -59,7 +68,7 @@ let feature_flags_path = ref "/etc/xenserver/features.d" let pvinpvh_xen_cmdline = ref "pv-shim console=xen" -let numa_placement_compat = ref false +let numa_placement_compat = ref true (* O(N^2) operations, until we get a xenstore cache, so use a small number here *) let vm_guest_agent_xenstore_quota = ref 128 @@ -207,6 +216,14 @@ let options = , (fun () -> string_of_bool !ca_140252_workaround) , "Workaround for evtchn misalignment for legacy PV tools" ) + ; ( "xen-platform-pci-bar-uc" + , Arg.Bool (fun x -> xen_platform_pci_bar_uc := x) + , (fun () -> string_of_bool !xen_platform_pci_bar_uc) + , "Controls whether, when the VM starts in HVM mode, the Xen PCI MMIO used \ + by grant tables is mapped as Uncached (UC, the default) or WriteBack \ + (WB, the workaround). WB mapping could improve performance of devices \ + using grant tables. This is useful on AMD platform only." + ) ; ( "additional-ballooning-timeout" , Arg.Set_float additional_ballooning_timeout , (fun () -> string_of_float !additional_ballooning_timeout) @@ -283,6 +300,11 @@ let options = , (fun () -> string_of_int !test_open) , "TESTING only: open N file descriptors" ) + ; ( "xenopsd-vbd-plug-unplug-legacy" + , Arg.Bool (fun x -> Xenops_server.xenopsd_vbd_plug_unplug_legacy := x) + , (fun () -> string_of_bool !Xenops_server.xenopsd_vbd_plug_unplug_legacy) + , "False if we want to split the plug atomic into attach/activate" + ) ] let path () = Filename.concat !sockets_path "xenopsd" @@ -295,29 +317,74 @@ let json_path () = path () ^ ".json" let rpc_fn call = (* Upgrade import_metadata API call *) - let call' = + let call', call_name, span_parent = match (call.Rpc.name, call.Rpc.params) with - | "VM.import_metadata", [debug_info; metadata] -> + | ("VM.import_metadata" as call_name), [Rpc.String debug_info; metadata] -> debug "Upgrading VM.import_metadata" ; - Rpc. 
- { - name= "VM.import_metadata" - ; params= - [Rpc.Dict [("debug_info", debug_info); ("metadata", metadata)]] - ; is_notification= false - } - | "query", [debug_info; unit_p] -> + let span_parent = + let di = debug_info |> Debug_info.of_string in + di.tracing + in + ( Rpc. + { + name= "VM.import_metadata" + ; params= + [ + Rpc.Dict + [ + ("debug_info", Rpc.String debug_info) + ; ("metadata", metadata) + ] + ] + ; is_notification= false + } + , call_name + , span_parent + ) + | ("query" as call_name), [Rpc.String debug_info; unit_p] -> debug "Upgrading query" ; - Rpc. - { - name= "query" - ; params= [Rpc.Dict [("debug_info", debug_info); ("unit", unit_p)]] - ; is_notification= false - } - | _ -> - call + let span_parent = + let di = debug_info |> Debug_info.of_string in + di.tracing + in + ( Rpc. + { + name= "query" + ; params= + [ + Rpc.Dict + [("debug_info", Rpc.String debug_info); ("unit", unit_p)] + ] + ; is_notification= false + } + , call_name + , span_parent + ) + | call_name, [Rpc.Dict kv_list] -> + let span_parent = + kv_list + |> List.find_map (function + | "debug_info", Rpc.String debug_info -> + let di = debug_info |> Debug_info.of_string in + di.tracing + | _ -> + None + ) + in + (call, call_name, span_parent) + | call_name, _ -> + (call, call_name, None) in - Idl.Exn.server Xenops_server.Server.implementation call' + Tracing.with_tracing + ~attributes: + [ + ("messaging.operation.name", "process") + ; ("messaging.system", "message-switch") + ; ("messaging.destination.name", !Xenops_interface.queue_name) + ] + ~span_kind:Tracing.SpanKind.Consumer ~parent:span_parent + ~name:("process" ^ " " ^ call_name) + @@ fun _ -> Idl.Exn.server Xenops_server.Server.implementation call' let handle_received_fd this_connection = let msg_size = 16384 in diff --git a/ocaml/xenopsd/scripts/qemu-wrapper b/ocaml/xenopsd/scripts/qemu-wrapper index c8acefbd3f6..b1d811e7126 100644 --- a/ocaml/xenopsd/scripts/qemu-wrapper +++ b/ocaml/xenopsd/scripts/qemu-wrapper @@ -102,7 
+102,15 @@ def prepare_exec(): g = open("/sys/fs/cgroup/cpu/%s/cgroup.procs" % cgroup_slice, 'w') except FileNotFoundError: # cgroup-v2 path: - g = open("/sys/fs/cgroup/%s/cgroup.procs" % cgroup_slice, 'w') + # Note cgroups v2 "no internal processes" rule + # if cgroup.subtree_control is not empty, and we attach a pid + # into cgroup.procs, kernel would return EBUSY + cgroup_slice_dir = os.path.join("/sys/fs/cgroup", cgroup_slice) + qemu_dm_dir = os.path.join(cgroup_slice_dir, "qemu-dm") + if not os.path.exists(qemu_dm_dir): + os.mkdir(qemu_dm_dir) + procs_file = os.path.join(qemu_dm_dir, "cgroup.procs") + g = open(procs_file, 'w') g.write(str(os.getpid())) g.close() except IOError as e: diff --git a/ocaml/xenopsd/scripts/swtpm-wrapper b/ocaml/xenopsd/scripts/swtpm-wrapper index dfb322e6453..63de73a796a 100755 --- a/ocaml/xenopsd/scripts/swtpm-wrapper +++ b/ocaml/xenopsd/scripts/swtpm-wrapper @@ -13,6 +13,7 @@ # GNU Lesser General Public License for more details. import os +import os.path import stat import socket import sys @@ -140,6 +141,8 @@ def main(argv): tpm_env = dict(os.environ) tpm_env["LD_LIBRARY_PATH"] = "/usr/lib:" + if os.path.exists("/etc/ssl/openssl-swtpm.cnf"): + tpm_env["OPENSSL_CONF"] = "/etc/ssl/openssl-swtpm.cnf" if needs_init or check_state_needs_init(tpm_state_file): if tpm_file is None: diff --git a/ocaml/xenopsd/test/test_topology.ml b/ocaml/xenopsd/test/test_topology.ml index e53640f5054..d9945ed8018 100644 --- a/ocaml/xenopsd/test/test_topology.ml +++ b/ocaml/xenopsd/test/test_topology.ml @@ -210,18 +210,20 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () = match Softaffinity.plan h nodes ~vm with | None -> Alcotest.fail "No NUMA plan" - | Some plan -> - D.debug "NUMA allocation succeeded for VM %d: %s" i - (Fmt.to_to_string CPUSet.pp_dump plan) ; + | Some (cpu_plan, mem_plan) -> + D.debug + "NUMA allocation succeeded for VM %d: [CPUS: %s]; [nodes: %s]" i + (Fmt.to_to_string CPUSet.pp_dump cpu_plan) + (Fmt.to_to_string 
Fmt.(Dump.list NUMA.pp_dump_node) mem_plan) ; let usednodes = - plan + cpu_plan |> CPUSet.elements |> List.map (NUMA.node_of_cpu h) |> List.sort_uniq compare |> List.to_seq in let costs_numa_aware = - vm_access_costs h plans (vm_cores, usednodes, plan) + vm_access_costs h plans (vm_cores, usednodes, cpu_plan) in let costs_default = vm_access_costs h plans (vm_cores, NUMA.nodes h, NUMA.all_cpus h) @@ -229,7 +231,7 @@ let test_allocate ?(mem = default_mem) (expected_cores, h) ~vms () = cost_not_worse ~default:costs_default costs_numa_aware ; ( costs_default :: costs_old , costs_numa_aware :: costs_new - , ((vm_cores, List.of_seq usednodes), plan) :: plans + , ((vm_cores, List.of_seq usednodes), cpu_plan) :: plans ) ) ([], [], []) diff --git a/ocaml/xenopsd/xc/device.ml b/ocaml/xenopsd/xc/device.ml index 22514697509..f7b11e18d25 100644 --- a/ocaml/xenopsd/xc/device.ml +++ b/ocaml/xenopsd/xc/device.ml @@ -1215,30 +1215,6 @@ module PCI = struct |> String.trim |> int_of_string in - if hvm && qmp_add then - if Service.Qemu.is_running ~xs domid then - let id = - Printf.sprintf "pci-pt-%02x_%02x.%01x" host.bus host.dev host.fn - in - let _qmp_result = - qmp_send_cmd domid - (Qmp.Device_add - { - driver= "xen-pci-passthrough" - ; device= - Qmp.Device.PCI - { - id - ; devfn - ; hostaddr= string_of_address host - ; permissive= false - } - } - ) - in - () - else - raise (Domain_not_running (host, domid)) ; let addresses = sysfs_pci_dev ^ string_of_address host ^ "/resource" |> Unixext.string_of_file @@ -1264,15 +1240,39 @@ module PCI = struct in Xenctrl.domain_iomem_permission xc domid scan_start scan_size true in - List.iteri apply_io_permission addresses ; let xcext = Xenctrlext.get_handle () in + ignore (quarantine host) ; + Xenctrlext.assign_device xcext domid (encode_bdf host) + _xen_domctl_dev_rdm_relaxed ; + List.iteri apply_io_permission addresses ; ( if irq > 0 then Xenctrlext.physdev_map_pirq xcext domid irq |> fun x -> Xenctrl.domain_irq_permission xc domid x true ) 
; - ignore (quarantine host) ; - Xenctrlext.assign_device xcext domid (encode_bdf host) - _xen_domctl_dev_rdm_relaxed + if hvm && qmp_add then + if Service.Qemu.is_running ~xs domid then + let id = + Printf.sprintf "pci-pt-%02x_%02x.%01x" host.bus host.dev host.fn + in + let _qmp_result = + qmp_send_cmd domid + (Qmp.Device_add + { + driver= "xen-pci-passthrough" + ; device= + Qmp.Device.PCI + { + id + ; devfn + ; hostaddr= string_of_address host + ; permissive= false + } + } + ) + in + () + else + raise (Domain_not_running (host, domid)) let add ~xc ~xs ~hvm pcidevs domid = let host_addr {host; guest= _; _} = host in diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index 07b1957db8c..4af94d7b96c 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -153,7 +153,7 @@ type build_info = { ; kernel: string (** in hvm case, point to hvmloader *) ; vcpus: int (** vcpus max *) ; priv: builder_spec_info - ; has_hard_affinity: bool [@default false] + ; hard_affinity: int list list [@default []] } [@@deriving rpcty] @@ -204,8 +204,6 @@ let assert_file_is_readable filename = error "Cannot read file %s" filename ; raise (Could_not_read_file filename) -let maybe f = function None -> () | Some x -> f x - (* Recursively iterate over a directory and all its children, calling fn for each *) let rec xenstore_iter t fn path = @@ -271,7 +269,8 @@ let wait_xen_free_mem ~xc ?(maximum_wait_time_seconds = 64) required_memory_kib in wait 0 -let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept = +let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept + num_of_vbds num_of_vifs = let open Xenctrl in let host_info = Xenctrl.physinfo xc in @@ -387,12 +386,80 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept = ; max_evtchn_port= -1 ; max_grant_frames= ( try int_of_string (List.assoc "max_grant_frames" vm_info.platformdata) - with _ -> 64 + with _ -> + let max_per_vif = 8 in + (* 1 VIF takes up 
(256 rx entries + 256 tx entries) * 8 queues max + * 8 bytes per grant table entry / 4096 bytes size of frame *) + let reasonable_per_vbd = 1 in + (* (1 ring (itself taking up one granted page) + 1 ring * + 32 requests * 11 grant refs contained in each * 8 bytes ) / + 4096 bytes size of frame = 0.6875, rounded up *) + let frames_number = + max 64 + ((max_per_vif * (num_of_vifs + 1)) + + (reasonable_per_vbd * (num_of_vbds + 1)) + ) + in + debug "estimated max_grant_frames = %d" frames_number ; + frames_number + (* max_per_vif * (num_of_vifs + 1 hotplugged future one) + + max_per_vbd * (num_of_vbds + 1 hotplugged future one) *) + + (* NOTE: While the VIF calculation is precise, the VBD one is a + very rough approximation of a reasonable value of + RING_SIZE * MAX_SEGMENTS_PER_REQUEST + PAGES_FOR_RING_ITSELF + The following points should allow for a rough understanding + of the scale of the problem of better estimation: + + 1) The blkfront driver can consume different numbers of grant + pages depending on the features advertised by the back driver + (and negotiated with it). These features can differ per VBD, and + right now aren't even known at the time of domain creation. + These include: + 1.1) indirect segments - these contain + BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST grants at most, and each + of these frames contains GRANTS_PER_INDIRECT_FRAME grants in + turn (stored in blkif_request_segment). + In practice, this means a catastrophic explosion - we should + not really aim to detect if indirect requests feature is on, + but turn it off to get reasonable estimates. + 1.2) persistent grants - these are an optimization, so + shouldn't really change the calculations, worst case is none + of the grants are persistent. + 1.3) multi-page rings - these change the RING_SIZE, but not in + a trivial manner (see ring-page-order) + 1.4) multi-queue - these change the number of rings, adding + another multiplier. 
+ 2) The "8 bytes" multiplier for a grant table entry only applies + to grants_v1. v2 grants take up 16 bytes per entry. And it's + impossible to detect this feature at the moment. + 3) A dynamically-sized grant table itself could be a solution? + Used to exist before, caused a lot of XSAs, hard to get right. + 4) Drivers might need to be more explicitly limited in how many + pages they can consume + 5) VBD backdriver's features should be managed by XAPI on the + object itself and (their max bound) known at the time of domain + creation. + + So for this estimate, there is only 1 ring which is 1 page, with + 32 entries, each entry (request) can have up to 11 pages + (excluding indirect pages and other complications). + + SEE: xen-blkfront.c, blkif.h, and the backdriver to understand + the process of negotiation (visible in xenstore, in kernel + module parameters in the sys filesystem afterwards) + *) ) ; max_maptrack_frames= ( try int_of_string (List.assoc "max_maptrack_frames" vm_info.platformdata) - with _ -> 1024 + with _ -> + 0 + (* This should be >0 only for driver domains (Dom0 startup is not + handled by the toolstack), which currently do not exist. + To support these in the future, xenopsd would need to check what + type of domain is being started. + *) ) ; max_grant_version= (if List.mem CAP_Gnttab_v2 host_info.capabilities then 2 else 1) @@ -503,6 +570,9 @@ let make ~xc ~xs vm_info vcpus domain_config uuid final_uuid no_sharept = xs.Xs.writev (dom_path ^ "/bios-strings") vm_info.bios_strings ; if vm_info.is_uefi then xs.Xs.write (dom_path ^ "/hvmloader/bios") "ovmf" ; + xs.Xs.write + (dom_path ^ "/hvmloader/pci/xen-platform-pci-bar-uc") + (if !Xenopsd.xen_platform_pci_bar_uc then "1" else "0") ; (* If a toolstack sees a domain which it should own in this state then the domain is not completely setup and should be shutdown. 
*) xs.Xs.write (dom_path ^ "/action-request") "poweroff" ; @@ -859,47 +929,78 @@ let numa_init () = ) mem -let numa_placement domid ~vcpus ~memory = +let set_affinity = function + | Xenops_server.Hard -> + Xenctrlext.vcpu_setaffinity_hard + | Xenops_server.Soft -> + Xenctrlext.vcpu_setaffinity_soft + +let numa_placement domid ~vcpus ~memory affinity = let open Xenctrlext in let open Topology in - let hint = - with_lock numa_mutex (fun () -> - let ( let* ) = Option.bind in - let xcext = get_handle () in - let* host = Lazy.force numa_hierarchy in - let numa_meminfo = (numainfo xcext).memory |> Array.to_list in - let nodes = - ListLabels.map2 - (NUMA.nodes host |> List.of_seq) - numa_meminfo - ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) - in - let vm = NUMARequest.make ~memory ~vcpus in - let nodea = - match !numa_resources with - | None -> - Array.of_list nodes - | Some a -> - Array.map2 NUMAResource.min_memory (Array.of_list nodes) a - in - numa_resources := Some nodea ; - Softaffinity.plan ~vm host nodea - ) - in - let xcext = get_handle () in - ( match hint with - | None -> - D.debug "NUMA-aware placement failed for domid %d" domid - | Some soft_affinity -> - let cpua = CPUSet.to_mask soft_affinity in - for i = 0 to vcpus - 1 do - Xenctrlext.vcpu_setaffinity_soft xcext domid i cpua - done - ) ; - let nr_pages = Int64.div memory 4096L |> Int64.to_int in - Xenctrlext.domain_claim_pages xcext domid nr_pages + with_lock numa_mutex (fun () -> + let ( let* ) = Option.bind in + let xcext = get_handle () in + let* host = Lazy.force numa_hierarchy in + let numa_meminfo = (numainfo xcext).memory |> Array.to_list in + let nodes = + ListLabels.map2 + (NUMA.nodes host |> List.of_seq) + numa_meminfo + ~f:(fun node m -> NUMA.resource host node ~memory:m.memfree) + in + let vm = NUMARequest.make ~memory ~vcpus in + let nodea = + match !numa_resources with + | None -> + Array.of_list nodes + | Some a -> + Array.map2 NUMAResource.min_memory (Array.of_list nodes) a 
+ in + numa_resources := Some nodea ; + let memory_plan = + match Softaffinity.plan ~vm host nodea with + | None -> + D.debug "NUMA-aware placement failed for domid %d" domid ; + [] + | Some (cpu_affinity, mem_plan) -> + let cpus = CPUSet.to_mask cpu_affinity in + for i = 0 to vcpus - 1 do + set_affinity affinity xcext domid i cpus + done ; + mem_plan + in + (* Xen only allows a single node when using memory claims, or none at all. *) + let* numa_node, node = + match memory_plan with + | [Node node] -> + Some (Xenctrlext.NumaNode.from node, node) + | [] | _ :: _ :: _ -> + D.debug + "%s: domain %d can't fit a single NUMA node, falling back to \ + default behaviour" + __FUNCTION__ domid ; + None + in + let nr_pages = Int64.div memory 4096L |> Int64.to_int in + try + Xenctrlext.domain_claim_pages xcext domid ~numa_node nr_pages ; + Some (node, memory) + with + | Xenctrlext.Not_available -> + (* Xen does not provide the interface to claim pages from a single NUMA + node, ignore the error and continue. *) + None + | Xenctrlext.Unix_error (errno, _) -> + D.info + "%s: unable to claim enough memory, domain %d won't be hosted in a \ + single NUMA node. 
(error %s)" + __FUNCTION__ domid + Unix.(error_message errno) ; + None + ) -let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = +let build_pre ~xc ~xs ~vcpus ~memory ~hard_affinity domid = let open Memory in let uuid = get_uuid ~xc domid in debug "VM = %s; domid = %d; waiting for %Ld MiB of free host memory" @@ -921,7 +1022,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = let timer_mode = int_platform_flag "timer_mode" in let log_reraise call_str f = debug "VM = %s; domid = %d; %s" (Uuidx.to_string uuid) domid call_str ; - try ignore (f ()) + try f () with e -> let bt = Printexc.get_backtrace () in debug "Backtrace: %s" bt ; @@ -931,7 +1032,7 @@ let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = error "VM = %s; domid = %d; %s" (Uuidx.to_string uuid) domid err_msg ; raise (Domain_build_pre_failed err_msg) in - maybe + Option.iter (fun mode -> log_reraise (Printf.sprintf "domain_set_timer_mode %d" mode) (fun () -> let xcext = Xenctrlext.get_handle () in @@ -951,42 +1052,82 @@ let build_pre ~xc ~xs ~vcpus ~memory ~has_hard_affinity domid = log_reraise (Printf.sprintf "shadow_allocation_set %d MiB" shadow_mib) (fun () -> Xenctrl.shadow_allocation_set xc domid shadow_mib ) ; - let () = + let apply_hard_vcpu_map () = + let xcext = Xenctrlext.get_handle () in + let pcpus = Xenctrlext.get_max_nr_cpus xcext in + let bitmap cpus : bool array = + (* convert a mask into a boolean array, one element per pCPU *) + let cpus = List.filter (fun x -> x >= 0 && x < pcpus) cpus in + let result = Array.init pcpus (fun _ -> false) in + List.iter (fun cpu -> result.(cpu) <- true) cpus ; + result + in + ( match hard_affinity with + | [] -> + [] + | m :: ms -> + (* Treat the first as the template for the rest *) + let all_vcpus = List.init vcpus Fun.id in + let defaults = List.map (fun _ -> m) all_vcpus in + Xapi_stdext_std.Listext.List.take vcpus ((m :: ms) @ defaults) + ) + |> List.iteri (fun vcpu mask -> + Xenctrlext.vcpu_setaffinity_hard 
xcext domid vcpu (bitmap mask) + ) + in + apply_hard_vcpu_map () ; + let node_placement = match !Xenops_server.numa_placement with | Any -> - () - | Best_effort -> + None + | (Best_effort | Best_effort_hard) as pin -> log_reraise (Printf.sprintf "NUMA placement") (fun () -> - if has_hard_affinity then - D.debug "VM has hard affinity set, skipping NUMA optimization" - else + if hard_affinity <> [] then ( + D.debug "VM has hard affinity set, skipping NUMA optimization" ; + None + ) else + let affinity = + Xenops_server.affinity_of_numa_affinity_policy pin + in numa_placement domid ~vcpus ~memory:(Int64.mul memory.xen_max_mib 1048576L) + affinity + |> Option.map fst ) in - create_channels ~xc uuid domid + let store_chan, console_chan = create_channels ~xc uuid domid in + (store_chan, console_chan, node_placement) + +let args_numa_placements numa_placement = + Option.fold ~none:[] + ~some:(fun node -> ["-mem_pnode"; Printf.sprintf "%d" node]) + numa_placement let xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory = + ~console_domid ~memory ~numa_placement = [ - "-domid" - ; string_of_int domid - ; "-store_port" - ; string_of_int store_port - ; "-store_domid" - ; string_of_int store_domid - ; "-console_port" - ; string_of_int console_port - ; "-console_domid" - ; string_of_int console_domid - ; "-mem_max_mib" - ; Int64.to_string memory.Memory.build_max_mib - ; "-mem_start_mib" - ; Int64.to_string memory.Memory.build_start_mib + [ + "-domid" + ; string_of_int domid + ; "-store_port" + ; string_of_int store_port + ; "-store_domid" + ; string_of_int store_domid + ; "-console_port" + ; string_of_int console_port + ; "-console_domid" + ; string_of_int console_domid + ; "-mem_max_mib" + ; Int64.to_string memory.Memory.build_max_mib + ; "-mem_start_mib" + ; Int64.to_string memory.Memory.build_start_mib + ] + ; args_numa_placements numa_placement ] + |> List.concat let xenguest_args_hvm ~domid ~store_port ~store_domid ~console_port - 
~console_domid ~memory ~kernel ~vgpus = + ~console_domid ~memory ~kernel ~vgpus ~numa_placement = ["-mode"; "hvm_build"; "-image"; kernel] @ (vgpus |> function | Xenops_interface.Vgpu.{implementation= Nvidia _; _} :: _ -> @@ -995,10 +1136,10 @@ let xenguest_args_hvm ~domid ~store_port ~store_domid ~console_port [] ) @ xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory + ~console_domid ~memory ~numa_placement let xenguest_args_pv ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~cmdline ~ramdisk = + ~console_domid ~memory ~kernel ~cmdline ~ramdisk ~numa_placement = [ "-mode" ; "linux_build" @@ -1014,10 +1155,10 @@ let xenguest_args_pv ~domid ~store_port ~store_domid ~console_port ; "0" ] @ xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory + ~console_domid ~memory ~numa_placement let xenguest_args_pvh ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~cmdline ~modules = + ~console_domid ~memory ~kernel ~cmdline ~modules ~numa_placement = let module_args = List.concat_map (fun (m, c) -> @@ -1039,7 +1180,7 @@ let xenguest_args_pvh ~domid ~store_port ~store_domid ~console_port ] @ module_args @ xenguest_args_base ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory + ~console_domid ~memory ~numa_placement let xenguest task xenguest_path domid uuid args = let line = @@ -1073,8 +1214,8 @@ let correct_shadow_allocation xc domid uuid shadow_mib = ) (* puts value in store after the domain build succeed *) -let build_post ~xc ~xs ~vcpus:_ ~static_max_mib ~target_mib domid domain_type - store_mfn store_port ents vments = +let build_post ~xc ~xs ~static_max_mib ~target_mib domid domain_type store_mfn + store_port ents vments = let uuid = get_uuid ~xc domid in let dom_path = xs.Xs.getdomainpath domid in (* Unit conversion. 
*) @@ -1118,7 +1259,7 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid let target_kib = info.memory_target in let vcpus = info.vcpus in let kernel = info.kernel in - let has_hard_affinity = info.has_hard_affinity in + let hard_affinity = info.hard_affinity in let force_arg = if force then ["--force"] else [] in assert_file_is_readable kernel ; (* Convert memory configuration values into the correct units. *) @@ -1136,13 +1277,13 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid shadow_multiplier in maybe_ca_140252_workaround ~xc ~vcpus domid ; - let store_port, console_port = - build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid + let store_port, console_port, numa_placement = + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity domid in let store_mfn, console_mfn = let args = xenguest_args_hvm ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~vgpus + ~console_domid ~memory ~kernel ~vgpus ~numa_placement @ force_arg @ extras in @@ -1163,15 +1304,15 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid Memory.Linux.full_config static_max_mib video_mib target_mib vcpus shadow_multiplier in - maybe assert_file_is_readable pvinfo.ramdisk ; - let store_port, console_port = - build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid + Option.iter assert_file_is_readable pvinfo.ramdisk ; + let store_port, console_port, numa_placement = + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity domid in let store_mfn, console_mfn = let args = xenguest_args_pv ~domid ~store_port ~store_domid ~console_port ~console_domid ~memory ~kernel ~cmdline:pvinfo.cmdline - ~ramdisk:pvinfo.ramdisk + ~ramdisk:pvinfo.ramdisk ~numa_placement @ force_arg @ extras in @@ -1187,13 +1328,13 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid shadow_multiplier in maybe_ca_140252_workaround ~xc ~vcpus domid ; - let store_port, console_port = - 
build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity domid + let store_port, console_port, numa_placement = + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity domid in let store_mfn, console_mfn = let args = xenguest_args_pvh ~domid ~store_port ~store_domid ~console_port - ~console_domid ~memory ~kernel ~cmdline ~modules + ~console_domid ~memory ~kernel ~cmdline ~modules ~numa_placement @ force_arg @ extras in @@ -1209,8 +1350,8 @@ let build (task : Xenops_task.task_handle) ~xc ~xs ~store_domid ~console_domid ) in let local_stuff = console_keys console_port console_mfn in - build_post ~xc ~xs ~vcpus ~target_mib ~static_max_mib domid domain_type - store_mfn store_port local_stuff vm_stuff + build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn + store_port local_stuff vm_stuff type suspend_flag = Live | Debug @@ -1223,8 +1364,8 @@ let dm_flags = [] let with_emu_manager_restore (task : Xenops_task.task_handle) ~domain_type - ~(dm : Device.Profile.t) ~store_port ~console_port ~extras manager_path - domid _uuid main_fd vgpu_fd f = + ~(dm : Device.Profile.t) ~store_port ~console_port ~extras ~numa_placements + manager_path domid _uuid main_fd vgpu_fd f = let mode = match domain_type with `hvm | `pvh -> "hvm_restore" | `pv -> "restore" in @@ -1242,20 +1383,24 @@ let with_emu_manager_restore (task : Xenops_task.task_handle) ~domain_type let fds = [(fd_uuid, main_fd)] @ vgpu_args in let args = [ - "-mode" - ; mode - ; "-domid" - ; string_of_int domid - ; "-fd" - ; fd_uuid - ; "-store_port" - ; string_of_int store_port - ; "-console_port" - ; string_of_int console_port + [ + "-mode" + ; mode + ; "-domid" + ; string_of_int domid + ; "-fd" + ; fd_uuid + ; "-store_port" + ; string_of_int store_port + ; "-console_port" + ; string_of_int console_port + ] + ; args_numa_placements numa_placements + ; dm_flags dm + ; extras + ; vgpu_cmdline ] - @ dm_flags dm - @ extras - @ vgpu_cmdline + |> List.concat in Emu_manager.with_connection task manager_path args fds f 
@@ -1307,9 +1452,8 @@ let consume_qemu_record fd limit domid uuid = (fun () -> Unix.close fd2) let restore_common (task : Xenops_task.task_handle) ~xc ~xs - ~(dm : Device.Profile.t) ~domain_type ~store_port ~store_domid:_ - ~console_port ~console_domid:_ ~no_incr_generationid:_ ~vcpus:_ ~extras - ~vtpm manager_path domid main_fd vgpu_fd = + ~(dm : Device.Profile.t) ~domain_type ~store_port ~console_port ~extras + ~vtpm ~numa_placements manager_path domid main_fd vgpu_fd = let module DD = Debug.Make (struct let name = "mig64" end) in let open DD in let uuid = get_uuid ~xc domid in @@ -1322,8 +1466,8 @@ let restore_common (task : Xenops_task.task_handle) ~xc ~xs match with_conversion_script task "Emu_manager" hvm main_fd (fun pipe_r -> with_emu_manager_restore task ~domain_type ~dm ~store_port - ~console_port ~extras manager_path domid uuid pipe_r vgpu_fd - (fun cnx -> restore_libxc_record cnx domid uuid + ~console_port ~extras ~numa_placements manager_path domid uuid + pipe_r vgpu_fd (fun cnx -> restore_libxc_record cnx domid uuid ) ) with @@ -1362,7 +1506,8 @@ let restore_common (task : Xenops_task.task_handle) ~xc ~xs [main_fd] in with_emu_manager_restore task ~domain_type ~dm ~store_port ~console_port - ~extras manager_path domid uuid main_fd vgpu_fd (fun cnx -> + ~extras ~numa_placements manager_path domid uuid main_fd vgpu_fd + (fun cnx -> (* Maintain a list of results returned by emu-manager that are expected by the reader threads. 
Contains the emu for which a result is wanted plus an event channel for waking up the reader once the @@ -1573,9 +1718,8 @@ let restore_common (task : Xenops_task.task_handle) ~xc ~xs (Uuidx.to_string uuid) domid e ; raise Suspend_image_failure -let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~store_domid - ~console_domid ~no_incr_generationid ~timeoffset ~extras info ~manager_path - ~vtpm domid fd vgpu_fd = +let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~timeoffset ~extras + info ~manager_path ~vtpm domid fd vgpu_fd = let static_max_kib = info.memory_max in let target_kib = info.memory_target in let vcpus = info.vcpus in @@ -1616,21 +1760,19 @@ let restore (task : Xenops_task.task_handle) ~xc ~xs ~dm ~store_domid maybe_ca_140252_workaround ~xc ~vcpus domid ; (memory, vm_stuff, `pvh) in - let store_port, console_port = - build_pre ~xc ~xs ~memory ~vcpus ~has_hard_affinity:info.has_hard_affinity - domid + let store_port, console_port, numa_placements = + build_pre ~xc ~xs ~memory ~vcpus ~hard_affinity:info.hard_affinity domid in let store_mfn, console_mfn = - restore_common task ~xc ~xs ~dm ~domain_type ~store_port ~store_domid - ~console_port ~console_domid ~no_incr_generationid ~vcpus ~extras ~vtpm - manager_path domid fd vgpu_fd + restore_common task ~xc ~xs ~dm ~domain_type ~store_port ~console_port + ~extras ~vtpm ~numa_placements manager_path domid fd vgpu_fd in let local_stuff = console_keys console_port console_mfn in (* And finish domain's building *) - build_post ~xc ~xs ~vcpus ~target_mib ~static_max_mib domid domain_type - store_mfn store_port local_stuff vm_stuff + build_post ~xc ~xs ~target_mib ~static_max_mib domid domain_type store_mfn + store_port local_stuff vm_stuff -let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xc:_ ~xs ~domain_type +let suspend_emu_manager ~(task : Xenops_task.task_handle) ~xs ~domain_type ~is_uefi ~vtpm ~dm ~manager_path ~domid ~uuid ~main_fd ~vgpu_fd ~flags ~progress_callback ~qemu_domid 
~do_suspend_callback = let open Suspend_image in @@ -1842,9 +1984,9 @@ let suspend (task : Xenops_task.task_handle) ~xc ~xs ~domain_type ~is_uefi ~dm write_header main_fd (Xenops, Int64.of_int xenops_rec_len) >>= fun () -> debug "Writing Xenops record contents" ; Io.write main_fd xenops_record ; - suspend_emu_manager ~task ~xc ~xs ~domain_type ~is_uefi ~vtpm ~dm - ~manager_path ~domid ~uuid ~main_fd ~vgpu_fd ~flags ~progress_callback - ~qemu_domid ~do_suspend_callback + suspend_emu_manager ~task ~xs ~domain_type ~is_uefi ~vtpm ~dm ~manager_path + ~domid ~uuid ~main_fd ~vgpu_fd ~flags ~progress_callback ~qemu_domid + ~do_suspend_callback >>= fun () -> ( if is_uefi then write_varstored_record task ~xs domid main_fd >>= fun () -> diff --git a/ocaml/xenopsd/xc/domain.mli b/ocaml/xenopsd/xc/domain.mli index c8f83b0994a..40f154561a3 100644 --- a/ocaml/xenopsd/xc/domain.mli +++ b/ocaml/xenopsd/xc/domain.mli @@ -133,7 +133,7 @@ type build_info = { ; kernel: string (** image to load. In HVM case, point to hvmloader *) ; vcpus: int (** vcpus max *) ; priv: builder_spec_info - ; has_hard_affinity: bool + ; hard_affinity: int list list (** vcpu -> pcpu map *) } val typ_of_build_info : build_info Rpc.Types.typ @@ -149,6 +149,8 @@ val make : -> [`VM] Uuidx.t -> string option -> bool (* no_sharept *) + -> int (* num_of_vbds *) + -> int (* num_of_vifs *) -> domid (** Create a fresh (empty) domain with a specific UUID, returning the domain ID *) @@ -245,9 +247,6 @@ val restore : -> xc:Xenctrl.handle -> xs:Ezxenstore_core.Xenstore.Xs.xsh -> dm:Device.Profile.t - -> store_domid:int - -> console_domid:int - -> no_incr_generationid:bool -> timeoffset:string -> extras:string list -> build_info diff --git a/ocaml/xenopsd/xc/dune b/ocaml/xenopsd/xc/dune index f04f082d086..1bf73af404f 100644 --- a/ocaml/xenopsd/xc/dune +++ b/ocaml/xenopsd/xc/dune @@ -2,6 +2,7 @@ (name xenopsd_xc) (modes best) (modules :standard \ + numa xenops_xc_main memory_breakdown memory_summary @@ -68,13 +69,18 @@ ) 
(wrapped false) ) + +(executable + (name numa) + (modules numa) + (libraries fmt logs logs.fmt mtime mtime.clock threads.posix xenctrl xenopsd_xc) +) + (executable (name xenops_xc_main) (modes exe) (modules xenops_xc_main) - (libraries - ezxenstore.core uuid xapi-idl @@ -95,7 +101,7 @@ (libraries astring cmdliner - + ezxenstore.core uuid xapi-idl.memory @@ -112,13 +118,13 @@ (section sbin) (package xapi-tools) ) - + (executable (name memory_summary) (modes exe) (modules memory_summary) (libraries - + clock xapi-stdext-unix xapi_xenopsd @@ -143,7 +149,7 @@ (modules cancel_utils_test) (libraries cmdliner - + ezxenstore.core threads.posix xapi-idl.xen.interface diff --git a/ocaml/xenopsd/xc/mem_stats.ml b/ocaml/xenopsd/xc/mem_stats.ml index 8daca47aff6..12353e56c1c 100644 --- a/ocaml/xenopsd/xc/mem_stats.ml +++ b/ocaml/xenopsd/xc/mem_stats.ml @@ -325,7 +325,7 @@ let generate_stats_exn () = let generate_stats () = try generate_stats_exn () with e -> - D.log_backtrace () ; + D.log_backtrace e ; D.debug "Failed to generate stats: %s" (Printexc.to_string e) ; [] diff --git a/ocaml/xenopsd/xc/numa.ml b/ocaml/xenopsd/xc/numa.ml new file mode 100644 index 00000000000..99f6473e9e4 --- /dev/null +++ b/ocaml/xenopsd/xc/numa.ml @@ -0,0 +1,176 @@ +(* Copyright (C) 2025 Cloud Software Group + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; version 2.1 only. with the special + exception on linking described in file LICENSE. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. +*) + +(* Monitoring loop that keeps track of per-numa-node memory changes, and prints + the change. 
Useful to see whether memory scrubbing is seen as used or free + memory by userspace *) +open! Xenctrlext + +let ( let@ ) f x = f x + +let stamp_tag : Mtime.span Logs.Tag.def = + Logs.Tag.def "stamp" ~doc:"Relative monotonic time stamp" Mtime.Span.pp + +let stamp c = Logs.Tag.(empty |> add stamp_tag (Mtime_clock.count c)) + +let xc = get_handle () + +let binary_prefixes = [""; "Ki"; "Mi"; "Gi"; "Ti"; "Pi"] + +let human_readable_bytes quantity = + let unit = "Bs" in + let print prefix q = Printf.sprintf "%Ld %s%s" q prefix unit in + let rec loop acc q = function + | [] -> + acc + | pre :: prefs -> + let quotient = Int64.div q 1024L in + let modulus = Int64.rem q 1024L in + let acc = + if Int64.equal modulus 0L then acc else print pre modulus :: acc + in + loop acc quotient prefs + in + if quantity = 0L then + print "" 0L + else + loop [] quantity binary_prefixes |> String.concat ", " + +let get_memory () = + let {memory; _} = numainfo xc in + memory + +let print_mem c mem = + for i = 0 to Array.length mem - 1 do + let {memfree; memsize} = mem.(i) in + let memfree = human_readable_bytes memfree in + let memsize = human_readable_bytes memsize in + Logs.app (fun m -> + m "\t%d: %s free out of %s" i memfree memsize ~tags:(stamp c) + ) + done + +let print_diff_mem before after = + if before > after then + Printf.sprintf "%s 🢆 " (Int64.sub before after |> human_readable_bytes) + else + Printf.sprintf "%s 🢅 " (Int64.sub after before |> human_readable_bytes) + +let diff c old cur = + let changed_yet = ref false in + for i = 0 to Int.min (Array.length old) (Array.length cur) - 1 do + let {memfree= a_free; _}, {memfree= b_free; _} = (old.(i), cur.(i)) in + if a_free <> b_free then ( + if not !changed_yet then + changed_yet := true ; + let free = human_readable_bytes b_free in + let updown = print_diff_mem a_free b_free in + Logs.app (fun m -> + m "\t%d: %s free (%s)" i free updown ~tags:(stamp (c ())) + ) + ) + done ; + !changed_yet + +let reporter ppf = + let report _src 
level ~over k msgf = + let k _ = over () ; k () in + let with_stamp h tags k ppf fmt = + let stamp = + match tags with + | None -> + None + | Some tags -> + Logs.Tag.find stamp_tag tags + in + let span_pp s = + match s with + | None -> + "0ns" + | Some s -> + Fmt.to_to_string Mtime.Span.pp s + in + Format.kfprintf k ppf + ("%a[%s] @[" ^^ fmt ^^ "@]@.") + Logs.pp_header (level, h) (span_pp stamp) + in + msgf @@ fun ?header ?tags fmt -> with_stamp header tags k ppf fmt + in + {Logs.report} + +let memory_changes () = + let max_time = Mtime.Span.(7 * s) in + + let memory = get_memory () in + let c = Mtime_clock.counter () in + print_mem c memory ; + let rec loop since_started since_changed previous = + let current = get_memory () in + + let since_started = ref since_started in + let timer () = + let last_changed = Mtime_clock.count since_changed in + if Mtime.Span.is_longer last_changed ~than:max_time then + since_started := Mtime_clock.counter () ; + !since_started + in + + let changed = diff timer previous current in + + let since_changed = + if changed then + Mtime_clock.counter () + else + !since_started + in + Unix.sleepf 0.01 ; + loop !since_started since_changed current + in + loop c c memory + +module DomainSet = Set.Make (Int) + +let get_domains xc = + Xenctrl.domain_getinfolist xc 0 + |> List.to_seq + |> Seq.map (function Xenctrl.{domid; _} -> domid) + |> DomainSet.of_seq + +let diff_domains c previous current = + let added = DomainSet.diff current previous in + let removed = DomainSet.diff previous current in + DomainSet.iter + (fun id -> Logs.app (fun m -> m "domain %d added" id ~tags:(stamp c))) + added ; + DomainSet.iter + (fun id -> Logs.app (fun m -> m "domain %d removed" id ~tags:(stamp c))) + removed + +let domain_changes xc = + let domains = get_domains xc in + let c = Mtime_clock.counter () in + let rec loop previous = + let current = get_domains xc in + diff_domains c previous current ; + Unix.sleepf 0.01 ; + loop current + in + loop domains + +let 
() = + Logs.set_reporter (reporter Format.std_formatter) ; + Logs.set_level (Some Logs.Info) ; + + ignore (Thread.create memory_changes () : Thread.t) ; + let@ xc = Xenctrl.with_intf in + domain_changes xc diff --git a/ocaml/xenopsd/xc/numa.mli b/ocaml/xenopsd/xc/numa.mli new file mode 100644 index 00000000000..e69de29bb2d diff --git a/ocaml/xenopsd/xc/xenctrlext.ml b/ocaml/xenopsd/xc/xenctrlext.ml index 5cea490864a..1c983daae26 100644 --- a/ocaml/xenopsd/xc/xenctrlext.ml +++ b/ocaml/xenopsd/xc/xenctrlext.ml @@ -90,6 +90,9 @@ external domain_soft_reset : handle -> domid -> unit external domain_update_channels : handle -> domid -> int -> int -> unit = "stub_xenctrlext_domain_update_channels" +external vcpu_setaffinity_hard : handle -> domid -> int -> bool array -> unit + = "stub_xenctrlext_vcpu_setaffinity_hard" + external vcpu_setaffinity_soft : handle -> domid -> int -> bool array -> unit = "stub_xenctrlext_vcpu_setaffinity_soft" @@ -109,5 +112,22 @@ external combine_cpu_policies : int64 array -> int64 array -> int64 array external policy_is_compatible : int64 array -> int64 array -> string option = "stub_xenctrlext_featuresets_are_compatible" -external domain_claim_pages : handle -> domid -> int -> unit +external stub_domain_claim_pages : handle -> domid -> int -> int -> unit = "stub_xenctrlext_domain_claim_pages" + +module NumaNode = struct + type t = int + + (** Defined as XC_NUMA_NO_NODE in xen.git/tools/include/xenguest.h, it's an + unsigned int (~0U) *) + let none = 0xFFFFFFFF + + let from = Fun.id +end + +exception Not_available + +let domain_claim_pages handle domid ?(numa_node = NumaNode.none) nr_pages = + if numa_node <> NumaNode.none then + raise Not_available ; + stub_domain_claim_pages handle domid numa_node nr_pages diff --git a/ocaml/xenopsd/xc/xenctrlext.mli b/ocaml/xenopsd/xc/xenctrlext.mli index 2a4632780ce..1572a1a8589 100644 --- a/ocaml/xenopsd/xc/xenctrlext.mli +++ b/ocaml/xenopsd/xc/xenctrlext.mli @@ -78,6 +78,9 @@ type numainfo = {memory: 
meminfo array; distances: int array array} type cputopo = {core: int; socket: int; node: int} +external vcpu_setaffinity_hard : handle -> domid -> int -> bool array -> unit + = "stub_xenctrlext_vcpu_setaffinity_hard" + external vcpu_setaffinity_soft : handle -> domid -> int -> bool array -> unit = "stub_xenctrlext_vcpu_setaffinity_soft" @@ -91,5 +94,17 @@ external combine_cpu_policies : int64 array -> int64 array -> int64 array external policy_is_compatible : int64 array -> int64 array -> string option = "stub_xenctrlext_featuresets_are_compatible" -external domain_claim_pages : handle -> domid -> int -> unit - = "stub_xenctrlext_domain_claim_pages" +module NumaNode : sig + type t + + val none : t + + val from : int -> t +end + +exception Not_available + +val domain_claim_pages : handle -> domid -> ?numa_node:NumaNode.t -> int -> unit +(** Raises {Unix_error} if there's not enough memory to claim in the system. + Raises {Not_available} if a single numa node is requested and xen does not + provide page claiming for single numa nodes. 
*) diff --git a/ocaml/xenopsd/xc/xenguestHelper.ml b/ocaml/xenopsd/xc/xenguestHelper.ml index b76fec51c25..06a28d92f33 100644 --- a/ocaml/xenopsd/xc/xenguestHelper.ml +++ b/ocaml/xenopsd/xc/xenguestHelper.ml @@ -200,13 +200,14 @@ let rec non_debug_receive ?(debug_callback = fun s -> debug "%s" s) cnx = (* Dump memory statistics on failure *) let non_debug_receive ?debug_callback cnx = - let debug_memory () = + let debug_memory log_type = Xenctrl.with_intf (fun xc -> let open Memory in let open Int64 in let open Xenctrl in let p = Xenctrl.physinfo xc in - error "Memory F %Ld KiB S %Ld KiB T %Ld MiB" + (match log_type with Syslog.Debug -> debug | _ -> error) + "Memory F %Ld KiB S %Ld KiB T %Ld MiB" (p.free_pages |> of_nativeint |> kib_of_pages) (p.scrub_pages |> of_nativeint |> kib_of_pages) (p.total_pages |> of_nativeint |> mib_of_pages_free) @@ -215,10 +216,18 @@ let non_debug_receive ?debug_callback cnx = try match non_debug_receive ?debug_callback cnx with | Error y as x -> - error "Received: %s" y ; debug_memory () ; x + error "Received: %s" y ; debug_memory Syslog.Err ; x | x -> x - with e -> debug_memory () ; raise e + with + | End_of_file as e -> + Unixext.raise_with_preserved_backtrace e (fun () -> + debug_memory Syslog.Debug + ) + | e -> + Unixext.raise_with_preserved_backtrace e (fun () -> + debug_memory Syslog.Err + ) (** For the simple case where we just want the successful result, return it. If we get an error message (or suspend) then throw an exception. 
*) diff --git a/ocaml/xenopsd/xc/xenops_server_xen.ml b/ocaml/xenopsd/xc/xenops_server_xen.ml index ba3dd7e2b8a..3b348399a36 100644 --- a/ocaml/xenopsd/xc/xenops_server_xen.ml +++ b/ocaml/xenopsd/xc/xenops_server_xen.ml @@ -51,6 +51,8 @@ let _xenguest = "xenguest" let _emu_manager = "emu-manager" +let ( // ) = Filename.concat + let run cmd args = debug "%s %s" cmd (String.concat " " args) ; fst (Forkhelpers.execute_command_get_output cmd args) @@ -58,20 +60,20 @@ let run cmd args = let choose_alternative kind default platformdata = debug "looking for %s in [ %s ]" kind (String.concat "; " (List.map (fun (k, v) -> k ^ " : " ^ v) platformdata)) ; - if List.mem_assoc kind platformdata then - let x = List.assoc kind platformdata in - let dir = Filename.concat !Xc_resources.alternatives kind in - let available = try Array.to_list (Sys.readdir dir) with _ -> [] in + let path_available x = + let dir = !Xc_resources.alternatives // kind in + let available = try Sys.readdir dir with _ -> [||] in (* If x has been put in the directory (by root) then it's safe to use *) - if List.mem x available then - Filename.concat dir x + if Array.mem x available then + Some (dir // x) else ( error "Invalid platform:%s=%s (check execute permissions of %s)" kind x - (Filename.concat dir x) ; - default + (dir // x) ; + None ) - else - default + in + Option.bind (List.assoc_opt kind platformdata) path_available + |> Option.value ~default (* We allow qemu-dm to be overriden via a platform flag *) let choose_qemu_dm x = @@ -185,6 +187,7 @@ module VmExtra = struct ; pv_drivers_detected: bool [@default false] ; xen_platform: (int * int) option (* (device_id, revision) for QEMU *) ; platformdata: (string * string) list [@default []] + ; attached_vdis: (string * attached_vdi) list [@default []] } [@@deriving rpcty] @@ -409,18 +412,16 @@ module Storage = struct let vm_of_domid = vm_of_domid (* We need to deal with driver domains here: *) - let attach_and_activate ~xc:_ ~xs task vm dp sr vdi 
read_write = + let attach ~xc:_ ~xs task vm dp sr vdi read_write = let vmdomid = vm_of_domid (domid_of_uuid ~xs (uuid_of_string vm)) in - let result = - attach_and_activate ~task ~_vm:vm ~vmdomid ~dp ~sr ~vdi ~read_write - in + let result = attach ~task ~_vm:vm ~vmdomid ~dp ~sr ~vdi ~read_write in let backend = Xenops_task.with_subtask task (Printf.sprintf "Policy.get_backend_vm %s %s %s" vm (Sr.string_of sr) (Vdi.string_of vdi) ) (transform_exception (fun () -> - Client.Policy.get_backend_vm "attach_and_activate" vm sr vdi + Client.Policy.get_backend_vm "attach" vm sr vdi ) ) in @@ -430,6 +431,10 @@ module Storage = struct | Some domid -> {domid; attach_info= result} + let activate ~xc:_ ~xs task vm dp sr vdi = + let vmdomid = vm_of_domid (domid_of_uuid ~xs (uuid_of_string vm)) in + activate ~task ~_vm:vm ~vmdomid ~dp ~sr ~vdi + let deactivate = deactivate let dp_destroy = dp_destroy @@ -504,10 +509,11 @@ let with_disk ~xc ~xs task disk write f = (fun () -> let frontend_domid = this_domid ~xs in let frontend_vm = get_uuid ~xc frontend_domid |> Uuidx.to_string in - let vdi = - attach_and_activate ~xc ~xs task frontend_vm dp sr vdi write + let attached_vdi = attach ~xc ~xs task frontend_vm dp sr vdi write in + activate ~xc ~xs task frontend_vm dp sr vdi ; + let device = + create_vbd_frontend ~xc ~xs task frontend_domid attached_vdi in - let device = create_vbd_frontend ~xc ~xs task frontend_domid vdi in finally (fun () -> match device with @@ -1281,7 +1287,7 @@ module VM = struct ; kernel= "" ; vcpus= vm.vcpu_max ; priv= builder_spec_info - ; has_hard_affinity= vm.scheduler_params.affinity <> [] + ; hard_affinity= vm.scheduler_params.affinity } in VmExtra. 
@@ -1297,8 +1303,6 @@ module VM = struct |> rpc_of VmExtra.persistent_t |> Jsonrpc.to_string - let mkints n = List.init n Fun.id - let generate_create_info ~xs:_ vm persistent = let ty = match persistent.VmExtra.ty with Some ty -> ty | None -> vm.ty in let hvm = @@ -1306,38 +1310,6 @@ module VM = struct in (* XXX add per-vcpu information to the platform data *) (* VCPU configuration *) - let xcext = Xenctrlext.get_handle () in - let pcpus = Xenctrlext.get_max_nr_cpus xcext in - let all_pcpus = mkints pcpus in - let all_vcpus = mkints vm.vcpu_max in - let masks = - match vm.scheduler_params.affinity with - | [] -> - (* Every vcpu can run on every pcpu *) - List.map (fun _ -> all_pcpus) all_vcpus - | m :: ms -> - (* Treat the first as the template for the rest *) - let defaults = List.map (fun _ -> m) all_vcpus in - Xapi_stdext_std.Listext.List.take vm.vcpu_max ((m :: ms) @ defaults) - in - (* convert a mask into a binary string, one char per pCPU *) - let bitmap cpus : string = - let cpus = List.filter (fun x -> x >= 0 && x < pcpus) cpus in - let result = Bytes.make pcpus '0' in - List.iter (fun cpu -> Bytes.set result cpu '1') cpus ; - Bytes.unsafe_to_string result - in - let affinity = - snd - (List.fold_left - (fun (idx, acc) mask -> - ( idx + 1 - , (Printf.sprintf "vcpu/%d/affinity" idx, bitmap mask) :: acc - ) - ) - (0, []) masks - ) - in let weight = vm.scheduler_params.priority |> Option.map (fun (w, c) -> @@ -1353,7 +1325,6 @@ module VM = struct (match vm.ty with PVinPVH _ -> vm.vcpu_max | _ -> vm.vcpus) ) ] - @ affinity @ weight in let set_generation_id platformdata = @@ -1418,7 +1389,8 @@ module VM = struct in (device_id, revision) - let create_exn task memory_upper_bound vm final_id no_sharept = + let create_exn task memory_upper_bound vm final_id no_sharept num_of_vbds + num_of_vifs = let k = vm.Vm.id in with_xc_and_xs (fun xc xs -> (* Ensure the DB contains something for this VM - this is to avoid a @@ -1547,7 +1519,8 @@ module VM = struct let 
create_info = generate_create_info ~xs vm persistent in let domid = Domain.make ~xc ~xs create_info vm.vcpu_max domain_config - (uuid_of_vm vm) final_id no_sharept + (uuid_of_vm vm) final_id no_sharept num_of_vbds + num_of_vifs in Mem.transfer_reservation_to_domain dbg domid reservation_id ; let initial_target = @@ -2034,7 +2007,7 @@ module VM = struct ; kernel ; vcpus= vm.vcpu_max ; priv - ; has_hard_affinity= vm.scheduler_params.affinity <> [] + ; hard_affinity= vm.scheduler_params.affinity } in debug "static_max_mib=%Ld" static_max_mib ; @@ -2313,10 +2286,12 @@ module VM = struct ) (create_device_model_config vm vmextra vbds vifs vgpus vusbs) ; match vm.Vm.ty with - | Vm.PV {vncterm= true; vncterm_ip= ip; _} - | Vm.PVinPVH {vncterm= true; vncterm_ip= ip; _} -> - Service.PV_Vnc.start ~xs ?ip di.Xenctrl.domid - | _ -> + | PV {vncterm; vncterm_ip= ip; _} + | PVH {vncterm; vncterm_ip= ip; _} + | PVinPVH {vncterm; vncterm_ip= ip; _} -> + if vncterm then + Service.PV_Vnc.start ~xs ?ip di.Xenctrl.domid + | HVM _ -> () with Device.Ioemu_failed (name, msg) -> raise (Xenopsd_error (Failed_to_start_emulator (vm.Vm.id, name, msg))) @@ -2513,6 +2488,7 @@ module VM = struct @@ fun () -> pre_suspend_callback task ) ; + with_tracing ~task ~name:"VM_save_request_shutdown" @@ fun () -> if not ( with_tracing ~task @@ -2521,6 +2497,13 @@ module VM = struct ) then raise (Xenopsd_error Failed_to_acknowledge_suspend_request) ; + (* If this is for a migration, record the begin time *) + ( match data with + | FD _ -> + with_tracing ~task ~name:"VM_migrate_downtime_begin" Fun.id + | _ -> + () + ) ; if not ( with_tracing ~task @@ -2645,7 +2628,6 @@ module VM = struct in ({x with Domain.memory_target= initial_target}, timeoffset) in - let no_incr_generationid = false in let vtpm = vtpm_of ~vm in ( try with_data ~xc ~xs task data false @@ fun fd -> @@ -2661,9 +2643,8 @@ module VM = struct None in let manager_path = choose_emu_manager vm.Vm.platformdata in - Domain.restore task ~xc ~xs 
~dm:(dm_of ~vm) ~store_domid - ~console_domid ~no_incr_generationid (* XXX progress_callback *) - ~timeoffset ~extras build_info ~manager_path ~vtpm domid fd vgpu_fd + Domain.restore task ~xc ~xs ~dm:(dm_of ~vm) ~timeoffset ~extras + build_info ~manager_path ~vtpm domid fd vgpu_fd with e -> error "VM %s: restore failed: %s" vm.Vm.id (Printexc.to_string e) ; (* As of xen-unstable.hg 779c0ef9682 libxenguest will destroy @@ -2744,9 +2725,10 @@ module VM = struct (fun port -> {Vm.protocol= Vm.Vt100; port; path= ""}) (Device.get_tc_port ~xs di.Xenctrl.domid) in - let local x = - Printf.sprintf "/local/domain/%d/%s" di.Xenctrl.domid x + let root_path = + Printf.sprintf "/local/domain/%d" di.Xenctrl.domid in + let local x = Printf.sprintf "%s/%s" root_path x in let uncooperative = try ignore_string (xs.Xs.read (local "memory/uncooperative")) ; @@ -2849,12 +2831,11 @@ module VM = struct ; ("drivers", None, 0) ; ("data", None, 0) (* in particular avoid data/volumes which contains many entries for each disk *) + ; ("data/service", None, 1) (* data/service//*) ] |> List.fold_left (fun acc (dir, excludes, depth) -> - ls_lR ?excludes ~depth - (Printf.sprintf "/local/domain/%d" di.Xenctrl.domid) - acc dir + ls_lR ?excludes ~depth root_path acc dir ) (quota, []) |> fun (quota, acc) -> @@ -2862,9 +2843,7 @@ module VM = struct in let quota, xsdata_state = Domain.allowed_xsdata_prefixes - |> List.fold_left - (ls_lR (Printf.sprintf "/local/domain/%d" di.Xenctrl.domid)) - (quota, []) + |> List.fold_left (ls_lR root_path) (quota, []) in let path = Device_common.xenops_path_of_domain di.Xenctrl.domid @@ -3528,11 +3507,14 @@ module VBD = struct let vdi_attach_path vbd = Printf.sprintf "/xapi/%s/private/vdis/%s" (fst vbd.id) (snd vbd.id) - let attach_and_activate task xc xs frontend_domid vbd vdi = - let vdi = - match vdi with - | None -> - (* XXX: do something better with CDROMs *) + type attachment_status = Attached of attached_vdi | PathToAttach of string + + (* For vdis that are None 
or local, return Attached attached_vdi, otherwise return PathToAttach path *) + let attachment_status_of_vdi xs vdi = + match vdi with + | None -> + (* XXX: do something better with CDROMs *) + Attached { domid= this_domid ~xs ; attach_info= @@ -3545,7 +3527,8 @@ module VBD = struct ] } } - | Some (Local path) -> + | Some (Local path) -> + Attached { domid= this_domid ~xs ; attach_info= @@ -3558,17 +3541,34 @@ module VBD = struct ] } } - | Some (VDI path) -> + | Some (VDI path) -> + PathToAttach path + + let attach' task xc xs frontend_domid vbd vdi = + let vdi = + match attachment_status_of_vdi xs vdi with + | Attached attached_vdi -> + attached_vdi + | PathToAttach path -> let sr, vdi = Storage.get_disk_by_name task path in let dp = Storage.id_of (string_of_int frontend_domid) vbd.id in let vm = fst vbd.id in - Storage.attach_and_activate ~xc ~xs task vm dp sr vdi - (vbd.mode = ReadWrite) + Storage.attach ~xc ~xs task vm dp sr vdi (vbd.mode = ReadWrite) in xs.Xs.write (vdi_attach_path vbd) (vdi |> rpc_of attached_vdi |> Jsonrpc.to_string) ; vdi + let activate' task xc xs frontend_domid vbd vdi = + match attachment_status_of_vdi xs vdi with + | Attached _ -> + () + | PathToAttach path -> + let sr, vdi = Storage.get_disk_by_name task path in + let dp = Storage.id_of (string_of_int frontend_domid) vbd.id in + let vm = fst vbd.id in + Storage.activate ~xc ~xs task vm dp sr vdi + let frontend_domid_of_device device = device.Device_common.frontend.Device_common.domid @@ -3647,153 +3647,221 @@ module VBD = struct let vdi_path_of_device ~xs device = Device_common.backend_path_of_device ~xs device ^ "/vdi" - let plug task vm vbd = + let attach task vm vbd = (* If the vbd isn't listed as "active" then we don't automatically plug this - one in *) - if not (get_active vm vbd) then - debug "VBD %s.%s is not active: not plugging into VM" (fst vbd.Vbd.id) - (snd vbd.Vbd.id) - else - on_frontend - (fun xc xs frontend_domid domain_type -> - if vbd.backend = None && domain_type <> 
Vm.Domain_HVM then - info - "VM = %s; an empty CDROM drive on PV and PVinPVH guests is \ - simulated by unplugging the whole drive" - vm - else - let vdi = - attach_and_activate task xc xs frontend_domid vbd vbd.backend - in - let params, xenstore_data, extra_keys = - params_of_backend vdi.attach_info - in - let new_keys = - List.map (fun (k, v) -> ("sm-data/" ^ k, v)) xenstore_data - @ extra_keys - in - let extra_backend_keys = - List.fold_left - (fun acc (k, v) -> (k, v) :: List.remove_assoc k acc) - vbd.extra_backend_keys new_keys - in - let kind = device_kind_of ~xs vbd in - (* Remember the VBD id with the device *) - let vbd_id = (_device_id kind, id_of vbd) in - (* Remember the VDI with the device (for later deactivation) *) - let vdi_id = - (_vdi_id, vbd.backend |> rpc_of backend |> Jsonrpc.to_string) - in - let dp_id = - (_dp_id, Storage.id_of (string_of_int frontend_domid) vbd.Vbd.id) - in - let x = - { - Device.Vbd.mode= - ( match vbd.mode with - | ReadOnly -> - Device.Vbd.ReadOnly - | ReadWrite -> - Device.Vbd.ReadWrite - ) - ; device_number= vbd.position - ; phystype= Device.Vbd.Phys - ; params - ; dev_type= - ( match vbd.ty with - | CDROM -> - Device.Vbd.CDROM - | Disk -> - Device.Vbd.Disk - | Floppy -> - Device.Vbd.Floppy + one in *) + let attached_vdi = + if not (get_active vm vbd) then ( + debug "VBD %s.%s is not active: not plugging into VM" (fst vbd.Vbd.id) + (snd vbd.Vbd.id) ; + None + ) else + on_frontend + (fun xc xs frontend_domid domain_type -> + if vbd.backend = None && domain_type <> Vm.Domain_HVM then ( + info + "VM = %s; an empty CDROM drive on PV and PVinPVH guests is \ + simulated by unplugging the whole drive" + vm ; + None + ) else + Some (attach' task xc xs frontend_domid vbd vbd.backend) + ) + vm + in + match attached_vdi with + | None -> + () + | Some vdi -> + (* Record the attached_vdi so it can be used in activate *) + let _ = + DB.update_exn vm (fun vm_t -> + Some + VmExtra. 
+ { + persistent= + { + vm_t.VmExtra.persistent with + (* Index by id_of vbd rather than vbd.id as VmExtra is + already indexed by VM id, so the VM id part of + vbd.id is unnecessary and causes issues finding the + attached_vdi when the VM is renamed. *) + attached_vdis= + (id_of vbd, vdi) + :: List.remove_assoc (id_of vbd) + vm_t.persistent.attached_vdis + } + } + ) + in + () + + let cleanup_attached_vdis vm vbd_id = + let _ = + DB.update_exn vm (fun vm_t -> + let remaining_vdis = + List.remove_assoc vbd_id vm_t.persistent.attached_vdis + in + Some + {persistent= {vm_t.persistent with attached_vdis= remaining_vdis}} + ) + in + () + + let activate task vm vbd = + let vmextra = DB.read_exn vm in + match List.assoc_opt (id_of vbd) vmextra.persistent.attached_vdis with + | None -> + debug "No attached_vdi info, so not activating" + | Some vdi -> + finally + (fun () -> + on_frontend + (fun xc xs frontend_domid domain_type -> + activate' task xc xs frontend_domid vbd vbd.backend ; + let params, xenstore_data, extra_keys = + params_of_backend vdi.attach_info + in + let new_keys = + List.map (fun (k, v) -> ("sm-data/" ^ k, v)) xenstore_data + @ extra_keys + in + let extra_backend_keys = + List.fold_left + (fun acc (k, v) -> (k, v) :: List.remove_assoc k acc) + vbd.extra_backend_keys new_keys + in + let kind = device_kind_of ~xs vbd in + (* Remember the VBD id with the device *) + let vbd_id = (_device_id kind, id_of vbd) in + (* Remember the VDI with the device (for later deactivation) *) + let vdi_id = + (_vdi_id, vbd.backend |> rpc_of backend |> Jsonrpc.to_string) + in + let dp_id = + ( _dp_id + , Storage.id_of (string_of_int frontend_domid) vbd.Vbd.id ) - ; unpluggable= vbd.unpluggable - ; protocol= None - ; kind - ; extra_backend_keys - ; extra_private_keys= - dp_id :: vdi_id :: vbd_id :: vbd.extra_private_keys - ; backend_domid= vdi.domid - } - in - let dev = - Xenops_task.with_subtask task - (Printf.sprintf "Vbd.add %s" (id_of vbd)) - (fun () -> - Device.Vbd.add 
task ~xc ~xs - ~hvm:(domain_type = Vm.Domain_HVM) - x frontend_domid - ) - in - (* We store away the disk so we can implement VBD.stat *) - Option.iter - (fun d -> - xs.Xs.write - (vdi_path_of_device ~xs dev) - (d |> rpc_of disk |> Jsonrpc.to_string) - ) - vbd.backend ; - (* NB now the frontend position has been resolved *) - let open Device_common in - let device_number = - dev.frontend.devid |> Device_number.of_xenstore_key - in - let qemu_domid = this_domid ~xs in - let qemu_frontend = - let maybe_create_vbd_frontend () = - let index = Device_number.disk device_number in - match vbd.Vbd.backend with - | None -> - Some (index, Empty) - | Some _ -> - Some (index, create_vbd_frontend ~xc ~xs task qemu_domid vdi) - in - match (device_number :> Device_number.bus_type * int * int) with - | Ide, n, _ when 0 <= n && n < 4 -> - maybe_create_vbd_frontend () - | Floppy, n, _ when 0 <= n && n < 2 -> - maybe_create_vbd_frontend () - | Ide, n, _ -> - D.warn - "qemu_frontend: Ide supports device numbers between 0 and \ - 3, but got: %i" - n ; - None - | Floppy, n, _ -> - D.warn - "qemu_frontend: Floppy supports device numbers between 0 \ - and 1, but got: %i" - n ; - None - | (Xen | Scsi), _, _ -> - None - in - (* Remember what we've just done *) - (* Dom0 doesn't have a vm_t - we don't need this currently, but when - we have storage driver domains, we will. Also this causes the - SMRT tests to fail, as they demand the loopback VBDs *) - Option.iter - (fun q -> - let _ = - DB.update_exn vm (fun vm_t -> - Some - VmExtra. 
- { - persistent= + in + let x = + { + Device.Vbd.mode= + ( match vbd.mode with + | ReadOnly -> + Device.Vbd.ReadOnly + | ReadWrite -> + Device.Vbd.ReadWrite + ) + ; device_number= vbd.position + ; phystype= Device.Vbd.Phys + ; params + ; dev_type= + ( match vbd.ty with + | CDROM -> + Device.Vbd.CDROM + | Disk -> + Device.Vbd.Disk + | Floppy -> + Device.Vbd.Floppy + ) + ; unpluggable= vbd.unpluggable + ; protocol= None + ; kind + ; extra_backend_keys + ; extra_private_keys= + dp_id :: vdi_id :: vbd_id :: vbd.extra_private_keys + ; backend_domid= vdi.domid + } + in + let dev = + with_tracing ~task ~name:"VBD_activate_add" @@ fun () -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.add %s" (id_of vbd)) + (fun () -> + Device.Vbd.add task ~xc ~xs + ~hvm:(domain_type = Vm.Domain_HVM) + x frontend_domid + ) + in + (* We store away the disk so we can implement VBD.stat *) + ( with_tracing ~task ~name:"VBD_activate_xs_write" @@ fun () -> + Option.iter + (fun d -> + xs.Xs.write + (vdi_path_of_device ~xs dev) + (d |> rpc_of disk |> Jsonrpc.to_string) + ) + vbd.backend + ) ; + with_tracing ~task ~name:"VBD_activate_qemu" @@ fun () -> + (* NB now the frontend position has been resolved *) + let open Device_common in + let device_number = + dev.frontend.devid |> Device_number.of_xenstore_key + in + let qemu_domid = this_domid ~xs in + let qemu_frontend = + let maybe_create_vbd_frontend () = + let index = Device_number.disk device_number in + match vbd.Vbd.backend with + | None -> + Some (index, Empty) + | Some _ -> + Some + ( index + , create_vbd_frontend ~xc ~xs task qemu_domid vdi + ) + in + match + (device_number :> Device_number.bus_type * int * int) + with + | Ide, n, _ when 0 <= n && n < 4 -> + maybe_create_vbd_frontend () + | Floppy, n, _ when 0 <= n && n < 2 -> + maybe_create_vbd_frontend () + | Ide, n, _ -> + D.warn + "qemu_frontend: Ide supports device numbers between 0 \ + and 3, but got: %i" + n ; + None + | Floppy, n, _ -> + D.warn + "qemu_frontend: Floppy 
supports device numbers between \ + 0 and 1, but got: %i" + n ; + None + | (Xen | Scsi), _, _ -> + None + in + (* Remember what we've just done *) + (* Dom0 doesn't have a vm_t - we don't need this currently, but when + we have storage driver domains, we will. Also this causes the + SMRT tests to fail, as they demand the loopback VBDs *) + Option.iter + (fun q -> + let _ = + DB.update_exn vm (fun vm_t -> + Some + VmExtra. { - vm_t.VmExtra.persistent with - qemu_vbds= - (vbd.Vbd.id, q) :: vm_t.persistent.qemu_vbds + persistent= + { + vm_t.VmExtra.persistent with + qemu_vbds= + (vbd.Vbd.id, q) + :: vm_t.persistent.qemu_vbds + } } - } + ) + in + () ) - in - () + qemu_frontend ) - qemu_frontend - ) - vm + vm + ) + (fun () -> cleanup_attached_vdis vm (id_of vbd)) let unplug task vm vbd force = with_xc_and_xs (fun xc xs -> @@ -3849,6 +3917,7 @@ module VBD = struct (* this happens on normal shutdown too *) (* Case (1): success; Case (2): success; Case (3): an exception is thrown *) + with_tracing ~task ~name:"VBD_device_shutdown" @@ fun () -> Xenops_task.with_subtask task (Printf.sprintf "Vbd.clean_shutdown %s" (id_of vbd)) (fun () -> @@ -3861,14 +3930,17 @@ module VBD = struct the DP if the backend is of type VDI *) finally (fun () -> - Option.iter - (fun dev -> - Xenops_task.with_subtask task - (Printf.sprintf "Vbd.release %s" (id_of vbd)) - (fun () -> Device.Vbd.release task ~xc ~xs dev) - ) - dev ; + with_tracing ~task ~name:"VBD_device_release" (fun () -> + Option.iter + (fun dev -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.release %s" (id_of vbd)) + (fun () -> Device.Vbd.release task ~xc ~xs dev) + ) + dev + ) ; (* If we have a qemu frontend, detach this too. 
*) + with_tracing ~task ~name:"VBD_detach_qemu" @@ fun () -> let _ = DB.update vm (Option.map (fun vm_t -> @@ -3899,6 +3971,7 @@ module VBD = struct () ) (fun () -> + with_tracing ~task ~name:"VBD_dp_destroy" @@ fun () -> match (domid, backend) with | Some x, None | Some x, Some (VDI _) -> Storage.dp_destroy task @@ -3911,18 +3984,182 @@ module VBD = struct raise (Xenopsd_error (Device_detach_rejected ("VBD", id_of vbd, s))) ) + let deactivate task vm vbd force = + with_xc_and_xs (fun xc xs -> + try + (* On destroying the datapath + + 1. if the device has already been shutdown and deactivated (as in + suspend) we must call DP.destroy here to avoid leaks + + 2. if the device is successfully shutdown here then we must call + DP.destroy because no-one else will + + 3. if the device shutdown is rejected then we should leave the DP + alone and rely on the event thread calling us again later. *) + let domid = domid_of_uuid ~xs (uuid_of_string vm) in + (* If the device is gone then we don't need to shut it down but we do + need to free any storage resources. 
*) + let dev = + try + Some (device_by_id xc xs vm (device_kind_of ~xs vbd) (id_of vbd)) + with + | Xenopsd_error (Does_not_exist (_, _)) -> + debug "VM = %s; VBD = %s; Ignoring missing domain" vm (id_of vbd) ; + None + | Xenopsd_error Device_not_connected -> + debug "VM = %s; VBD = %s; Ignoring missing device" vm (id_of vbd) ; + None + in + let backend = + match dev with + | None -> + None + | Some dv -> ( + match + Rpcmarshal.unmarshal typ_of_backend + (Device.Generic.get_private_key ~xs dv _vdi_id + |> Jsonrpc.of_string + ) + with + | Ok x -> + x + | Error (`Msg m) -> + internal_error "Failed to unmarshal VBD backend: %s" m + ) + in + Option.iter + (fun dev -> + if force && not (Device.can_surprise_remove ~xs dev) then + debug + "VM = %s; VBD = %s; Device is not surprise-removable \ + (ignoring and removing anyway)" + vm (id_of vbd) ; + (* this happens on normal shutdown too *) + (* Case (1): success; Case (2): success; Case (3): an exception is + thrown *) + with_tracing ~task ~name:"VBD_device_shutdown" @@ fun () -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.clean_shutdown %s" (id_of vbd)) + (fun () -> + (if force then Device.hard_shutdown else Device.clean_shutdown) + task ~xs dev + ) + ) + dev ; + (* We now have a shutdown device but an active DP: we should destroy + the DP if the backend is of type VDI *) + finally + (fun () -> + with_tracing ~task ~name:"VBD_device_release" (fun () -> + Option.iter + (fun dev -> + Xenops_task.with_subtask task + (Printf.sprintf "Vbd.release %s" (id_of vbd)) + (fun () -> Device.Vbd.release task ~xc ~xs dev) + ) + dev + ) ; + (* If we have a qemu frontend, detach this too. 
*) + with_tracing ~task ~name:"VBD_detach_qemu" @@ fun () -> + let _ = + DB.update vm + (Option.map (fun vm_t -> + let persistent = vm_t.VmExtra.persistent in + if List.mem_assoc vbd.Vbd.id persistent.VmExtra.qemu_vbds + then ( + let _, qemu_vbd = + List.assoc vbd.Vbd.id persistent.VmExtra.qemu_vbds + in + (* destroy_vbd_frontend ignores 'refusing to close' + transients' *) + destroy_vbd_frontend ~xc ~xs task qemu_vbd ; + VmExtra. + { + persistent= + { + persistent with + qemu_vbds= + List.remove_assoc vbd.Vbd.id + persistent.qemu_vbds + } + } + ) else + vm_t + ) + ) + in + () + ) + (fun () -> + with_tracing ~task ~name:"VBD_deactivate" @@ fun () -> + let vmid = Storage.vm_of_domid domid in + match (domid, backend) with + | Some x, Some (VDI path) -> + let sr, vdi = Storage.get_disk_by_name task path in + let dp = Storage.id_of (string_of_int x) vbd.id in + Storage.deactivate task dp sr vdi vmid + (* We don't need to detach Local or CDROM *) + | _ -> + () + ) + with Device_common.Device_error (_, s) -> + debug "Caught Device_error: %s" s ; + raise (Xenopsd_error (Device_detach_rejected ("VBD", id_of vbd, s))) + ) + + let detach task vm vbd = + with_xc_and_xs (fun xc xs -> + let domid = domid_of_uuid ~xs (uuid_of_string vm) in + let dev = + try + Some (device_by_id xc xs vm (device_kind_of ~xs vbd) (id_of vbd)) + with + | Xenopsd_error (Does_not_exist (_, _)) -> + debug "VM = %s; VBD = %s; Ignoring missing domain" vm (id_of vbd) ; + None + | Xenopsd_error Device_not_connected -> + debug "VM = %s; VBD = %s; Ignoring missing device" vm (id_of vbd) ; + None + in + let backend = + match dev with + | None -> + None + | Some dv -> ( + match + Rpcmarshal.unmarshal typ_of_backend + (Device.Generic.get_private_key ~xs dv _vdi_id + |> Jsonrpc.of_string + ) + with + | Ok x -> + x + | Error (`Msg m) -> + internal_error "Failed to unmarshal VBD backend: %s" m + ) + in + with_tracing ~task ~name:"VBD_dp_destroy" @@ fun () -> + match (domid, backend) with + | Some x, None | Some 
x, Some (VDI _) -> + Storage.dp_destroy task (Storage.id_of (string_of_int x) vbd.Vbd.id) + | _ -> + () + ) ; + cleanup_attached_vdis vm (id_of vbd) + let insert task vm vbd d = on_frontend (fun xc xs frontend_domid domain_type -> - if domain_type <> Vm.Domain_HVM then - plug task vm {vbd with backend= Some d} - else + if domain_type <> Vm.Domain_HVM then ( + attach task vm {vbd with backend= Some d} ; + activate task vm {vbd with backend= Some d} + ) else let (device : Device_common.device) = device_by_id xc xs vm (device_kind_of ~xs vbd) (id_of vbd) in - let vdi = - attach_and_activate task xc xs frontend_domid vbd (Some d) - in + let vdi = attach' task xc xs frontend_domid vbd (Some d) in + activate' task xc xs frontend_domid vbd (Some d) ; let params, xenstore_data, _ = params_of_backend vdi.attach_info in let phystype = Device.Vbd.Phys in (* We store away the disk so we can implement VBD.stat *) @@ -4823,8 +5060,8 @@ module Actions = struct let open Printf in [ sprintf "/local/domain/%d/attr" domid - ; sprintf "/local/domain/%d/data/updated" domid ; sprintf "/local/domain/%d/data/ts" domid + ; sprintf "/local/domain/%d/data/service" domid ; sprintf "/local/domain/%d/memory/target" domid ; sprintf "/local/domain/%d/memory/uncooperative" domid ; sprintf "/local/domain/%d/console/vnc-port" domid @@ -5146,8 +5383,11 @@ let init () = {Xs_protocol.ACL.owner= 0; other= Xs_protocol.ACL.READ; acl= []} ) ; Device.Backend.init () ; - Xenops_server.numa_placement := + Xenops_server.default_numa_affinity_policy := if !Xenopsd.numa_placement_compat then Best_effort else Any ; + info "Default NUMA affinity policy is '%s'" + Xenops_server.(string_of_numa_affinity_policy !default_numa_affinity_policy) ; + Xenops_server.numa_placement := !Xenops_server.default_numa_affinity_policy ; Domain.numa_init () ; debug "xenstore is responding to requests" ; let () = Watcher.create_watcher_thread () in diff --git a/ocaml/xenopsd/xenopsd.conf b/ocaml/xenopsd/xenopsd.conf index 
e80194c1f55..e1c3c87c7cb 100644 --- a/ocaml/xenopsd/xenopsd.conf +++ b/ocaml/xenopsd/xenopsd.conf @@ -108,3 +108,11 @@ disable-logging-for=http tracing tracing_export # time to wait for in-guest PV drivers to acknowledge a shutdown request # before we conclude that the drivers have failed # domain_shutdown_ack_timeout = 60 + +# Controls whether, when the VM starts in HVM mode, the Xen PCI MMIO used +# by grant tables is mapped as Uncached (UC, the default) or WriteBack +# (WB, the workaround). WB mapping could improve performance of devices +# using grant tables. This is useful on AMD platform only. +# On Intel a similar effect is already achieved with iPAT in Xen, +# but setting this to 0 works on Intel too. +# xen-platform-pci-bar-uc=false diff --git a/ocaml/xs-trace/dune b/ocaml/xs-trace/dune index e34fc7e5575..4a19b8c888a 100644 --- a/ocaml/xs-trace/dune +++ b/ocaml/xs-trace/dune @@ -1,23 +1,18 @@ (executable - (modes exe) - (name xs_trace) - (public_name xs-trace) - (package xapi-tools) - (libraries - uri - tracing - cmdliner - tracing_export - xapi-stdext-unix - zstd - ) -) + (modes exe) + (name xs_trace) + (public_name xs-trace) + (package xapi-tools) + (libraries uri tracing cmdliner tracing_export yojson xapi-stdext-unix zstd)) (rule - (targets xs-trace.1) - (deps (:exe xs_trace.exe)) - (action (with-stdout-to %{targets} (run %{exe} --help=groff))) -) + (targets xs-trace.1) + (deps + (:exe xs_trace.exe)) + (action + (with-stdout-to + %{targets} + (run %{exe} --help=groff)))) ; not expected by the specfile ;(install diff --git a/ocaml/xs-trace/xs_trace.ml b/ocaml/xs-trace/xs_trace.ml index 6360649fb20..a5f0c8becef 100644 --- a/ocaml/xs-trace/xs_trace.ml +++ b/ocaml/xs-trace/xs_trace.ml @@ -25,10 +25,7 @@ module Exporter = struct | _ -> () - (** Export traces from file system to a remote endpoint. 
*) - let export erase src dst = - let dst = Uri.of_string dst in - let submit_json = submit_json dst in + let iter_src src f = let rec export_file = function | path when Sys.is_directory path -> (* Recursively export trace files. *) @@ -38,7 +35,7 @@ module Exporter = struct (* Decompress compressed trace file and submit each line iteratively *) let args = [|"zstdcat"; path|] in let ic = Unix.open_process_args_in args.(0) args in - Unixext.lines_iter submit_json ic ; + Unixext.lines_iter f ic ; match Unix.close_process_in ic with | Unix.WEXITED 0 -> () @@ -47,15 +44,27 @@ module Exporter = struct ) | path when Filename.check_suffix path ".ndjson" -> (* Submit traces line by line. *) - Unixext.readfile_line submit_json path + Unixext.readfile_line f path | path -> (* Assume any other extension is a valid JSON file. *) let json = Unixext.string_of_file path in - submit_json json + f json in - export_file src ; + export_file src + + (** Export traces from file system to a remote endpoint. *) + let export erase src dst = + let dst = Uri.of_string dst in + let submit_json = submit_json dst in + iter_src src submit_json ; if erase then Unixext.rm_rec ~rm_top:true src + + let pretty_print src = + iter_src src @@ fun line -> + line + |> Yojson.Safe.from_string + |> Yojson.Safe.pretty_to_channel ~std:true stdout end module Cli = struct @@ -83,6 +92,11 @@ module Cli = struct let doc = "copy a trace to an endpoint and erase it afterwards" in Cmd.(v (info "mv" ~doc) term) + let pp_cmd = + let term = Term.(const Exporter.pretty_print $ src) in + let doc = "Pretty print NDJSON traces" in + Cmd.(v (info "pp" ~doc) term) + let xs_trace_cmd = let man = [ @@ -94,7 +108,7 @@ module Cli = struct let doc = "utility for working with local trace files" in Cmd.info "xs-trace" ~doc ~version:"0.1" ~man in - Cmd.group desc [cp_cmd; mv_cmd] + Cmd.group desc [cp_cmd; mv_cmd; pp_cmd] let main () = Cmd.eval xs_trace_cmd end diff --git a/clock.opam b/opam/clock.opam similarity index 100% rename 
from clock.opam rename to opam/clock.opam diff --git a/cohttp-posix.opam b/opam/cohttp-posix.opam similarity index 100% rename from cohttp-posix.opam rename to opam/cohttp-posix.opam diff --git a/cohttp-posix.opam.template b/opam/cohttp-posix.opam.template similarity index 100% rename from cohttp-posix.opam.template rename to opam/cohttp-posix.opam.template diff --git a/ezxenstore.opam b/opam/ezxenstore.opam similarity index 100% rename from ezxenstore.opam rename to opam/ezxenstore.opam diff --git a/ezxenstore.opam.template b/opam/ezxenstore.opam.template similarity index 100% rename from ezxenstore.opam.template rename to opam/ezxenstore.opam.template diff --git a/forkexec.opam b/opam/forkexec.opam similarity index 100% rename from forkexec.opam rename to opam/forkexec.opam diff --git a/gzip.opam b/opam/gzip.opam similarity index 100% rename from gzip.opam rename to opam/gzip.opam diff --git a/gzip.opam.template b/opam/gzip.opam.template similarity index 100% rename from gzip.opam.template rename to opam/gzip.opam.template diff --git a/http-lib.opam b/opam/http-lib.opam similarity index 100% rename from http-lib.opam rename to opam/http-lib.opam diff --git a/message-switch-cli.opam b/opam/message-switch-cli.opam similarity index 100% rename from message-switch-cli.opam rename to opam/message-switch-cli.opam diff --git a/message-switch-cli.opam.template b/opam/message-switch-cli.opam.template similarity index 100% rename from message-switch-cli.opam.template rename to opam/message-switch-cli.opam.template diff --git a/message-switch-core.opam b/opam/message-switch-core.opam similarity index 97% rename from message-switch-core.opam rename to opam/message-switch-core.opam index a6b183bdd7f..dc4ca95da07 100644 --- a/message-switch-core.opam +++ b/opam/message-switch-core.opam @@ -20,6 +20,7 @@ depends: [ "uri" "xapi-log" {= version} "xapi-stdext-threads" {= version} + "xapi-tracing" {= version} "odoc" {with-doc} ] build: [ diff --git a/message-switch-lwt.opam 
b/opam/message-switch-lwt.opam similarity index 100% rename from message-switch-lwt.opam rename to opam/message-switch-lwt.opam diff --git a/message-switch-lwt.opam.template b/opam/message-switch-lwt.opam.template similarity index 100% rename from message-switch-lwt.opam.template rename to opam/message-switch-lwt.opam.template diff --git a/message-switch-unix.opam b/opam/message-switch-unix.opam similarity index 96% rename from message-switch-unix.opam rename to opam/message-switch-unix.opam index c9379979e2d..975d81ac831 100644 --- a/message-switch-unix.opam +++ b/opam/message-switch-unix.opam @@ -16,6 +16,7 @@ depends: [ "ppx_deriving_rpc" "rpclib" "xapi-stdext-threads" {= version} + "xapi-tracing" {= version} "odoc" {with-doc} ] build: [ diff --git a/message-switch.opam b/opam/message-switch.opam similarity index 98% rename from message-switch.opam rename to opam/message-switch.opam index f0dcf7ff224..41613cb034f 100644 --- a/message-switch.opam +++ b/opam/message-switch.opam @@ -30,6 +30,7 @@ depends: [ "sexplib" "shared-block-ring" {>= "2.3.0"} "xapi-stdext-unix" + "xapi-tracing" ] synopsis: "A simple store-and-forward message switch" description: """ diff --git a/message-switch.opam.template b/opam/message-switch.opam.template similarity index 98% rename from message-switch.opam.template rename to opam/message-switch.opam.template index a33fe27cb3e..0e8ec76c2e6 100644 --- a/message-switch.opam.template +++ b/opam/message-switch.opam.template @@ -28,6 +28,7 @@ depends: [ "sexplib" "shared-block-ring" {>= "2.3.0"} "xapi-stdext-unix" + "xapi-tracing" ] synopsis: "A simple store-and-forward message switch" description: """ diff --git a/pciutil.opam b/opam/pciutil.opam similarity index 100% rename from pciutil.opam rename to opam/pciutil.opam diff --git a/pciutil.opam.template b/opam/pciutil.opam.template similarity index 100% rename from pciutil.opam.template rename to opam/pciutil.opam.template diff --git a/xapi-stdext-date.opam b/opam/qcow-stream-tool.opam 
similarity index 83% rename from xapi-stdext-date.opam rename to opam/qcow-stream-tool.opam index 06021447900..a7c3ab6ef3c 100644 --- a/xapi-stdext-date.opam +++ b/opam/qcow-stream-tool.opam @@ -1,15 +1,15 @@ # This file is generated by dune, edit dune-project instead opam-version: "2.0" -synopsis: "Xapi's standard library extension, Dates" +synopsis: "Minimal CLI wrapper for qcow-stream" maintainer: ["Xapi project maintainers"] -authors: ["Jonathan Ludlam"] +authors: ["xen-api@lists.xen.org"] license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" homepage: "https://xapi-project.github.io/" bug-reports: "https://github.com/xapi-project/xen-api/issues" depends: [ "dune" {>= "3.15"} - "clock" {= version} - "ptime" + "qcow-stream" + "cmdliner" "odoc" {with-doc} ] build: [ diff --git a/rrd-transport.opam b/opam/rrd-transport.opam similarity index 100% rename from rrd-transport.opam rename to opam/rrd-transport.opam diff --git a/rrdd-plugin.opam b/opam/rrdd-plugin.opam similarity index 100% rename from rrdd-plugin.opam rename to opam/rrdd-plugin.opam diff --git a/safe-resources.opam b/opam/safe-resources.opam similarity index 100% rename from safe-resources.opam rename to opam/safe-resources.opam diff --git a/safe-resources.opam.template b/opam/safe-resources.opam.template similarity index 100% rename from safe-resources.opam.template rename to opam/safe-resources.opam.template diff --git a/sexpr.opam b/opam/sexpr.opam similarity index 100% rename from sexpr.opam rename to opam/sexpr.opam diff --git a/sexpr.opam.template b/opam/sexpr.opam.template similarity index 100% rename from sexpr.opam.template rename to opam/sexpr.opam.template diff --git a/stunnel.opam b/opam/stunnel.opam similarity index 100% rename from stunnel.opam rename to opam/stunnel.opam diff --git a/tgroup.opam b/opam/tgroup.opam similarity index 100% rename from tgroup.opam rename to opam/tgroup.opam diff --git a/uuid.opam b/opam/uuid.opam similarity index 100% rename from uuid.opam rename to 
opam/uuid.opam diff --git a/uuid.opam.template b/opam/uuid.opam.template similarity index 100% rename from uuid.opam.template rename to opam/uuid.opam.template diff --git a/varstored-guard.opam b/opam/varstored-guard.opam similarity index 100% rename from varstored-guard.opam rename to opam/varstored-guard.opam diff --git a/varstored-guard.opam.template b/opam/varstored-guard.opam.template similarity index 100% rename from varstored-guard.opam.template rename to opam/varstored-guard.opam.template diff --git a/vhd-format-lwt.opam b/opam/vhd-format-lwt.opam similarity index 100% rename from vhd-format-lwt.opam rename to opam/vhd-format-lwt.opam diff --git a/vhd-format-lwt.opam.template b/opam/vhd-format-lwt.opam.template similarity index 100% rename from vhd-format-lwt.opam.template rename to opam/vhd-format-lwt.opam.template diff --git a/vhd-format.opam b/opam/vhd-format.opam similarity index 100% rename from vhd-format.opam rename to opam/vhd-format.opam diff --git a/vhd-format.opam.template b/opam/vhd-format.opam.template similarity index 100% rename from vhd-format.opam.template rename to opam/vhd-format.opam.template diff --git a/vhd-tool.opam b/opam/vhd-tool.opam similarity index 100% rename from vhd-tool.opam rename to opam/vhd-tool.opam diff --git a/xapi-cli-protocol.opam b/opam/xapi-cli-protocol.opam similarity index 100% rename from xapi-cli-protocol.opam rename to opam/xapi-cli-protocol.opam diff --git a/xapi-cli-protocol.opam.template b/opam/xapi-cli-protocol.opam.template similarity index 100% rename from xapi-cli-protocol.opam.template rename to opam/xapi-cli-protocol.opam.template diff --git a/xapi-client.opam b/opam/xapi-client.opam similarity index 100% rename from xapi-client.opam rename to opam/xapi-client.opam diff --git a/xapi-client.opam.template b/opam/xapi-client.opam.template similarity index 100% rename from xapi-client.opam.template rename to opam/xapi-client.opam.template diff --git a/xapi-compression.opam b/opam/xapi-compression.opam 
similarity index 100% rename from xapi-compression.opam rename to opam/xapi-compression.opam diff --git a/xapi-compression.opam.template b/opam/xapi-compression.opam.template similarity index 100% rename from xapi-compression.opam.template rename to opam/xapi-compression.opam.template diff --git a/xapi-consts.opam b/opam/xapi-consts.opam similarity index 100% rename from xapi-consts.opam rename to opam/xapi-consts.opam diff --git a/xapi-consts.opam.template b/opam/xapi-consts.opam.template similarity index 100% rename from xapi-consts.opam.template rename to opam/xapi-consts.opam.template diff --git a/xapi-datamodel.opam b/opam/xapi-datamodel.opam similarity index 100% rename from xapi-datamodel.opam rename to opam/xapi-datamodel.opam diff --git a/xapi-datamodel.opam.template b/opam/xapi-datamodel.opam.template similarity index 100% rename from xapi-datamodel.opam.template rename to opam/xapi-datamodel.opam.template diff --git a/xapi-debug.opam b/opam/xapi-debug.opam similarity index 98% rename from xapi-debug.opam rename to opam/xapi-debug.opam index f8550f7508b..a2b7d9dd863 100644 --- a/xapi-debug.opam +++ b/opam/xapi-debug.opam @@ -42,6 +42,7 @@ depends: [ "re" "result" "rpclib" + "rrdd-plugin" "rresult" "sexplib" "sexplib0" diff --git a/xapi-expiry-alerts.opam b/opam/xapi-expiry-alerts.opam similarity index 100% rename from xapi-expiry-alerts.opam rename to opam/xapi-expiry-alerts.opam diff --git a/xapi-expiry-alerts.opam.template b/opam/xapi-expiry-alerts.opam.template similarity index 100% rename from xapi-expiry-alerts.opam.template rename to opam/xapi-expiry-alerts.opam.template diff --git a/xapi-forkexecd.opam b/opam/xapi-forkexecd.opam similarity index 100% rename from xapi-forkexecd.opam rename to opam/xapi-forkexecd.opam diff --git a/xapi-idl.opam b/opam/xapi-idl.opam similarity index 100% rename from xapi-idl.opam rename to opam/xapi-idl.opam diff --git a/xapi-idl.opam.template b/opam/xapi-idl.opam.template similarity index 100% rename from 
xapi-idl.opam.template rename to opam/xapi-idl.opam.template diff --git a/xapi-inventory.opam b/opam/xapi-inventory.opam similarity index 100% rename from xapi-inventory.opam rename to opam/xapi-inventory.opam diff --git a/xapi-inventory.opam.template b/opam/xapi-inventory.opam.template similarity index 100% rename from xapi-inventory.opam.template rename to opam/xapi-inventory.opam.template diff --git a/opam/xapi-log.opam b/opam/xapi-log.opam new file mode 100644 index 00000000000..12840be135b --- /dev/null +++ b/opam/xapi-log.opam @@ -0,0 +1,35 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A Logs library required by xapi" +description: + "This package is provided for backwards compatibility only. No new package should use it." +maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" +homepage: "https://xapi-project.github.io/" +bug-reports: "https://github.com/xapi-project/xen-api/issues" +depends: [ + "dune" {>= "3.15"} + "astring" + "fmt" + "logs" + "mtime" + "xapi-backtrace" + "xapi-stdext-pervasives" {= version} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/xapi-project/xen-api.git" diff --git a/xapi-nbd.opam b/opam/xapi-nbd.opam similarity index 100% rename from xapi-nbd.opam rename to opam/xapi-nbd.opam diff --git a/xapi-nbd.opam.template b/opam/xapi-nbd.opam.template similarity index 100% rename from xapi-nbd.opam.template rename to opam/xapi-nbd.opam.template diff --git a/xapi-open-uri.opam b/opam/xapi-open-uri.opam similarity index 100% rename from xapi-open-uri.opam rename to opam/xapi-open-uri.opam diff --git a/xapi-open-uri.opam.template b/opam/xapi-open-uri.opam.template similarity index 100% rename from xapi-open-uri.opam.template rename to 
opam/xapi-open-uri.opam.template diff --git a/xapi-rrd.opam b/opam/xapi-rrd.opam similarity index 100% rename from xapi-rrd.opam rename to opam/xapi-rrd.opam diff --git a/xapi-rrd.opam.template b/opam/xapi-rrd.opam.template similarity index 100% rename from xapi-rrd.opam.template rename to opam/xapi-rrd.opam.template diff --git a/xapi-schema.opam b/opam/xapi-schema.opam similarity index 100% rename from xapi-schema.opam rename to opam/xapi-schema.opam diff --git a/xapi-schema.opam.template b/opam/xapi-schema.opam.template similarity index 100% rename from xapi-schema.opam.template rename to opam/xapi-schema.opam.template diff --git a/xapi-sdk.opam b/opam/xapi-sdk.opam similarity index 100% rename from xapi-sdk.opam rename to opam/xapi-sdk.opam diff --git a/xapi-stdext-encodings.opam b/opam/xapi-stdext-encodings.opam similarity index 100% rename from xapi-stdext-encodings.opam rename to opam/xapi-stdext-encodings.opam diff --git a/xapi-stdext-encodings.opam.template b/opam/xapi-stdext-encodings.opam.template similarity index 100% rename from xapi-stdext-encodings.opam.template rename to opam/xapi-stdext-encodings.opam.template diff --git a/xapi-stdext-pervasives.opam b/opam/xapi-stdext-pervasives.opam similarity index 100% rename from xapi-stdext-pervasives.opam rename to opam/xapi-stdext-pervasives.opam diff --git a/xapi-stdext-std.opam b/opam/xapi-stdext-std.opam similarity index 100% rename from xapi-stdext-std.opam rename to opam/xapi-stdext-std.opam diff --git a/xapi-stdext-threads.opam b/opam/xapi-stdext-threads.opam similarity index 90% rename from xapi-stdext-threads.opam rename to opam/xapi-stdext-threads.opam index ae64e906b29..55653e588c9 100644 --- a/xapi-stdext-threads.opam +++ b/opam/xapi-stdext-threads.opam @@ -8,14 +8,18 @@ homepage: "https://xapi-project.github.io/" bug-reports: "https://github.com/xapi-project/xen-api/issues" depends: [ "dune" {>= "3.15"} + "ambient-context" "base-threads" "base-unix" "alcotest" {with-test} + "clock" {= version} 
"fmt" {with-test} - "odoc" {with-doc} + "mtime" + "tgroup" + "xapi-log" {= version} "xapi-stdext-pervasives" {= version} - "mtime" {with-test} "xapi-stdext-unix" {= version} + "odoc" {with-doc} ] build: [ ["dune" "subst"] {dev} diff --git a/xapi-stdext-unix.opam b/opam/xapi-stdext-unix.opam similarity index 100% rename from xapi-stdext-unix.opam rename to opam/xapi-stdext-unix.opam diff --git a/xapi-stdext-unix.opam.template b/opam/xapi-stdext-unix.opam.template similarity index 100% rename from xapi-stdext-unix.opam.template rename to opam/xapi-stdext-unix.opam.template diff --git a/xapi-stdext-zerocheck.opam b/opam/xapi-stdext-zerocheck.opam similarity index 100% rename from xapi-stdext-zerocheck.opam rename to opam/xapi-stdext-zerocheck.opam diff --git a/opam/xapi-storage-cli.opam b/opam/xapi-storage-cli.opam new file mode 100644 index 00000000000..c2e092eb9f6 --- /dev/null +++ b/opam/xapi-storage-cli.opam @@ -0,0 +1,36 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A CLI for xapi storage services" +description: + "The CLI allows you to directly manipulate virtual disk images, without them being attached to VMs." 
+maintainer: ["Xapi project maintainers"] +authors: ["xen-api@lists.xen.org"] +license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" +homepage: "https://xapi-project.github.io/" +bug-reports: "https://github.com/xapi-project/xen-api/issues" +depends: [ + "dune" {>= "3.15"} + "cmdliner" + "re" + "rpclib" + "ppx_deriving_rpc" + "xapi-client" {= version} + "xapi-idl" {= version} + "xapi-types" {= version} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/xapi-project/xen-api.git" diff --git a/xapi-storage-script.opam b/opam/xapi-storage-script.opam similarity index 100% rename from xapi-storage-script.opam rename to opam/xapi-storage-script.opam diff --git a/xapi-storage-script.opam.template b/opam/xapi-storage-script.opam.template similarity index 100% rename from xapi-storage-script.opam.template rename to opam/xapi-storage-script.opam.template diff --git a/xapi-storage.opam b/opam/xapi-storage.opam similarity index 100% rename from xapi-storage.opam rename to opam/xapi-storage.opam diff --git a/xapi-storage.opam.template b/opam/xapi-storage.opam.template similarity index 100% rename from xapi-storage.opam.template rename to opam/xapi-storage.opam.template diff --git a/xapi-tools.opam b/opam/xapi-tools.opam similarity index 97% rename from xapi-tools.opam rename to opam/xapi-tools.opam index 852102302dd..3116f8d3293 100644 --- a/xapi-tools.opam +++ b/opam/xapi-tools.opam @@ -24,10 +24,12 @@ depends: [ "rpclib" "rresult" "uri" + "tyre" "xenctrl" "xmlm" "yojson" "rrd-transport" + "rrdd-plugin" "xapi-tracing-export" "xen-api-client" "alcotest" {with-test} diff --git a/xapi-tools.opam.template b/opam/xapi-tools.opam.template similarity index 100% rename from xapi-tools.opam.template rename to opam/xapi-tools.opam.template diff --git a/xapi-tracing-export.opam b/opam/xapi-tracing-export.opam similarity index 
100% rename from xapi-tracing-export.opam rename to opam/xapi-tracing-export.opam diff --git a/xapi-tracing-export.opam.template b/opam/xapi-tracing-export.opam.template similarity index 100% rename from xapi-tracing-export.opam.template rename to opam/xapi-tracing-export.opam.template diff --git a/xapi-tracing.opam b/opam/xapi-tracing.opam similarity index 100% rename from xapi-tracing.opam rename to opam/xapi-tracing.opam diff --git a/xapi-tracing.opam.template b/opam/xapi-tracing.opam.template similarity index 100% rename from xapi-tracing.opam.template rename to opam/xapi-tracing.opam.template diff --git a/xapi-types.opam b/opam/xapi-types.opam similarity index 100% rename from xapi-types.opam rename to opam/xapi-types.opam diff --git a/xapi-types.opam.template b/opam/xapi-types.opam.template similarity index 100% rename from xapi-types.opam.template rename to opam/xapi-types.opam.template diff --git a/xapi.opam b/opam/xapi.opam similarity index 100% rename from xapi.opam rename to opam/xapi.opam diff --git a/xapi.opam.template b/opam/xapi.opam.template similarity index 100% rename from xapi.opam.template rename to opam/xapi.opam.template diff --git a/xe.opam b/opam/xe.opam similarity index 100% rename from xe.opam rename to opam/xe.opam diff --git a/xe.opam.template b/opam/xe.opam.template similarity index 100% rename from xe.opam.template rename to opam/xe.opam.template diff --git a/xen-api-client-lwt.opam b/opam/xen-api-client-lwt.opam similarity index 100% rename from xen-api-client-lwt.opam rename to opam/xen-api-client-lwt.opam diff --git a/xen-api-client-lwt.opam.template b/opam/xen-api-client-lwt.opam.template similarity index 100% rename from xen-api-client-lwt.opam.template rename to opam/xen-api-client-lwt.opam.template diff --git a/xen-api-client.opam b/opam/xen-api-client.opam similarity index 100% rename from xen-api-client.opam rename to opam/xen-api-client.opam diff --git a/xml-light2.opam b/opam/xml-light2.opam similarity index 100% rename from 
xml-light2.opam rename to opam/xml-light2.opam diff --git a/xml-light2.opam.template b/opam/xml-light2.opam.template similarity index 100% rename from xml-light2.opam.template rename to opam/xml-light2.opam.template diff --git a/zstd.opam b/opam/zstd.opam similarity index 100% rename from zstd.opam rename to opam/zstd.opam diff --git a/zstd.opam.template b/opam/zstd.opam.template similarity index 100% rename from zstd.opam.template rename to opam/zstd.opam.template diff --git a/python3/Makefile b/python3/Makefile index fb13068ca0e..3646ad9f54a 100644 --- a/python3/Makefile +++ b/python3/Makefile @@ -30,6 +30,7 @@ install: $(IPROG) libexec/mail-alarm $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/backup-sr-metadata.py $(DESTDIR)$(LIBEXECDIR) $(IPROG) libexec/restore-sr-metadata.py $(DESTDIR)$(LIBEXECDIR) + $(IPROG) libexec/qcow2-to-stdout.py $(DESTDIR)$(LIBEXECDIR) $(IPROG) bin/hfx_filename $(DESTDIR)$(OPTDIR)/bin $(IPROG) bin/xe-reset-networking $(DESTDIR)$(OPTDIR)/bin diff --git a/python3/bin/xe-reset-networking b/python3/bin/xe-reset-networking index 58091d09120..81b3c57286f 100755 --- a/python3/bin/xe-reset-networking +++ b/python3/bin/xe-reset-networking @@ -87,7 +87,7 @@ if __name__ == "__main__": f = open(pool_conf, 'r') try: l = f.readline() - ls = l.split(':') + ls = l.split(':', maxsplit=1) if ls[0].strip() == 'master': master = True address = 'localhost' diff --git a/python3/libexec/nbd_client_manager.py b/python3/libexec/nbd_client_manager.py index 3d0920a3845..99dd85c6cc9 100644 --- a/python3/libexec/nbd_client_manager.py +++ b/python3/libexec/nbd_client_manager.py @@ -208,7 +208,8 @@ def connect_nbd(path, exportname): path, nbd_device, "-timeout", - "60", + "90", + "-persist", "-name", exportname, ] diff --git a/python3/libexec/qcow2-to-stdout.py b/python3/libexec/qcow2-to-stdout.py new file mode 100755 index 00000000000..b0638bc5904 --- /dev/null +++ b/python3/libexec/qcow2-to-stdout.py @@ -0,0 +1,422 @@ +#!/usr/bin/env python3 + +# This tool reads a disk 
image in any format and converts it to qcow2, +# writing the result directly to stdout. +# +# Copyright (C) 2024 Igalia, S.L. +# +# Authors: Alberto Garcia +# Madeeha Javed +# +# SPDX-License-Identifier: GPL-2.0-or-later +# +# qcow2 files produced by this script are always arranged like this: +# +# - qcow2 header +# - refcount table +# - refcount blocks +# - L1 table +# - L2 tables +# - Data clusters +# +# A note about variable names: in qcow2 there is one refcount table +# and one (active) L1 table, although each can occupy several +# clusters. For the sake of simplicity the code sometimes talks about +# refcount tables and L1 tables when referring to those clusters. + +import argparse +import math +import os +import struct +import sys + +QCOW2_DEFAULT_CLUSTER_SIZE = 65536 +QCOW2_DEFAULT_REFCOUNT_BITS = 16 +QCOW2_FEATURE_NAME_TABLE = 0x6803F857 +QCOW2_DATA_FILE_NAME_STRING = 0x44415441 +QCOW2_V3_HEADER_LENGTH = 112 # Header length in QEMU 9.0. Must be a multiple of 8 +QCOW2_INCOMPAT_DATA_FILE_BIT = 2 +QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT = 1 +QCOW_OFLAG_COPIED = 1 << 63 + + +def bitmap_set(bitmap, idx): + bitmap[idx // 8] |= 1 << (idx % 8) + + +def bitmap_is_set(bitmap, idx): + return (bitmap[idx // 8] & (1 << (idx % 8))) != 0 + + +def bitmap_iterator(bitmap, length): + for idx in range(length): + if bitmap_is_set(bitmap, idx): + yield idx + + +def align_up(num, d): + return d * math.ceil(num / d) + + +def write_features(cluster, offset, data_file_name): + if data_file_name is not None: + encoded_name = data_file_name.encode("utf-8") + padded_name_len = align_up(len(encoded_name), 8) + struct.pack_into(f">II{padded_name_len}s", cluster, offset, + QCOW2_DATA_FILE_NAME_STRING, + len(encoded_name), + encoded_name) + offset += 8 + padded_name_len + + qcow2_features = [ + # Incompatible + (0, 0, "dirty bit"), + (0, 1, "corrupt bit"), + (0, 2, "external data file"), + (0, 3, "compression type"), + (0, 4, "extended L2 entries"), + # Compatible + (1, 0, "lazy refcounts"), + 
# Autoclear + (2, 0, "bitmaps"), + (2, 1, "raw external data"), + ] + struct.pack_into(">I", cluster, offset, QCOW2_FEATURE_NAME_TABLE) + struct.pack_into(">I", cluster, offset + 4, len(qcow2_features) * 48) + offset += 8 + for feature_type, feature_bit, feature_name in qcow2_features: + struct.pack_into(">BB46s", cluster, offset, + feature_type, feature_bit, feature_name.encode("ascii")) + offset += 48 + + +def write_qcow2_content(input_file, cluster_size, refcount_bits, + data_file_name, data_file_raw, diff_file_name): + # Some basic values + l1_entries_per_table = cluster_size // 8 + l2_entries_per_table = cluster_size // 8 + refcounts_per_table = cluster_size // 8 + refcounts_per_block = cluster_size * 8 // refcount_bits + + # Open the input file for reading + fd = os.open(input_file, os.O_RDONLY) + + # Virtual disk size, number of data clusters and L1 entries + block_device_size = os.lseek(fd, 0, os.SEEK_END) + disk_size = align_up(block_device_size, 512) + total_data_clusters = math.ceil(disk_size / cluster_size) + l1_entries = math.ceil(total_data_clusters / l2_entries_per_table) + allocated_l1_tables = math.ceil(l1_entries / l1_entries_per_table) + + # Max L1 table size is 32 MB (QCOW_MAX_L1_SIZE in block/qcow2.h) + if (l1_entries * 8) > (32 * 1024 * 1024): + sys.exit("[Error] The image size is too large. Try using a larger cluster size.") + + # Two bitmaps indicating which L1 and L2 entries are set + l1_bitmap = bytearray(allocated_l1_tables * l1_entries_per_table // 8) + l2_bitmap = bytearray(l1_entries * l2_entries_per_table // 8) + allocated_l2_tables = 0 + allocated_data_clusters = 0 + + if data_file_raw: + # If data_file_raw is set then all clusters are allocated and + # we don't need to read the input file at all. 
+ allocated_l2_tables = l1_entries + for idx in range(l1_entries): + bitmap_set(l1_bitmap, idx) + for idx in range(total_data_clusters): + bitmap_set(l2_bitmap, idx) + else: + # Allocates a cluster in the appropriate bitmaps if it's different + # from cluster_to_compare_with + def check_cluster_allocate(idx, cluster, cluster_to_compare_with): + nonlocal allocated_data_clusters + nonlocal allocated_l2_tables + # If the last cluster is smaller than cluster_size pad it with zeroes + if len(cluster) < cluster_size: + cluster += bytes(cluster_size - len(cluster)) + # If a cluster has different data from the cluster_to_compare_with then it + # must be allocated in the output file and its L2 entry must be set + if cluster != cluster_to_compare_with: + bitmap_set(l2_bitmap, idx) + allocated_data_clusters += 1 + # Allocated data clusters also need their corresponding L1 entry and L2 table + l1_idx = math.floor(idx / l2_entries_per_table) + if not bitmap_is_set(l1_bitmap, l1_idx): + bitmap_set(l1_bitmap, l1_idx) + allocated_l2_tables += 1 + + zero_cluster = bytes(cluster_size) + last_cluster = align_up(block_device_size, cluster_size) // cluster_size + if diff_file_name: + # Read all the clusters that differ from the diff_file_name + diff_fd = os.open(diff_file_name, os.O_RDONLY) + diff_block_device_size = os.lseek(diff_fd, 0, os.SEEK_END) + last_diff_cluster = align_up(diff_block_device_size, cluster_size) // cluster_size + # In case input_file is bigger than diff_file_name, first check + # if clusters from diff_file_name differ, and then check if the + # rest contain data + for idx in range(0, last_diff_cluster): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + original_cluster = os.pread(diff_fd, cluster_size, cluster_size * idx) + + # If a cluster has different data from the original_cluster + # then it must be allocated + check_cluster_allocate(idx, cluster, original_cluster) + for idx in range(last_diff_cluster, last_cluster): + cluster = os.pread(fd, 
cluster_size, cluster_size * idx) + + # If a cluster has different data from the original_cluster + # then it must be allocated + check_cluster_allocate(idx, cluster, zero_cluster) + else: + # Read all the clusters that contain data + for idx in range(0, last_cluster): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + # If a cluster has non-zero data then it must be allocated + check_cluster_allocate(idx, cluster, zero_cluster) + + # Total amount of allocated clusters excluding the refcount blocks and table + total_allocated_clusters = 1 + allocated_l1_tables + allocated_l2_tables + if data_file_name is None: + total_allocated_clusters += allocated_data_clusters + + # Clusters allocated for the refcount blocks and table + allocated_refcount_blocks = math.ceil(total_allocated_clusters / refcounts_per_block) + allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table) + + # Now we have a problem because allocated_refcount_blocks and allocated_refcount_tables... + # (a) increase total_allocated_clusters, and + # (b) need to be recalculated when total_allocated_clusters is increased + # So we need to repeat the calculation as long as the numbers change + while True: + new_total_allocated_clusters = total_allocated_clusters + allocated_refcount_tables + allocated_refcount_blocks + new_allocated_refcount_blocks = math.ceil(new_total_allocated_clusters / refcounts_per_block) + if new_allocated_refcount_blocks > allocated_refcount_blocks: + allocated_refcount_blocks = new_allocated_refcount_blocks + allocated_refcount_tables = math.ceil(allocated_refcount_blocks / refcounts_per_table) + else: + break + + # Now that we have the final numbers we can update total_allocated_clusters + total_allocated_clusters += allocated_refcount_tables + allocated_refcount_blocks + + # At this point we have the exact number of clusters that the output + # image is going to use so we can calculate all the offsets. 
+ current_cluster_idx = 1 + + refcount_table_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_refcount_tables + + refcount_block_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_refcount_blocks + + l1_table_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_l1_tables + + l2_table_offset = current_cluster_idx * cluster_size + current_cluster_idx += allocated_l2_tables + + data_clusters_offset = current_cluster_idx * cluster_size + + # Calculate some values used in the qcow2 header + if allocated_l1_tables == 0: + l1_table_offset = 0 + + hdr_cluster_bits = int(math.log2(cluster_size)) + hdr_refcount_bits = int(math.log2(refcount_bits)) + hdr_length = QCOW2_V3_HEADER_LENGTH + hdr_incompat_features = 0 + if data_file_name is not None: + hdr_incompat_features |= 1 << QCOW2_INCOMPAT_DATA_FILE_BIT + hdr_autoclear_features = 0 + if data_file_raw: + hdr_autoclear_features |= 1 << QCOW2_AUTOCLEAR_DATA_FILE_RAW_BIT + + ### Write qcow2 header + cluster = bytearray(cluster_size) + struct.pack_into(">4sIQIIQIIQQIIQQQQII", cluster, 0, + b"QFI\xfb", # QCOW magic string + 3, # version + 0, # backing file offset + 0, # backing file sizes + hdr_cluster_bits, + disk_size, + 0, # encryption method + l1_entries, + l1_table_offset, + refcount_table_offset, + allocated_refcount_tables, + 0, # number of snapshots + 0, # snapshot table offset + hdr_incompat_features, + 0, # compatible features + hdr_autoclear_features, + hdr_refcount_bits, + hdr_length, + ) + + write_features(cluster, hdr_length, data_file_name) + + sys.stdout.buffer.write(cluster) + + ### Write refcount table + cur_offset = refcount_block_offset + remaining_refcount_table_entries = allocated_refcount_blocks # Each entry is a pointer to a refcount block + while remaining_refcount_table_entries > 0: + cluster = bytearray(cluster_size) + to_write = min(remaining_refcount_table_entries, refcounts_per_table) + 
remaining_refcount_table_entries -= to_write + for idx in range(to_write): + struct.pack_into(">Q", cluster, idx * 8, cur_offset) + cur_offset += cluster_size + sys.stdout.buffer.write(cluster) + + ### Write refcount blocks + remaining_refcount_block_entries = total_allocated_clusters # One entry for each allocated cluster + for tbl in range(allocated_refcount_blocks): + cluster = bytearray(cluster_size) + to_write = min(remaining_refcount_block_entries, refcounts_per_block) + remaining_refcount_block_entries -= to_write + # All refcount entries contain the number 1. The only difference + # is their bit width, defined when the image is created. + for idx in range(to_write): + if refcount_bits == 64: + struct.pack_into(">Q", cluster, idx * 8, 1) + elif refcount_bits == 32: + struct.pack_into(">L", cluster, idx * 4, 1) + elif refcount_bits == 16: + struct.pack_into(">H", cluster, idx * 2, 1) + elif refcount_bits == 8: + cluster[idx] = 1 + elif refcount_bits == 4: + cluster[idx // 2] |= 1 << ((idx % 2) * 4) + elif refcount_bits == 2: + cluster[idx // 4] |= 1 << ((idx % 4) * 2) + elif refcount_bits == 1: + cluster[idx // 8] |= 1 << (idx % 8) + sys.stdout.buffer.write(cluster) + + ### Write L1 table + cur_offset = l2_table_offset + for tbl in range(allocated_l1_tables): + cluster = bytearray(cluster_size) + for idx in range(l1_entries_per_table): + l1_idx = tbl * l1_entries_per_table + idx + if bitmap_is_set(l1_bitmap, l1_idx): + struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED) + cur_offset += cluster_size + sys.stdout.buffer.write(cluster) + + ### Write L2 tables + cur_offset = data_clusters_offset + for tbl in range(l1_entries): + # Skip the empty L2 tables. We can identify them because + # there is no L1 entry pointing at them. 
+ if bitmap_is_set(l1_bitmap, tbl): + cluster = bytearray(cluster_size) + for idx in range(l2_entries_per_table): + l2_idx = tbl * l2_entries_per_table + idx + if bitmap_is_set(l2_bitmap, l2_idx): + if data_file_name is None: + struct.pack_into(">Q", cluster, idx * 8, cur_offset | QCOW_OFLAG_COPIED) + cur_offset += cluster_size + else: + struct.pack_into(">Q", cluster, idx * 8, (l2_idx * cluster_size) | QCOW_OFLAG_COPIED) + sys.stdout.buffer.write(cluster) + + ### Write data clusters + if data_file_name is None: + for idx in bitmap_iterator(l2_bitmap, total_data_clusters): + cluster = os.pread(fd, cluster_size, cluster_size * idx) + # If the last cluster is smaller than cluster_size pad it with zeroes + if len(cluster) < cluster_size: + cluster += bytes(cluster_size - len(cluster)) + sys.stdout.buffer.write(cluster) + + if not data_file_raw: + os.close(fd) + + +def main(): + # Command-line arguments + parser = argparse.ArgumentParser( + description="This program converts a QEMU disk image to qcow2 " + "and writes it to the standard output" + ) + parser.add_argument("input_file", help="name of the input file") + parser.add_argument( + "--diff", + dest="diff_file_name", + metavar="diff_file_name", + help=("name of the original file to compare input_file against. 
" + "If specified, will only export clusters that are different " + "between the files"), + default=None, + ) + parser.add_argument( + "-c", + dest="cluster_size", + metavar="cluster_size", + help=f"qcow2 cluster size (default: {QCOW2_DEFAULT_CLUSTER_SIZE})", + default=QCOW2_DEFAULT_CLUSTER_SIZE, + type=int, + choices=[1 << x for x in range(9, 22)], + ) + parser.add_argument( + "-r", + dest="refcount_bits", + metavar="refcount_bits", + help=f"width of the reference count entries (default: {QCOW2_DEFAULT_REFCOUNT_BITS})", + default=QCOW2_DEFAULT_REFCOUNT_BITS, + type=int, + choices=[1 << x for x in range(7)], + ) + parser.add_argument( + "-d", + dest="data_file", + help="create an image with input_file as an external data file", + action="store_true", + ) + parser.add_argument( + "-R", + dest="data_file_raw", + help="enable data_file_raw on the generated image (implies -d)", + action="store_true", + ) + args = parser.parse_args() + + if args.data_file_raw: + args.data_file = True + + if not os.path.exists(args.input_file): + sys.exit(f"[Error] {args.input_file} does not exist.") + + if args.diff_file_name and not os.path.exists(args.diff_file_name): + sys.exit(f"[Error] {args.diff_file_name} does not exist.") + + # A 512 byte header is too small for the data file name extension + if args.data_file and args.cluster_size == 512: + sys.exit("[Error] External data files require a larger cluster size") + + if sys.stdout.isatty(): + sys.exit("[Error] Refusing to write to a tty. 
Try redirecting stdout.") + + if args.data_file: + data_file_name = args.input_file + else: + data_file_name = None + + write_qcow2_content( + args.input_file, + args.cluster_size, + args.refcount_bits, + data_file_name, + args.data_file_raw, + args.diff_file_name + ) + + +if __name__ == "__main__": + main() + diff --git a/python3/libexec/usb_reset.py b/python3/libexec/usb_reset.py index 573936ae1c3..941259d6182 100755 --- a/python3/libexec/usb_reset.py +++ b/python3/libexec/usb_reset.py @@ -19,41 +19,40 @@ # ./usb_reset.py attach 2-2 -d 12 -p 4130 # ./usb_reset.py attach 2-2 -d 12 -p 4130 -r # 1. reset device -# if without -r, do step 2~4 +# if without -r, do step 2~3 # 2. if it's the first USB device to pass-through -# a) bind mount /dev /sys in chroot directory (/var/xen/qemu/root-) -# b) create new cgroup devices:/qemu-, -# c) blacklist all and add default device whitelist, -# d) join current qemu process to this cgroup -# 3. save device uid/gid to /var/run/nonpersistent/usb/ -# 4. set device file uid/gid to (qemu_base + dom-id) -# 5. add current device to whitelist +# a) bind mount /sys in chroot directory (/var/xen/qemu/root-) +# b) clone (create the device with same major/minor number and mode) +# in chroot directory with same path +# c) bind mount /proc/ to chroot directory (/var/xen/qemu/root-/proc/self) +# 3. set device file uid/gid to (qemu_base + dom-id) # # detach # ./usb_reset.py detach device -d dom-id # ./usb_reset.py detach 2-2 -d 12 -# 1. restore device file uid/gid from /var/run/nonpersistent/usb/ -# 2. remove current device from whitelist +# 1. Remove the cloned device file in chroot directory +# 2. Umount /proc/self from chroot directory if it is mounted # # cleanup # ./usb_reset.py cleanup -d dom-id # ./usb_reset.py cleanup -d 12 -# 1.remove the cgroup if one has been created. -# 2.umount /dev, /sys from chroot directory if they are mounted. +# 1.umount /sys from chroot directory if they are mounted. 
+# 2.umount /proc/self from chroot directory if they are mounted. +# 3.remove /dev/bus directory in chroot directory if it exists import argparse import ctypes import ctypes.util -import errno import fcntl import grp -import xcp.logger as log # pytype: disable=import-error import logging import os import pwd import re -from stat import S_ISCHR, S_ISBLK +import shutil +import sys +import xcp.logger as log # pytype: disable=import-error def parse_arg(): parser = argparse.ArgumentParser( @@ -85,56 +84,6 @@ def get_root_dir(domid): return "/var/xen/qemu/root-{}".format(domid) -def get_cg_dir(domid): - return "/sys/fs/cgroup/devices/qemu-{}".format(domid) - - -def get_ids_path(device): - usb_dir = "/var/run/nonpersistent/usb" - try: - os.makedirs(usb_dir) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - return os.path.join(usb_dir, device) - - -def save_device_ids(device): - path = dev_path(device) - - try: - stat = os.stat(path) - ids_info = "{} {}".format(stat.st_uid, stat.st_gid) - except OSError as e: - log.error("Failed to stat {}: {}".format(path, str(e))) - exit(1) - - try: - with open(get_ids_path(device), "w") as f: - f.write(ids_info) - except IOError as e: - log.error("Failed to save device ids {}: {}".format(path, str(e))) - exit(1) - - -def load_device_ids(device): - ids_path = get_ids_path(device) - try: - with open(ids_path) as f: - uid, gid = list(map(int, f.readline().split())) - except (IOError, ValueError) as e: - log.error("Failed to load device ids: {}".format(str(e))) - - try: - os.remove(ids_path) - except OSError as e: - # ignore and continue - log.warning("Failed to remove device ids: {}".format(str(e))) - - return uid, gid # pyright: ignore[reportPossiblyUnboundVariable] # pragma: no cover - - # throw IOError, ValueError def read_int(path): with open(path) as f: @@ -147,117 +96,14 @@ def dev_path(device): pat = re.compile(r"\d+-\d+(\.\d+)*$") if pat.match(device) is None: log.error("Unexpected device node: {}".format(device)) - 
exit(1) + sys.exit(1) try: bus = read_int("/sys/bus/usb/devices/{}/busnum".format(device)) dev = read_int("/sys/bus/usb/devices/{}/devnum".format(device)) return "/dev/bus/usb/{0:03d}/{1:03d}".format(bus, dev) except (IOError, ValueError) as e: log.error("Failed to get device path {}: {}".format(device, str(e))) - exit(1) - - -def get_ctl(path, mode): # type:(str, str) -> str - """get the string to control device access for cgroup - :param path: the device file path - :param mode: either "r" or "rw" - :return: the string to control device access - """ - try: - st = os.stat(path) - except OSError as e: - log.error("Failed to get stat of {}: {}".format(path, str(e))) - raise - - t = "" - if S_ISBLK(st.st_mode): - t = "b" - elif S_ISCHR(st.st_mode): - t = "c" - if t and mode in ("r", "rw"): - return "{} {}:{} {}".format(t, os.major(st.st_rdev), os.minor( - st.st_rdev), mode) - raise RuntimeError("Failed to get control string of {}".format(path)) - - -def _device_ctl(path, domid, allow): - cg_dir = get_cg_dir(domid) - file_name = "/devices.allow" if allow else "/devices.deny" - try: - with open(cg_dir + file_name, "w") as f: - f.write(get_ctl(path, "rw")) - except (IOError, OSError, RuntimeError) as e: - log.error("Failed to {} {}: {}".format( - "allow" if allow else "deny", path, str(e))) - exit(1) - - -def allow_device(path, domid): - _device_ctl(path, domid, True) - - -def deny_device(path, domid): - _device_ctl(path, domid, False) - - -def setup_cgroup(domid, pid): # type:(str, str) -> None - """ - Associate the given process id (pid) with the given Linux kernel control group - and limit it's device access to only /dev/null. - - :param domid (str): The control group ID string (passed on from the command line) - :param pid (str): The process ID string (passed on from the command line) - - If the control group directory does not exist yet, the control group is created. - - - The pid goes into the file "tasks" to associate the process with the cgroup. 
- - Deny device access by default by writing "a" to devices.deny. - - Grant read-write access to /dev/null, writing it's device IDs to devices.allow. - - If any error occur during the setup process, the error is logged and - the program exits with a status code of 1. - """ - cg_dir = get_cg_dir(domid) - - try: - os.mkdir(cg_dir, 0o755) - except OSError as e: - if e.errno != errno.EEXIST: - log.error("Failed to create cgroup: {}".format(cg_dir)) - exit(1) - - try: - # unbuffered write to ensure each one is flushed immediately - # to the kernel's control group filesystem: - # - # The order of writes is likely not important, but the writes - # may have to be a single write() system call for the entire string. - # - # Using the unbuffered Raw IO mode, we know the write was done - # in exactly this way by the write function call itself, not later. - # - # With small writes like this , splitting them because of overflowing the - # buffer is not expected to happen. To stay safe and keep using unbuffered I/O - # We have to migrate to binary mode in python3,as python3 supports unbuffered - # raw I/O in binary mode. - # - with open(cg_dir + "/tasks", "wb", 0) as tasks, \ - open(cg_dir + "/devices.deny", "wb", 0) as deny, \ - open(cg_dir + "/devices.allow", "wb", 0) as allow: - - # deny all - deny.write(b"a") - - # To write bytes, we've to encode the strings to bytes below: - - # grant rw access to /dev/null by default - allow.write(get_ctl("/dev/null", "rw").encode()) - - tasks.write(str(pid).encode()) - - except (IOError, OSError, RuntimeError) as e: - log.error("Failed to setup cgroup: {}".format(str(e))) - exit(1) + sys.exit(1) def mount(source, target, fs, flags=0): @@ -266,7 +112,7 @@ def mount(source, target, fs, flags=0): log.error("Failed to mount {} ({}) to {} with flags {}: {}". 
format(source, fs, target, flags, os.strerror(ctypes.get_errno()))) - exit(1) + sys.exit(1) def umount(target): @@ -277,6 +123,42 @@ def umount(target): format(target, os.strerror(ctypes.get_errno()))) +def clone_device(path, root_dir, domid): + """ + Clone the device file into the chroot directory. + + :param path: The source device file under system /dev to clone. + :param root_dir: The root directory of the chroot environment. + :param domid: The domain ID of the VM, used to set the device file's uid/gid. + """ + target_path = os.path.join(root_dir, path.lstrip(os.path.sep)) + if os.path.exists(target_path): + log.info("Device file {} already exists in chroot".format(target_path)) + return + + os.makedirs(os.path.dirname(target_path), exist_ok=True, mode=0o755) + + try: + st = os.stat(path) + except OSError as e: + log.error("Failed to get stat of {}: {}".format(path, str(e))) + sys.exit(1) + + mode = st.st_mode + major = os.major(st.st_rdev) + minor = os.minor(st.st_rdev) + clone_device_id = os.makedev(major, minor) + os.mknod(target_path, mode, clone_device_id) + + # set device file uid/gid + try: + os.chown(target_path, pwd.getpwnam("qemu_base").pw_uid + domid, + grp.getgrnam("qemu_base").gr_gid + domid) + except OSError as e: + log.error("Failed to chown device file {}: {}".format(path, str(e))) + sys.exit(1) + + def attach(device, domid, pid, reset_only): path = dev_path(device) @@ -293,76 +175,53 @@ def attach(device, domid, pid, reset_only): if reset_only: return - save_device_ids(device) - - # set device file uid/gid - try: - os.chown(path, pwd.getpwnam("qemu_base").pw_uid + domid, - grp.getgrnam("qemu_base").gr_gid + domid) - except OSError as e: - log.error("Failed to chown device file {}: {}".format(path, str(e))) - exit(1) - root_dir = get_root_dir(domid) dev_dir = root_dir + "/dev" if not os.path.isdir(root_dir) or not os.path.isdir(dev_dir): log.error("Error: The chroot or dev directory doesn't exist") - exit(1) + sys.exit(1) - if not 
os.path.isdir(dev_dir + "/bus"): - # first USB device to pass-through - MS_BIND = 4096 # mount flags, from fs.h - mount("/dev", dev_dir, "", MS_BIND) - setup_cgroup(domid, pid) + clone_device(path, root_dir, domid) sys_dir = root_dir + "/sys" + proc_dir = root_dir + "/proc" # sys_dir could already be mounted because of PCI pass-through - if not os.path.isdir(sys_dir): - try: - os.mkdir(sys_dir, 0o755) - except OSError: - log.error("Failed to create sys dir in chroot") - exit(1) + os.makedirs(sys_dir, exist_ok=True, mode=0o755) if not os.path.isdir(sys_dir + "/devices"): mount("/sys", sys_dir, "sysfs") - # add device to cgroup allow list - allow_device(path, domid) + self_dir = os.path.join(proc_dir, "self") + os.makedirs(self_dir , exist_ok=True, mode=0o755) + fd_dir = os.path.join(self_dir, "fd") + if not os.path.isdir(fd_dir): + MS_BIND = 4096 # mount flags, from fs.h + mount(f"/proc/{pid}/", self_dir, "", MS_BIND) def detach(device, domid): path = dev_path(device) - uid, gid = load_device_ids(device) - - # restore uid, gid of the device file. - try: - os.chown(path, uid, gid) - except OSError as e: - log.error("Failed to chown device file {}: {}".format(path, str(e))) - exit(1) - - # remove device from cgroup allow list - deny_device(path, domid) + root_dir = get_root_dir(domid) + target_path = os.path.join(root_dir, path.lstrip(os.path.sep)) + os.remove(target_path) def cleanup(domid): - # remove the cgroup if one has been created. - if os.path.isdir(get_cg_dir(domid)): - try: - os.rmdir(get_cg_dir(domid)) - except OSError as e: - # log and continue - log.error("Failed to remove cgroup qemu-{}: {}" - .format(domid, str(e))) - # umount /dev, /sys from chroot directory if they are mounted. 
root_dir = get_root_dir(domid) dev_dir = root_dir + "/dev" sys_dir = root_dir + "/sys" - if os.path.isdir(dev_dir + "/bus"): - umount(dev_dir) + bus_dir = dev_dir + "/bus" + proc_dir = root_dir + "/proc" + self_dir = proc_dir + "/self" + if os.path.isdir(bus_dir): + log.info("Removing bus directory: {} for cleanup".format(bus_dir)) + shutil.rmtree(bus_dir) if os.path.isdir(sys_dir + "/devices"): umount(sys_dir) + if os.path.exists(sys_dir) and os.path.ismount(self_dir): + umount(self_dir) + log.info("Removing proc directory: {} for cleanup".format(proc_dir)) + shutil.rmtree(proc_dir) if __name__ == "__main__": @@ -378,4 +237,4 @@ def cleanup(domid): cleanup(arg.domid) else: log.error("Unexpected command: {}".format(arg.command)) - exit(1) + sys.exit(1) diff --git a/python3/libexec/usb_scan.py b/python3/libexec/usb_scan.py index 03d89f7baed..15888a25dff 100755 --- a/python3/libexec/usb_scan.py +++ b/python3/libexec/usb_scan.py @@ -421,6 +421,10 @@ def parse_line(self, line): :param line: (str) single line of policy file :return: None """ + # 0. skip empty lines + if line.strip() == '': + return + # 1. remove comments # ^([^#]*)(#.*)?$ i = line.find("#") diff --git a/python3/packages/observer.py b/python3/packages/observer.py index cf2ebf32226..df53f5f0f1f 100644 --- a/python3/packages/observer.py +++ b/python3/packages/observer.py @@ -266,7 +266,7 @@ def bugtool_filenamer(): tracers = list(map(create_tracer_from_config, configs)) debug("tracers=%s", tracers) - def span_of_tracers(wrapped=None, span_name_prefix=""): + def span_of_tracers(wrapped=None, span_name_prefix="", parent_context=None): """ Public decorator that creates a trace around a function. @@ -289,7 +289,7 @@ def span_of_tracers(wrapped=None, span_name_prefix=""): that the function is decorated properly on the second pass. 
""" if wrapped is None: # handle decorators with parameters - return functools.partial(span_of_tracers, span_name_prefix=span_name_prefix) + return functools.partial(span_of_tracers, span_name_prefix=span_name_prefix, parent_context=parent_context) @wrapt.decorator def instrument_function(wrapped, _, args, kwargs): @@ -352,11 +352,10 @@ def autoinstrument_class(aclass): traceback.format_exc(), ) - def autoinstrument_module(amodule): """Autoinstrument the classes and functions in a module.""" - with tracers[0].start_as_current_span(f"auto_instrumentation.add_module: {amodule}"): + with tracers[0].start_as_current_span(f"auto_instrumentation.add_module: {amodule}", context=parent_context): # Instrument the methods of the classes in the module for _, aclass in inspect.getmembers(amodule, inspect.isclass): try: @@ -373,14 +372,15 @@ def autoinstrument_module(amodule): return instrument_function(wrapped) - def _patch_module(module_name): + def _patch_module(module_name, parent_context=None): wrapt.importer.discover_post_import_hooks(module_name) wrapt.importer.when_imported(module_name)( - lambda hook: span_of_tracers(wrapped=hook) + lambda hook: span_of_tracers(wrapped=hook, parent_context=parent_context) ) - for m in module_names: - _patch_module(m) + def _patch_modules(parent_context): + for m in module_names: + _patch_module(m, parent_context=parent_context) # Create spans to track observer.py's setup duration t = tracers[0] @@ -388,6 +388,10 @@ def _patch_module(module_name): import_span = t.start_span("observer.py:imports", start_time=import_ts_start) import_span.end(end_time=import_ts_end) + # Set a parent span in the add_module spans' context so that they are kept together + with t.start_span("auto_instrumentation") as aspan: + _patch_modules(trace.set_span_in_context(aspan)) + return span_of_tracers, _patch_module diff --git a/python3/tests/import_helper.py b/python3/tests/import_helper.py index 2fdbd922b95..6e1c5946558 100644 --- 
a/python3/tests/import_helper.py +++ b/python3/tests/import_helper.py @@ -5,7 +5,7 @@ from types import ModuleType from typing import Generator -from mock import Mock +from unittest.mock import MagicMock @contextmanager @@ -28,7 +28,7 @@ def mocked_modules(*module_names: str) -> Generator[None, None, None]: ``` """ for module_name in module_names: - sys.modules[module_name] = Mock() + sys.modules[module_name] = MagicMock() yield for module_name in module_names: sys.modules.pop(module_name) diff --git a/python3/tests/test_usb_reset.py b/python3/tests/test_usb_reset.py new file mode 100644 index 00000000000..43dae790cb1 --- /dev/null +++ b/python3/tests/test_usb_reset.py @@ -0,0 +1,109 @@ +import unittest +from unittest import mock +from unittest.mock import MagicMock +import sys + +# some mocked arguments are not used in the tests, but are needed for their side-effects +# disable pylint warning for unused arguments +# pylint: disable=unused-argument + +from python3.tests.import_helper import import_file_as_module +# mock modules to avoid dependencies +sys.modules["xcp"] = MagicMock() +sys.modules["xcp.logger"] = MagicMock() +usb_reset = import_file_as_module("python3/libexec/usb_reset.py") + + +class TestUsbReset(unittest.TestCase): + @mock.patch("usb_reset.open", new_callable=mock.mock_open, read_data="5\n") + def test_read_int(self, mock_open): + self.assertEqual(usb_reset.read_int("/fake/path"), 5) + mock_open.assert_called_with("/fake/path") + + @mock.patch("usb_reset.read_int", side_effect=[1, 2]) + @mock.patch("usb_reset.log") + def test_dev_path_valid(self, mock_log, mock_read_int): + device = "1-2.3" + path = usb_reset.dev_path(device) + self.assertEqual(path, "/dev/bus/usb/001/002") + mock_log.error.assert_not_called() + + @mock.patch("usb_reset.log") + def test_dev_path_invalid(self, mock_log): + with self.assertRaises(SystemExit): + usb_reset.dev_path("invalid-device") + mock_log.error.assert_called() + + @mock.patch("usb_reset.ctypes.CDLL") + 
@mock.patch("usb_reset.ctypes.util.find_library", return_value="libc.so.6") + @mock.patch("usb_reset.log") + def test_mount_success(self, mock_log, mock_find_lib, mock_cdll): + mock_cdll.return_value.mount.return_value = 0 + usb_reset.mount("src", "tgt", "fs") + mock_cdll.return_value.mount.assert_called() + + @mock.patch("usb_reset.ctypes.CDLL") + @mock.patch("usb_reset.ctypes.util.find_library", return_value="libc.so.6") + @mock.patch("usb_reset.log") + def test_mount_fail(self, mock_log, mock_find_lib, mock_cdll): + mock_cdll.return_value.mount.return_value = -1 + with self.assertRaises(SystemExit): + usb_reset.mount("src", "tgt", "fs") + mock_log.error.assert_called() + + @mock.patch("usb_reset.ctypes.CDLL") + @mock.patch("usb_reset.ctypes.util.find_library", return_value="libc.so.6") + @mock.patch("usb_reset.log") + def test_umount(self, mock_log, mock_find_lib, mock_cdll): + mock_cdll.return_value.umount.return_value = -1 + usb_reset.umount("tgt") + mock_log.error.assert_called() + + @mock.patch("usb_reset.os") + @mock.patch("usb_reset.pwd.getpwnam") + @mock.patch("usb_reset.grp.getgrnam") + @mock.patch("usb_reset.log") + def test_clone_device(self, mock_log, mock_grp, mock_pwd, mock_os): + mock_os.path.exists.return_value = False + mock_os.path.sep = "/" + mock_os.stat.return_value.st_mode = 0o600 + mock_os.stat.return_value.st_rdev = 0 + mock_os.major.return_value = 1 + mock_os.minor.return_value = 2 + mock_os.makedev.return_value = 1234 + mock_pwd.return_value.pw_uid = 1000 + mock_grp.return_value.gr_gid = 1000 + usb_reset.clone_device("/dev/bus/usb/001/002", "/root", 1) + mock_os.mknod.assert_called() + mock_os.chown.assert_called() + + @mock.patch("usb_reset.dev_path", return_value="/dev/bus/usb/001/002") + @mock.patch("usb_reset.open", new_callable=mock.mock_open) + @mock.patch("usb_reset.fcntl.ioctl") + @mock.patch("usb_reset.log") + def test_attach_reset_only(self, mock_log, mock_ioctl, mock_open, mock_dev_path): + usb_reset.attach("1-2", 1, 123, 
True) + mock_open.assert_called() + mock_ioctl.assert_called() + + @mock.patch("usb_reset.dev_path", return_value="/dev/bus/usb/001/002") + @mock.patch("usb_reset.os.remove") + @mock.patch("usb_reset.get_root_dir", return_value="/root") + def test_detach(self, mock_get_root_dir, mock_remove, mock_dev_path): + usb_reset.detach("1-2", 1) + mock_remove.assert_called() + + @mock.patch("usb_reset.shutil.rmtree") + @mock.patch("usb_reset.os.path.isdir", return_value=True) + @mock.patch("usb_reset.os.path.exists", return_value=True) + @mock.patch("usb_reset.os.path.ismount", return_value=True) + @mock.patch("usb_reset.umount") + @mock.patch("usb_reset.log") + #pylint: disable=too-many-arguments + def test_cleanup(self, mock_log, mock_umount, mock_ismount, + mock_exists, mock_isdir, mock_rmtree): + usb_reset.cleanup(1) + mock_rmtree.assert_called() + +if __name__ == "__main__": + unittest.main() diff --git a/python3/tests/test_usb_scan.py b/python3/tests/test_usb_scan.py index 8b886194c74..9ed8be1faad 100644 --- a/python3/tests/test_usb_scan.py +++ b/python3/tests/test_usb_scan.py @@ -9,14 +9,14 @@ import unittest from collections.abc import Mapping from typing import cast +from unittest.mock import MagicMock -import mock from python3.tests.import_helper import import_file_as_module # mock modules to avoid dependencies -sys.modules["xcp"] = mock.Mock() -sys.modules["xcp.logger"] = mock.Mock() -sys.modules["pyudev"] = mock.Mock() +sys.modules["xcp"] = MagicMock() +sys.modules["xcp.logger"] = MagicMock() +sys.modules["pyudev"] = MagicMock() usb_scan = import_file_as_module("python3/libexec/usb_scan.py") @@ -90,7 +90,7 @@ def mock_setup(mod, devices, interfaces, path): mod.log.error = verify_log mod.log.debug = verify_log mod.Policy._PATH = path - mod.pyudev.Context = mock.Mock( + mod.pyudev.Context = MagicMock( return_value=MocContext(devices, interfaces)) @@ -372,3 +372,11 @@ def test_usb_config_error_missing_colon(self): ALLOW # Otherwise allow everything else """ 
self.verify_usb_config_error_common(content, "to unpack") + + def test_usb_config_empty_line(self): + content = """# empty line +ALLOW:vid=056a pid=0314 class=03 # Wacom Intuos tablet + +ALLOW # Otherwise allow everything else +""" + self.verify_usb_config_error_common(content, "") diff --git a/quality-gate.sh b/quality-gate.sh index e59b8e40ccb..cd87c1252b5 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -3,7 +3,7 @@ set -e list-hd () { - N=277 + N=274 LIST_HD=$(git grep -r --count 'List.hd' -- **/*.ml | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$LIST_HD" -eq "$N" ]; then echo "OK counted $LIST_HD List.hd usages" @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=497 + N=463 X="ocaml/tests" X+="|ocaml/quicktest" X+="|ocaml/message-switch/core_test" @@ -44,7 +44,7 @@ mli-files () { } structural-equality () { - N=9 + N=7 EQ=$(git grep -r --count ' == ' -- '**/*.ml' ':!ocaml/sdk-gen/**/*.ml' | cut -d ':' -f 2 | paste -sd+ - | bc) if [ "$EQ" -eq "$N" ]; then echo "OK counted $EQ usages of ' == '" diff --git a/scripts/Makefile b/scripts/Makefile index 5751d8628ca..ad603ac37d8 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -137,6 +137,8 @@ install: mkdir -p $(DESTDIR)/etc/cron.d $(IDATA) xapi-tracing-log-trim.cron $(DESTDIR)/etc/cron.d/xapi-tracing-log-trim.cron mkdir -p $(DESTDIR)/opt/xensource/gpg + $(IPROG) xapi-ssh-monitor $(DESTDIR)$(OPTDIR)/bin + $(IDATA) xapi-ssh-monitor.service $(DESTDIR)/usr/lib/systemd/system/xapi-ssh-monitor.service # host-backup-restore $(IPROG) host-backup-restore/host-backup $(DESTDIR)$(LIBEXECDIR) $(IPROG) host-backup-restore/host-restore $(DESTDIR)$(LIBEXECDIR) diff --git a/scripts/host-bugreport-upload b/scripts/host-bugreport-upload index 766b6964f2e..545b7d561d3 100755 --- a/scripts/host-bugreport-upload +++ b/scripts/host-bugreport-upload @@ -4,8 +4,6 @@ # # Upload a bugreport to the support website -DEFAULT_BASE_URL="ftp://support.xensource.com/uploads/" - # If the user supplies a bare filename without a URI 
scheme, # we ignore it -- if they _really_ want to upload named files # to our support server, they can specify the URI scheme. @@ -19,7 +17,7 @@ if [ -z "$FILENAME" ]; then . @INVENTORY@ FILENAME=${INSTALLATION_UUID}-${now} fi -[ ! -z "${BASE_URL}" ] || BASE_URL="${DEFAULT_BASE_URL}" +[ -n "${BASE_URL}" ] || exit 1 URL="${BASE_URL}${FILENAME}" diff --git a/scripts/runtests b/scripts/runtests deleted file mode 100755 index cfdc95cf867..00000000000 --- a/scripts/runtests +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# It appears that this file is NOT installed by OMakefile. If installing it in -# the future, make sure that variable BASE_PATH is set correctly. - -if [ -z $1 ] -then - echo "Need to specify the current network interface (e.g. eth0) as a parameter - to this script" - exit 1 -fi - -IF=$1 - -export PATH=$PATH:@OPTDIR@/bin - -# generic stuff, necessary for xenrt too - -#install necessary packages -yum -y install nc -yum -y install rsync -yum -y install wget - -# mount the iso directory -mount bespin:/scratch/images/autoinstall /var/opt/xen/iso_import - -# make a lv for import/export tests, mount it on /mnt -VG=`vgs --noheadings -o size,name,size --separator=, | cut -d, -f2` -lvcreate -n importexport -L 10G $VG -mke2fs /dev/$VG/importexport -mount /dev/$VG/importexport /mnt - -#tmp dir for logging output -mkdir -p /tmp/rt -cd /tmp/rt -wget http://snoosnoo.uk.xensource.com/~jludlam/test.css -wget http://snoosnoo.uk.xensource.com/~jludlam/test_log.js - -# post this line is non-xenrt only. xenrt should provide the vms -# rather than having to import them. 
- -# mount the volume with the images on -mkdir -p /tmp/vms -mount bespin:/scratch2/jludlam /tmp/vms - -# import them - -cd /tmp/vms/ -./debian.sh $IF -./debian-pv.sh $IF -./windowsxp.sh $IF - -cp /tmp/vms/vncsnapshot /usr/bin/ -cd /tmp/rt - -test_host -a -v debian,debian-pv,windowsxp -i $IF - diff --git a/scripts/usb-policy.conf b/scripts/usb-policy.conf index 777cd96e246..e14a11d68a2 100644 --- a/scripts/usb-policy.conf +++ b/scripts/usb-policy.conf @@ -1,11 +1,16 @@ # When you change this file, run 'xe pusb-scan' to confirm # the file can be parsed correctly. +# You can also run '/opt/xensource/libexec/usb_scan.py -d' to see +# debug output from the script parsing this configuration file. # # Syntax is an ordered list of case insensitive rules where # is line comment # and each rule is (ALLOW | DENY) : ( match )* # and each match is (class|subclass|prot|vid|pid|rel) = hex-number # Maximum hex value for class/subclass/prot is FF, and for vid/pid/rel is FFFF # +# Rules are ordered so that the first matching rule will override +# any other rules for the device below it +# # USB Hubs (class 09) are always denied, independently of the rules in this file DENY: vid=17e9 # All DisplayLink USB displays DENY: class=02 # Communications and CDC-Control diff --git a/scripts/xapi-ssh-monitor b/scripts/xapi-ssh-monitor new file mode 100644 index 00000000000..d3c35658b47 --- /dev/null +++ b/scripts/xapi-ssh-monitor @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 + +import time +import subprocess +import logging +import os.path +import signal +import sys +import re +import XenAPI +import threading +from enum import Enum, auto +from typing import Tuple, List, Optional, Dict, Any +import traceback + +# Configure logging +log_format = '%(asctime)s - %(levelname)s - %(message)s' +log_level = logging.INFO + +logging.basicConfig( + level=log_level, + format=log_format, + handlers=[ + logging.StreamHandler(), + logging.FileHandler('/var/log/daemon.log') + ] +) + +logger = 
logging.getLogger(__name__) + +# Constants +class SshState(Enum): + DOWN = auto() + ACTIVE = auto() + UNKNOWN = auto() + +INSTALLATION_UUID_REGEX = re.compile("^INSTALLATION_UUID") + +def match_host_id(s): + return INSTALLATION_UUID_REGEX.search(s, 0) + +class XapiMonitor: + XAPI_HEALTH_CHECK = '/opt/xensource/libexec/xapi-health-check' + + def __init__(self): + self.logger = logging.getLogger(__name__) + self.running = True + self.session = None + self.localhost_uuid = self.get_localhost_uuid() + # Create event for graceful exit + self.exit_event = threading.Event() + signal.signal(signal.SIGTERM, self._handle_signal) + signal.signal(signal.SIGINT, self._handle_signal) + signal.signal(signal.SIGHUP, self._handle_signal) + + def _handle_signal(self, signum, frame): + """Handle termination signals""" + signal_names = { + signal.SIGTERM: "SIGTERM", + signal.SIGINT: "SIGINT", + signal.SIGHUP: "SIGHUP" + } + signal_name = signal_names.get(signum, f"Signal {signum}") + self.logger.info(f"Received {signal_name}, preparing to exit...") + self.running = False + # Set event to interrupt any waiting + self.exit_event.set() + + def _create_session(self) -> Optional[Any]: + """Create a session with local XAPI""" + try: + session = XenAPI.xapi_local() + session.login_with_password("", "") + return session + except Exception as e: + self.logger.error(f"Create XAPI session failed: {e}") + return None + + def _logout_session(self) -> None: + """Logout from XAPI session""" + try: + if self.session: + self.session.logout() + self.logger.debug("XAPI session logged out") + except Exception as e: + self.logger.warning(f"Error during session logout: {e}") + + @staticmethod + def get_localhost_uuid() -> str: + """Get the UUID of the local host from inventory file""" + filename = '/etc/xensource-inventory' + try: + with open(filename, 'r') as f: + for line in filter(match_host_id, f.readlines()): + return line.split("'")[1] + except Exception as e: + error_msg = f"Unable to open inventory 
file [{filename}]: {e}" + logging.getLogger(__name__).error(error_msg) + raise RuntimeError(error_msg) + + # If we get here, we didn't find the UUID + error_msg = f"Could not find INSTALLATION_UUID in {filename}" + logging.getLogger(__name__).error(error_msg) + raise RuntimeError(error_msg) + + def _run_command(self, command: List[str], timeout: int = 10) -> Tuple[int, str, str]: + """Execute command and return results + + Args: + command: Command to execute as list of strings + timeout: Command execution timeout in seconds (default: 10) + + Returns: + Tuple of (return_code, stdout, stderr) + """ + self.logger.debug(f"Running command: {' '.join(command)}") + try: + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True + ) + try: + stdout, stderr = process.communicate(timeout=timeout) + self.logger.debug(f"Command returned: {process.returncode}") + return process.returncode, stdout, stderr + except subprocess.TimeoutExpired: + process.kill() + process.communicate() + self.logger.error(f"Command execution timeout after {timeout}s: {' '.join(command)}") + return -1, "", "Timeout" + except Exception as e: + self.logger.error(f"Error executing command: {e}") + return -1, "", str(e) + + def _check_xapi_health(self) -> bool: + """Check XAPI health status with extended timeout""" + self.logger.debug("Performing XAPI health check") + returncode, stdout, stderr = self._run_command([self.XAPI_HEALTH_CHECK], timeout=120) + + if returncode != 0: + self.logger.warning(f"XAPI health check failed: {stderr}") + + return returncode == 0 + + def _get_ssh_state(self) -> SshState: + """Get SSH service status""" + returncode, stdout, stderr = self._run_command(['systemctl', 'is-active', 'sshd']) + status = stdout.strip() + + if status == 'active': + return SshState.ACTIVE + if status in ('inactive', 'failed', 'unknown'): + return SshState.DOWN + + self.logger.warning(f"Unexpected SSH status: {status}, stderr: {stderr}") + 
return SshState.UNKNOWN + + def _control_ssh_service(self, enable: bool) -> bool: + """Control SSH service + + Returns: + bool: True if operation was successful, False otherwise + """ + action = "starting" if enable else "stopping" + try: + firewall_cmd = '/usr/bin/firewall-cmd' + use_firewalld = os.path.exists(firewall_cmd) + if enable: + if use_firewalld: + ret0, _, stderr0 = self._run_command([firewall_cmd, '--add-service', 'ssh']) + else: + ret0, stderr0 = 0, "n/a" + ret1, _, stderr1 = self._run_command(['systemctl', 'enable', 'sshd']) + ret2, _, stderr2 = self._run_command(['systemctl', 'start', 'sshd']) + success = (ret0 == 0 and ret1 == 0 and ret2 == 0) + else: + ret2, _, stderr2 = self._run_command(['systemctl', 'stop', 'sshd']) + ret1, _, stderr1 = self._run_command(['systemctl', 'disable', 'sshd']) + if use_firewalld: + ret0, _, stderr0 = self._run_command([firewall_cmd, '--remove-service', 'ssh']) + else: + ret0, stderr0 = 0, "n/a" + success = (ret0 == 0 and ret1 == 0 and ret2 == 0) + + if success: + self.logger.info(f"SSH service {action} successful") + else: + err_msg = f"""SSH service {action} failed: enable/disable firewalld service stderr: {stderr0}, + enable/disable sshd stderr: {stderr1}, start/stop sshd stderr: {stderr2} """ + self.logger.error(err_msg) + + return success + except Exception as e: + self.logger.error(f"SSH service {action} failed with exception: {e}") + self.logger.debug(traceback.format_exc()) + return False + + def _disable_ssh_via_api(self) -> bool: + """Disable SSH via XAPI, max retries 3 times""" + if not self.session: + self.session = self._create_session() + if not self.session: + return False + + retry_count = 0 + max_retries = 3 + retry_interval = 5 + + while retry_count < max_retries and self.running: + try: + host = self.session.xenapi.host.get_by_uuid(self.localhost_uuid) + self.session.xenapi.host.disable_ssh(host) + self.logger.info("Successfully disabled SSH via XAPI") + return True + except Exception as e: + 
retry_count += 1 + self.logger.warning(f"Disable SSH via API failed ({retry_count}/{max_retries}): {e}") + if retry_count < max_retries and self.running: + # Use interruptible sleep + if self.exit_event.wait(retry_interval): + return False + self._logout_session() + self.session = self._create_session() + + if not self.running: + return False + + self.logger.error(f"Disable SSH via API failed, max retries reached ({max_retries})") + return False + + def run(self): + """Main monitoring loop""" + self.logger.info("Starting XAPI and SSH service monitoring...") + + self.session = self._create_session() + if not self.session: + self.logger.warning("Initial session creation failed, will retry later") + + while self.running: + try: + # Check XAPI health - always perform the check + xapi_healthy = self._check_xapi_health() + + # Get current SSH state + current_ssh_state = self._get_ssh_state() + self.logger.debug(f"Current SSH state: {current_ssh_state}") + + if xapi_healthy: + if current_ssh_state == SshState.ACTIVE: + self.logger.info("XAPI healthy: Stopping SSH service") + if not self._disable_ssh_via_api(): + self.logger.warning("Disable SSH via API failed, keeping SSH service running") + else: + if current_ssh_state != SshState.ACTIVE: + self.logger.info("XAPI unhealthy: Starting SSH service") + self._control_ssh_service(True) + + except Exception as e: + self.logger.error(f"Runtime error: {e}") + self.logger.debug(traceback.format_exc()) + + self._logout_session() + + self.session = None + + # Use interruptible sleep with a fixed interval when there is an error + if self.exit_event.wait(5): + break + + continue + + # Use interruptible sleep for main loop + if self.exit_event.wait(60): + break + + self._logout_session() + + self.logger.info("Monitoring service stopped") + +def main(): + logger.info(f"SSH Control Service starting (PID: {os.getpid()})") + + try: + monitor = XapiMonitor() + monitor.run() + except Exception as e: + logger.critical(f"Fatal error in main 
process: {e}") + logger.critical(traceback.format_exc()) + sys.exit(1) + + logger.info("SSH Control Service exited normally") + sys.exit(0) + +if __name__ == '__main__': + main() diff --git a/scripts/xapi-ssh-monitor.service b/scripts/xapi-ssh-monitor.service new file mode 100644 index 00000000000..f38685e7082 --- /dev/null +++ b/scripts/xapi-ssh-monitor.service @@ -0,0 +1,14 @@ +[Unit] +Description=XAPI SSH monitor service +After=network.target +After=xapi.service +OnFailure=sshd.service + +[Service] +Type=simple +RemainAfterExit=true +ExecStart=/opt/xensource/bin/xapi-ssh-monitor +ExecStop=/bin/true + +[Install] +WantedBy=multi-user.target diff --git a/scripts/xapi.conf b/scripts/xapi.conf index 46f859a8d42..8736fed6c0d 100644 --- a/scripts/xapi.conf +++ b/scripts/xapi.conf @@ -159,7 +159,8 @@ sparse_dd = /usr/libexec/xapi/sparse_dd # Directory containing supplemental pack data # packs-dir = @ETCXENDIR@/installed-repos -# Directory containing SM plugins +# Directory containing SM plugins. 
This path changes in XenServer 9 with a +# configuration coming from /etc/xapi.conf.d/, which takes precedence # sm-dir = @OPTDIR@/sm # Whitelist of SM plugins @@ -328,7 +329,7 @@ sm-plugins=ext nfs iscsi lvmoiscsi dummy file hba rawhba udev iso lvm lvmohba lv # ha_monitor_interval = 20 # Unconditionally replan every once in a while just in case the overcommit -# protection is buggy and we don't notice +# protection is buggy and we don't notice # ha_monitor_plan_interval = 1800 # ha_monitor_startup_timeout = 1800 @@ -370,7 +371,7 @@ sm-plugins=ext nfs iscsi lvmoiscsi dummy file hba rawhba udev iso lvm lvmohba lv # The default time, in µs, in which tapdisk3 will keep polling the # vbd ring buffer in expectation for extra requests from the guest -# default-vbd3-polling-duration = 1000 +# default-vbd3-polling-duration = 8000 # The default % of idle dom0 cpu above which tapdisk3 will keep polling # the vbd ring buffer @@ -385,7 +386,7 @@ sm-plugins=ext nfs iscsi lvmoiscsi dummy file hba rawhba udev iso lvm lvmohba lv # evacuation-batch-size = 10 # number of VMs migrated in parallel in Host.evacuate -# How often tracing will export spans to endpoints +# How often tracing will export spans to endpoints # export-interval = 30. 
# The file to check if host reboot required diff --git a/scripts/xe-backup-metadata b/scripts/xe-backup-metadata index 19f0cf0e4a9..88980776b9b 100755 --- a/scripts/xe-backup-metadata +++ b/scripts/xe-backup-metadata @@ -24,7 +24,7 @@ if [ "${master_uuid}" != "${INSTALLATION_UUID}" ]; then exit 1 fi -history_kept=25 +history_kept=12 metadata_version=1 debug=/bin/true @@ -129,7 +129,7 @@ if [ -z "${vdi_uuid}" ]; then echo -n "Creating new backup VDI: " label="Pool Metadata Backup" # the label must match what xapi_vdi.ml is using for backup VDIs - vdi_uuid=$(${XE} vdi-create virtual-size=500MiB sr-uuid="${sr_uuid}" type=user name-label="${label}") + vdi_uuid=$(${XE} vdi-create virtual-size=1GiB sr-uuid="${sr_uuid}" type=user name-label="${label}") init_fs=1 if [ $? -ne 0 ]; then echo failed diff --git a/unixpwd/src/unixpwd.mli b/unixpwd/src/unixpwd.mli index c191870388f..f613bf01d5d 100644 --- a/unixpwd/src/unixpwd.mli +++ b/unixpwd/src/unixpwd.mli @@ -30,7 +30,7 @@ val get : string -> string * /etc/shadow database if an entry exists, otherwise it tries to * obtain the password from the /etc/passwd database. It raises [Error] * if that fails. 
- * *) + *) val setpwd : string -> string -> unit diff --git a/xapi-log.opam b/xapi-log.opam deleted file mode 100644 index d83f9bec7c6..00000000000 --- a/xapi-log.opam +++ /dev/null @@ -1,31 +0,0 @@ -# This file is generated by dune, edit dune-project instead -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" -homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] -available: [ os = "linux" ] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "astring" - "fmt" - "logs" - "mtime" - "xapi-backtrace" - "xapi-stdext-pervasives" -] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. -No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} diff --git a/xapi-log.opam.template b/xapi-log.opam.template deleted file mode 100644 index 00b5cce6fd5..00000000000 --- a/xapi-log.opam.template +++ /dev/null @@ -1,29 +0,0 @@ -opam-version: "2.0" -maintainer: "xen-api@lists.xen.org" -authors: "xen-api@lists.xen.org" -homepage: "https://xapi-project.github.io/" -bug-reports: "https://github.com/xapi-project/xen-api.git" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [ - ["dune" "build" "-p" name "-j" jobs] - ["dune" "runtest" "-p" name "-j" jobs] {with-test} -] -available: [ os = "linux" ] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "astring" - "fmt" - "logs" - "mtime" - "xapi-backtrace" - "xapi-stdext-pervasives" -] -synopsis: "Library required by xapi" -description: """ -These libraries are provided for backwards compatibility only. 
-No new code should use these libraries.""" -url { - src: - "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} diff --git a/xapi-storage-cli.opam b/xapi-storage-cli.opam deleted file mode 100644 index c58a06832eb..00000000000 --- a/xapi-storage-cli.opam +++ /dev/null @@ -1,28 +0,0 @@ -# This file is generated by dune, edit dune-project instead - -opam-version: "2.0" -name: "xapi-storage-cli" -maintainer: "xen-api@lists.xen.org" -authors: [ "xen-api@lists.xen.org" ] -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -homepage: "https://github.com/xapi-project/xen-api" -bug-reports: "https://github.com/xapi-project/xen-api/issues" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [[ "dune" "build" "-p" name "-j" jobs ]] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "base-threads" - "re" - "rpclib" - "ppx_deriving_rpc" - "xapi-idl" - "cmdliner" -] -synopsis: "A CLI for xapi storage services" -description: """ -The CLI allows you to directly manipulate virtual disk images, without -them being attached to VMs.""" -url { - src: "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -} diff --git a/xapi-storage-cli.opam.template b/xapi-storage-cli.opam.template deleted file mode 100644 index 3ffbe86d8a3..00000000000 --- a/xapi-storage-cli.opam.template +++ /dev/null @@ -1,26 +0,0 @@ -opam-version: "2.0" -name: "xapi-storage-cli" -maintainer: "xen-api@lists.xen.org" -authors: [ "xen-api@lists.xen.org" ] -license: "LGPL-2.1-only WITH OCaml-LGPL-linking-exception" -homepage: "https://github.com/xapi-project/xen-api" -bug-reports: "https://github.com/xapi-project/xen-api/issues" -dev-repo: "git+https://github.com/xapi-project/xen-api.git" -build: [[ "dune" "build" "-p" name "-j" jobs ]] -depends: [ - "ocaml" - "dune" {>= "3.15"} - "base-threads" - "re" - "rpclib" - "ppx_deriving_rpc" - "xapi-idl" - "cmdliner" -] -synopsis: "A CLI for xapi storage services" -description: """ -The CLI allows you to directly manipulate virtual 
disk images, without -them being attached to VMs.""" -url { - src: "https://github.com/xapi-project/xen-api/archive/master.tar.gz" -}